From 552fe04aa242f164f126abfdb3f6f90fd6679d9f Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 5 May 2008 21:21:51 +0300 Subject: PCI: make {pciehp,shpchp}_slot_with_bus static This patch makes the needlessly global {pciehp,shpchp}_slot_with_bus static. Signed-off-by: Adrian Bunk Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 48a2ed3..86d04c6 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -41,7 +41,7 @@ int pciehp_debug; int pciehp_poll_mode; int pciehp_poll_time; int pciehp_force; -int pciehp_slot_with_bus; +static int pciehp_slot_with_bus; struct workqueue_struct *pciehp_wq; #define DRIVER_VERSION "0.4" diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c index 9784865..0066b0b 100644 --- a/drivers/pci/hotplug/shpchp_core.c +++ b/drivers/pci/hotplug/shpchp_core.c @@ -39,7 +39,7 @@ int shpchp_debug; int shpchp_poll_mode; int shpchp_poll_time; -int shpchp_slot_with_bus; +static int shpchp_slot_with_bus; struct workqueue_struct *shpchp_wq; #define DRIVER_VERSION "0.4" -- cgit v0.10.2 From 19792a0859f96e9fc8ce87d97b269bcb895389e5 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 5 May 2008 21:25:47 +0300 Subject: PCI: drivers/pci/pci.c: add prototypes This patch adds prototypes for pcibios_disable_device() and pcibios_set_pcie_reset_state() in include/linux/pci.h While I was at it, I also removed the unneeded "extern" from the prototype of pcibios_add_platform_entries(). Signed-off-by: Adrian Bunk Signed-off-by: Jesse Barnes diff --git a/include/linux/pci.h b/include/linux/pci.h index 509159b..aaa9f33 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1059,7 +1059,10 @@ extern int pci_pci_problems; extern unsigned long pci_cardbus_io_size; extern unsigned long pci_cardbus_mem_size; -extern int pcibios_add_platform_entries(struct pci_dev *dev); +int pcibios_add_platform_entries(struct pci_dev *dev); +void pcibios_disable_device(struct pci_dev *dev); +int pcibios_set_pcie_reset_state(struct pci_dev *dev, + enum pcie_reset_state state); #ifdef CONFIG_PCI_MMCONFIG extern void __init pci_mmcfg_early_init(void); -- cgit v0.10.2 From eaf611426d4b9ad0d0a30c0a8d414380128720af Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Sun, 11 May 2008 16:58:53 -0400 Subject: PCI: Replace deprecated __initcall with device_initcall. Signed-off-by: Robert P. J. Day Signed-off-by: Jesse Barnes diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 963a976..95eb083 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -482,5 +482,5 @@ static int __init pci_proc_init(void) return 0; } -__initcall(pci_proc_init); +device_initcall(pci_proc_init); -- cgit v0.10.2 From 273c11270d3715c4c06d4df1607a1a60034d887b Mon Sep 17 00:00:00 2001 From: Miklos Vajna Date: Tue, 13 May 2008 18:38:56 +0200 Subject: x86/PCI: janitor work in irq.c Wrapped long lines, removed trailing whitespaces, fixed case indentation inside switch and so. Signed-off-by: Miklos Vajna Signed-off-by: Jesse Barnes diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index ca8df9c..c422e10 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -11,8 +11,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -45,7 +45,8 @@ struct irq_router { char *name; u16 vendor, device; int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq); - int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new); + int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, + int new); }; struct irq_router_handler { @@ -61,7 +62,7 @@ void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL; * and perform checksum verification. */ -static inline struct irq_routing_table * pirq_check_routing_table(u8 *addr) +static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr) { struct irq_routing_table *rt; int i; @@ -74,10 +75,11 @@ static inline struct irq_routing_table * pirq_check_routing_table(u8 *addr) rt->size < sizeof(struct irq_routing_table)) return NULL; sum = 0; - for (i=0; i < rt->size; i++) + for (i = 0; i < rt->size; i++) sum += addr[i]; if (!sum) { - DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", rt); + DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", + rt); return rt; } return NULL; @@ -100,7 +102,8 @@ static struct irq_routing_table * __init pirq_find_routing_table(void) return rt; printk(KERN_WARNING "PCI: PIRQ table NOT found at pirqaddr\n"); } - for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) { + for (addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); + addr += 16) { rt = pirq_check_routing_table(addr); if (rt) return rt; @@ -122,20 +125,23 @@ static void __init pirq_peer_trick(void) struct irq_info *e; memset(busmap, 0, sizeof(busmap)); - for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) { + for (i = 0; i < (rt->size - sizeof(struct irq_routing_table)) / + sizeof(struct irq_info); i++) { e = &rt->slots[i]; #ifdef DEBUG { int j; - DBG(KERN_DEBUG "%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot); - for(j=0; j<4; j++) - DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap); + DBG(KERN_DEBUG "%02x:%02x slot=%02x", e->bus, + e->devfn/8, e->slot); + for (j = 0; j < 4; j++) + DBG(" %d:%02x/%04x", j, e->irq[j].link, + e->irq[j].bitmap); DBG("\n"); } #endif busmap[e->bus] = 1; } - for(i = 1; i < 256; i++) { + for (i = 1; i < 256; i++) { int node; if (!busmap[i] || pci_find_bus(0, i)) continue; @@ -174,7 +180,8 @@ void eisa_set_level_irq(unsigned int irq) * Common IRQ routing practice: nibbles in config space, * offset by some magic constant. */ -static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr) +static unsigned int +read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr) { u8 x; unsigned reg = offset + (nr >> 1); @@ -183,7 +190,8 @@ static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, return (nr & 1) ? (x >> 4) : (x & 0xf); } -static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val) +static void write_config_nybble(struct pci_dev *router, unsigned offset, + unsigned nr, unsigned int val) { u8 x; unsigned reg = offset + (nr >> 1); @@ -200,15 +208,18 @@ static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigne */ static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq) { - static const unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 }; + static const unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, + 11, 0, 12, 0, 14, 0, 15 }; WARN_ON_ONCE(pirq > 16); return irqmap[read_config_nybble(router, 0x48, pirq-1)]; } -static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +static int +pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { - static const unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 }; + static const unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, + 3, 9, 11, 0, 13, 15 }; unsigned int val = irqmap[irq]; WARN_ON_ONCE(pirq > 16); @@ -231,7 +242,8 @@ static int pirq_piix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) return (x < 16) ? x : 0; } -static int pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +static int +pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { pci_write_config_byte(router, pirq, irq); return 1; @@ -247,7 +259,8 @@ static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq) return read_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq); } -static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +static int +pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { write_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq, irq); return 1; @@ -258,7 +271,8 @@ static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i * but without the ugly irq number munging. * However, for 82C586, nibble map is different . */ -static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +static int +pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq) { static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 }; @@ -266,7 +280,8 @@ static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq return read_config_nybble(router, 0x55, pirqmap[pirq-1]); } -static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +static int +pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 }; @@ -285,10 +300,11 @@ static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq) static const unsigned char pirqmap[4] = { 1, 0, 2, 3 }; WARN_ON_ONCE(pirq > 4); - return read_config_nybble(router,0x43, pirqmap[pirq-1]); + return read_config_nybble(router, 0x43, pirqmap[pirq-1]); } -static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +static int +pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { static const unsigned char pirqmap[4] = { 1, 0, 2, 3 }; @@ -306,7 +322,8 @@ static int pirq_opti_get(struct pci_dev *router, struct pci_dev *dev, int pirq) return read_config_nybble(router, 0xb8, pirq >> 4); } -static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +static int +pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { write_config_nybble(router, 0xb8, pirq >> 4, irq); return 1; @@ -314,7 +331,7 @@ static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, /* * Cyrix: nibble offset 0x5C - * 0x5C bits 7:4 is INTB bits 3:0 is INTA + * 0x5C bits 7:4 is INTB bits 3:0 is INTA * 0x5D bits 7:4 is INTD bits 3:0 is INTC */ static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) @@ -322,7 +339,8 @@ static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) return read_config_nybble(router, 0x5C, (pirq-1)^1); } -static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +static int +pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { write_config_nybble(router, 0x5C, (pirq-1)^1, irq); return 1; @@ -350,7 +368,7 @@ static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, * Apparently there are systems implementing PCI routing table using * link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D. * We try our best to handle both link mappings. - * + * * Currently (2003-05-21) it appears most SiS chipsets follow the * definition of routing registers from the SiS-5595 southbridge. * According to the SiS 5595 datasheets the revision id's of the @@ -370,7 +388,7 @@ static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, * * 0x62: USBIRQ: * bit 6 OHCI function disabled (0), enabled (1) - * + * * 0x6a: ACPI/SCI IRQ: bits 4-6 reserved * * 0x7e: Data Acq. Module IRQ - bits 4-6 reserved @@ -405,7 +423,8 @@ static int pirq_sis_get(struct pci_dev *router, struct pci_dev *dev, int pirq) return (x & PIRQ_SIS_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS_IRQ_MASK); } -static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +static int +pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { u8 x; int reg; @@ -439,7 +458,8 @@ static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq) return read_config_nybble(router, 0x74, pirq-1); } -static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +static int +pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { WARN_ON_ONCE(pirq >= 9); if (pirq > 8) { @@ -461,13 +481,15 @@ static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, * 0x00 for ACPI (SCI), 0x01 for USB, 0x02 for IDE0, 0x04 for IDE1, * and 0x03 for SMBus. */ -static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +static int +pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq) { outb(pirq, 0xc00); return inb(0xc01) & 0xf; } -static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, + int pirq, int irq) { outb(pirq, 0xc00); outb(irq, 0xc01); @@ -482,27 +504,27 @@ static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int * offset 0x56 0-3 PIRQA 4-7 PIRQB * offset 0x57 0-3 PIRQC 4-7 PIRQD */ -static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +static int +pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq) { u8 irq; irq = 0; if (pirq <= 4) - { irq = read_config_nybble(router, 0x56, pirq - 1); - } - printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", + printk(KERN_INFO + "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", dev->vendor, dev->device, pirq, irq); return irq; } -static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +static int +pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { - printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", + printk(KERN_INFO + "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", dev->vendor, dev->device, pirq, irq); if (pirq <= 4) - { write_config_nybble(router, 0x56, pirq - 1, irq); - } return 1; } @@ -528,7 +550,8 @@ static int pirq_pico_set(struct pci_dev *router, struct pci_dev *dev, int pirq, #ifdef CONFIG_PCI_BIOS -static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +static int +pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { struct pci_dev *bridge; int pin = pci_get_interrupt_pin(dev, &bridge); @@ -537,11 +560,14 @@ static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, #endif -static __init int intel_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +static __init int +intel_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { static struct pci_device_id __initdata pirq_440gx[] = { - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82443GX_0) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82443GX_2) }, { }, }; @@ -549,50 +575,49 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route if (pci_dev_present(pirq_440gx)) return 0; - switch(device) - { - case PCI_DEVICE_ID_INTEL_82371FB_0: - case PCI_DEVICE_ID_INTEL_82371SB_0: - case PCI_DEVICE_ID_INTEL_82371AB_0: - case PCI_DEVICE_ID_INTEL_82371MX: - case PCI_DEVICE_ID_INTEL_82443MX_0: - case PCI_DEVICE_ID_INTEL_82801AA_0: - case PCI_DEVICE_ID_INTEL_82801AB_0: - case PCI_DEVICE_ID_INTEL_82801BA_0: - case PCI_DEVICE_ID_INTEL_82801BA_10: - case PCI_DEVICE_ID_INTEL_82801CA_0: - case PCI_DEVICE_ID_INTEL_82801CA_12: - case PCI_DEVICE_ID_INTEL_82801DB_0: - case PCI_DEVICE_ID_INTEL_82801E_0: - case PCI_DEVICE_ID_INTEL_82801EB_0: - case PCI_DEVICE_ID_INTEL_ESB_1: - case PCI_DEVICE_ID_INTEL_ICH6_0: - case PCI_DEVICE_ID_INTEL_ICH6_1: - case PCI_DEVICE_ID_INTEL_ICH7_0: - case PCI_DEVICE_ID_INTEL_ICH7_1: - case PCI_DEVICE_ID_INTEL_ICH7_30: - case PCI_DEVICE_ID_INTEL_ICH7_31: - case PCI_DEVICE_ID_INTEL_ESB2_0: - case PCI_DEVICE_ID_INTEL_ICH8_0: - case PCI_DEVICE_ID_INTEL_ICH8_1: - case PCI_DEVICE_ID_INTEL_ICH8_2: - case PCI_DEVICE_ID_INTEL_ICH8_3: - case PCI_DEVICE_ID_INTEL_ICH8_4: - case PCI_DEVICE_ID_INTEL_ICH9_0: - case PCI_DEVICE_ID_INTEL_ICH9_1: - case PCI_DEVICE_ID_INTEL_ICH9_2: - case PCI_DEVICE_ID_INTEL_ICH9_3: - case PCI_DEVICE_ID_INTEL_ICH9_4: - case PCI_DEVICE_ID_INTEL_ICH9_5: - case PCI_DEVICE_ID_INTEL_TOLAPAI_0: - case PCI_DEVICE_ID_INTEL_ICH10_0: - case PCI_DEVICE_ID_INTEL_ICH10_1: - case PCI_DEVICE_ID_INTEL_ICH10_2: - case PCI_DEVICE_ID_INTEL_ICH10_3: - r->name = "PIIX/ICH"; - r->get = pirq_piix_get; - r->set = pirq_piix_set; - return 1; + switch (device) { + case PCI_DEVICE_ID_INTEL_82371FB_0: + case PCI_DEVICE_ID_INTEL_82371SB_0: + case PCI_DEVICE_ID_INTEL_82371AB_0: + case PCI_DEVICE_ID_INTEL_82371MX: + case PCI_DEVICE_ID_INTEL_82443MX_0: + case PCI_DEVICE_ID_INTEL_82801AA_0: + case PCI_DEVICE_ID_INTEL_82801AB_0: + case PCI_DEVICE_ID_INTEL_82801BA_0: + case PCI_DEVICE_ID_INTEL_82801BA_10: + case PCI_DEVICE_ID_INTEL_82801CA_0: + case PCI_DEVICE_ID_INTEL_82801CA_12: + case PCI_DEVICE_ID_INTEL_82801DB_0: + case PCI_DEVICE_ID_INTEL_82801E_0: + case PCI_DEVICE_ID_INTEL_82801EB_0: + case PCI_DEVICE_ID_INTEL_ESB_1: + case PCI_DEVICE_ID_INTEL_ICH6_0: + case PCI_DEVICE_ID_INTEL_ICH6_1: + case PCI_DEVICE_ID_INTEL_ICH7_0: + case PCI_DEVICE_ID_INTEL_ICH7_1: + case PCI_DEVICE_ID_INTEL_ICH7_30: + case PCI_DEVICE_ID_INTEL_ICH7_31: + case PCI_DEVICE_ID_INTEL_ESB2_0: + case PCI_DEVICE_ID_INTEL_ICH8_0: + case PCI_DEVICE_ID_INTEL_ICH8_1: + case PCI_DEVICE_ID_INTEL_ICH8_2: + case PCI_DEVICE_ID_INTEL_ICH8_3: + case PCI_DEVICE_ID_INTEL_ICH8_4: + case PCI_DEVICE_ID_INTEL_ICH9_0: + case PCI_DEVICE_ID_INTEL_ICH9_1: + case PCI_DEVICE_ID_INTEL_ICH9_2: + case PCI_DEVICE_ID_INTEL_ICH9_3: + case PCI_DEVICE_ID_INTEL_ICH9_4: + case PCI_DEVICE_ID_INTEL_ICH9_5: + case PCI_DEVICE_ID_INTEL_TOLAPAI_0: + case PCI_DEVICE_ID_INTEL_ICH10_0: + case PCI_DEVICE_ID_INTEL_ICH10_1: + case PCI_DEVICE_ID_INTEL_ICH10_2: + case PCI_DEVICE_ID_INTEL_ICH10_3: + r->name = "PIIX/ICH"; + r->get = pirq_piix_get; + r->set = pirq_piix_set; + return 1; } return 0; } @@ -606,7 +631,7 @@ static __init int via_router_probe(struct irq_router *r, * workarounds for some buggy BIOSes */ if (device == PCI_DEVICE_ID_VIA_82C586_0) { - switch(router->device) { + switch (router->device) { case PCI_DEVICE_ID_VIA_82C686: /* * Asus k7m bios wrongly reports 82C686A @@ -631,7 +656,7 @@ static __init int via_router_probe(struct irq_router *r, } } - switch(device) { + switch (device) { case PCI_DEVICE_ID_VIA_82C586_0: r->name = "VIA"; r->get = pirq_via586_get; @@ -652,88 +677,89 @@ static __init int via_router_probe(struct irq_router *r, return 0; } -static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +static __init int +vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { - case PCI_DEVICE_ID_VLSI_82C534: - r->name = "VLSI 82C534"; - r->get = pirq_vlsi_get; - r->set = pirq_vlsi_set; - return 1; + switch (device) { + case PCI_DEVICE_ID_VLSI_82C534: + r->name = "VLSI 82C534"; + r->get = pirq_vlsi_get; + r->set = pirq_vlsi_set; + return 1; } return 0; } -static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +static __init int serverworks_router_probe(struct irq_router *r, + struct pci_dev *router, u16 device) { - switch(device) - { - case PCI_DEVICE_ID_SERVERWORKS_OSB4: - case PCI_DEVICE_ID_SERVERWORKS_CSB5: - r->name = "ServerWorks"; - r->get = pirq_serverworks_get; - r->set = pirq_serverworks_set; - return 1; + switch (device) { + case PCI_DEVICE_ID_SERVERWORKS_OSB4: + case PCI_DEVICE_ID_SERVERWORKS_CSB5: + r->name = "ServerWorks"; + r->get = pirq_serverworks_get; + r->set = pirq_serverworks_set; + return 1; } return 0; } -static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +static __init int +sis_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { if (device != PCI_DEVICE_ID_SI_503) return 0; - + r->name = "SIS"; r->get = pirq_sis_get; r->set = pirq_sis_set; return 1; } -static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +static __init int +cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { - case PCI_DEVICE_ID_CYRIX_5520: - r->name = "NatSemi"; - r->get = pirq_cyrix_get; - r->set = pirq_cyrix_set; - return 1; + switch (device) { + case PCI_DEVICE_ID_CYRIX_5520: + r->name = "NatSemi"; + r->get = pirq_cyrix_get; + r->set = pirq_cyrix_set; + return 1; } return 0; } -static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +static __init int +opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { - case PCI_DEVICE_ID_OPTI_82C700: - r->name = "OPTI"; - r->get = pirq_opti_get; - r->set = pirq_opti_set; - return 1; + switch (device) { + case PCI_DEVICE_ID_OPTI_82C700: + r->name = "OPTI"; + r->get = pirq_opti_get; + r->set = pirq_opti_set; + return 1; } return 0; } -static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +static __init int +ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { - case PCI_DEVICE_ID_ITE_IT8330G_0: - r->name = "ITE"; - r->get = pirq_ite_get; - r->set = pirq_ite_set; - return 1; + switch (device) { + case PCI_DEVICE_ID_ITE_IT8330G_0: + r->name = "ITE"; + r->get = pirq_ite_get; + r->set = pirq_ite_set; + return 1; } return 0; } -static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +static __init int +ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { + switch (device) { case PCI_DEVICE_ID_AL_M1533: case PCI_DEVICE_ID_AL_M1563: printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n"); @@ -745,28 +771,29 @@ static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, return 0; } -static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +static __init int +amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { - switch(device) - { - case PCI_DEVICE_ID_AMD_VIPER_740B: - r->name = "AMD756"; - break; - case PCI_DEVICE_ID_AMD_VIPER_7413: - r->name = "AMD766"; - break; - case PCI_DEVICE_ID_AMD_VIPER_7443: - r->name = "AMD768"; - break; - default: - return 0; + switch (device) { + case PCI_DEVICE_ID_AMD_VIPER_740B: + r->name = "AMD756"; + break; + case PCI_DEVICE_ID_AMD_VIPER_7413: + r->name = "AMD766"; + break; + case PCI_DEVICE_ID_AMD_VIPER_7443: + r->name = "AMD768"; + break; + default: + return 0; } r->get = pirq_amd756_get; r->set = pirq_amd756_set; return 1; } - -static __init int pico_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) + +static __init int +pico_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { switch (device) { case PCI_DEVICE_ID_PICOPOWER_PT86C523: @@ -807,7 +834,7 @@ static struct pci_dev *pirq_router_dev; * FIXME: should we have an option to say "generic for * chipset" ? */ - + static void __init pirq_find_router(struct irq_router *r) { struct irq_routing_table *rt = pirq_table; @@ -826,7 +853,7 @@ static void __init pirq_find_router(struct irq_router *r) r->name = "default"; r->get = NULL; r->set = NULL; - + DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", rt->rtr_vendor, rt->rtr_device); @@ -837,12 +864,14 @@ static void __init pirq_find_router(struct irq_router *r) return; } - for( h = pirq_routers; h->vendor; h++) { + for (h = pirq_routers; h->vendor; h++) { /* First look for a router match */ - if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device)) + if (rt->rtr_vendor == h->vendor && + h->probe(r, pirq_router_dev, rt->rtr_device)) break; /* Fall back to a device match */ - if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device)) + if (pirq_router_dev->vendor == h->vendor && + h->probe(r, pirq_router_dev, pirq_router_dev->device)) break; } printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n", @@ -857,11 +886,13 @@ static void __init pirq_find_router(struct irq_router *r) static struct irq_info *pirq_get_info(struct pci_dev *dev) { struct irq_routing_table *rt = pirq_table; - int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); + int entries = (rt->size - sizeof(struct irq_routing_table)) / + sizeof(struct irq_info); struct irq_info *info; for (info = rt->slots; entries--; info++) - if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn)) + if (info->bus == dev->bus->number && + PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn)) return info; return NULL; } @@ -889,7 +920,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) if (!pirq_table) return 0; - + DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin); info = pirq_get_info(dev); if (!info) { @@ -902,7 +933,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) DBG(" -> not routed\n" KERN_DEBUG); return 0; } - DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs); + DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, + pirq_table->exclusive_irqs); mask &= pcibios_irq_mask; /* Work around broken HP Pavilion Notebooks which assign USB to @@ -915,7 +947,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) } /* same for Acer Travelmate 360, but with CB and irq 11 -> 10 */ - if (acer_tm360_irqrouting && dev->irq == 11 && dev->vendor == PCI_VENDOR_ID_O2) { + if (acer_tm360_irqrouting && dev->irq == 11 && + dev->vendor == PCI_VENDOR_ID_O2) { pirq = 0x68; mask = 0x400; dev->irq = r->get(pirq_router_dev, dev, pirq); @@ -928,17 +961,20 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) */ newirq = dev->irq; if (newirq && !((1 << newirq) & mask)) { - if ( pci_probe & PCI_USE_PIRQ_MASK) newirq = 0; - else printk("\n" KERN_WARNING - "PCI: IRQ %i for device %s doesn't match PIRQ mask " - "- try pci=usepirqmask\n" KERN_DEBUG, newirq, - pci_name(dev)); + if (pci_probe & PCI_USE_PIRQ_MASK) + newirq = 0; + else + printk("\n" KERN_WARNING + "PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n" + KERN_DEBUG, newirq, + pci_name(dev)); } if (!newirq && assign) { for (i = 0; i < 16; i++) { if (!(mask & (1 << i))) continue; - if (pirq_penalty[i] < pirq_penalty[newirq] && can_request_irq(i, IRQF_SHARED)) + if (pirq_penalty[i] < pirq_penalty[newirq] && + can_request_irq(i, IRQF_SHARED)) newirq = i; } } @@ -949,12 +985,13 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) irq = pirq & 0xf; DBG(" -> hardcoded IRQ %d\n", irq); msg = "Hardcoded"; - } else if ( r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ - ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask)) ) { + } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ + ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) { DBG(" -> got IRQ %d\n", irq); msg = "Found"; eisa_set_level_irq(irq); - } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { + } else if (newirq && r->set && + (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { DBG(" -> assigning IRQ %d", newirq); if (r->set(pirq_router_dev, dev, pirq, newirq)) { eisa_set_level_irq(newirq); @@ -972,7 +1009,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) } else return 0; } - printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, pci_name(dev)); + printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, + pci_name(dev)); /* Update IRQ for all devices with the same pirq value */ while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { @@ -984,20 +1022,26 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) if (!info) continue; if (info->irq[pin].link == pirq) { - /* We refuse to override the dev->irq information. Give a warning! */ - if ( dev2->irq && dev2->irq != irq && \ + /* + * We refuse to override the dev->irq + * information. Give a warning! + */ + if (dev2->irq && dev2->irq != irq && \ (!(pci_probe & PCI_USE_PIRQ_MASK) || \ - ((1 << dev2->irq) & mask)) ) { + ((1 << dev2->irq) & mask))) { #ifndef CONFIG_PCI_MSI - printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", - pci_name(dev2), dev2->irq, irq); + printk(KERN_INFO + "IRQ routing conflict for %s, have irq %d, want irq %d\n", + pci_name(dev2), dev2->irq, irq); #endif - continue; - } + continue; + } dev2->irq = irq; pirq_penalty[irq]++; if (dev != dev2) - printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, pci_name(dev2)); + printk(KERN_INFO + "PCI: Sharing IRQ %d with %s\n", + irq, pci_name(dev2)); } } return 1; @@ -1011,15 +1055,21 @@ static void __init pcibios_fixup_irqs(void) DBG(KERN_DEBUG "PCI: IRQ fixup\n"); while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { /* - * If the BIOS has set an out of range IRQ number, just ignore it. - * Also keep track of which IRQ's are already in use. + * If the BIOS has set an out of range IRQ number, just + * ignore it. Also keep track of which IRQ's are + * already in use. */ if (dev->irq >= 16) { - DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", pci_name(dev), dev->irq); + DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", + pci_name(dev), dev->irq); dev->irq = 0; } - /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */ - if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000) + /* + * If the IRQ is already assigned to a PCI device, + * ignore its ISA use penalty + */ + if (pirq_penalty[dev->irq] >= 100 && + pirq_penalty[dev->irq] < 100000) pirq_penalty[dev->irq] = 0; pirq_penalty[dev->irq]++; } @@ -1031,31 +1081,40 @@ static void __init pcibios_fixup_irqs(void) /* * Recalculate IRQ numbers if we use the I/O APIC. */ - if (io_apic_assign_pci_irqs) - { + if (io_apic_assign_pci_irqs) { int irq; if (pin) { - pin--; /* interrupt pins are numbered starting from 1 */ - irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin); + /* + * interrupt pins are numbered starting + * from 1 + */ + pin--; + irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, + PCI_SLOT(dev->devfn), pin); /* * Busses behind bridges are typically not listed in the MP-table. * In this case we have to look up the IRQ based on the parent bus, * parent slot, and pin number. The SMP code detects such bridged * busses itself so we should get into this branch reliably. */ - if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ - struct pci_dev * bridge = dev->bus->self; + if (irq < 0 && dev->bus->parent) { + /* go back to the bridge */ + struct pci_dev *bridge = dev->bus->self; pin = (pin + PCI_SLOT(dev->devfn)) % 4; - irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, - PCI_SLOT(bridge->devfn), pin); + irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, + PCI_SLOT(bridge->devfn), + pin); if (irq >= 0) - printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", - pci_name(bridge), 'A' + pin, irq); + printk(KERN_WARNING + "PCI: using PPB %s[%c] to get irq %d\n", + pci_name(bridge), + 'A' + pin, irq); } if (irq >= 0) { - printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", + printk(KERN_INFO + "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", pci_name(dev), 'A' + pin, irq); dev->irq = irq; } @@ -1078,7 +1137,8 @@ static int __init fix_broken_hp_bios_irq9(const struct dmi_system_id *d) { if (!broken_hp_bios_irq9) { broken_hp_bios_irq9 = 1; - printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident); + printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", + d->ident); } return 0; } @@ -1091,7 +1151,8 @@ static int __init fix_acer_tm360_irqrouting(const struct dmi_system_id *d) { if (!acer_tm360_irqrouting) { acer_tm360_irqrouting = 1; - printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident); + printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", + d->ident); } return 0; } @@ -1103,7 +1164,8 @@ static struct dmi_system_id __initdata pciirq_dmi_table[] = { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), DMI_MATCH(DMI_BIOS_VERSION, "GE.M1.03"), - DMI_MATCH(DMI_PRODUCT_VERSION, "HP Pavilion Notebook Model GE"), + DMI_MATCH(DMI_PRODUCT_VERSION, + "HP Pavilion Notebook Model GE"), DMI_MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736"), }, }, @@ -1138,11 +1200,14 @@ static int __init pcibios_irq_init(void) pirq_find_router(&pirq_router); if (pirq_table->exclusive_irqs) { int i; - for (i=0; i<16; i++) + for (i = 0; i < 16; i++) if (!(pirq_table->exclusive_irqs & (1 << i))) pirq_penalty[i] += 100; } - /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */ + /* + * If we're using the I/O APIC, avoid using the PCI IRQ + * routing table + */ if (io_apic_assign_pci_irqs) pirq_table = NULL; } @@ -1189,33 +1254,40 @@ static int pirq_enable_irq(struct pci_dev *dev) if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) { char *msg = ""; - pin--; /* interrupt pins are numbered starting from 1 */ + pin--; /* interrupt pins are numbered starting from 1 */ if (io_apic_assign_pci_irqs) { int irq; - irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin); + irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, + PCI_SLOT(dev->devfn), pin); /* - * Busses behind bridges are typically not listed in the MP-table. - * In this case we have to look up the IRQ based on the parent bus, - * parent slot, and pin number. The SMP code detects such bridged - * busses itself so we should get into this branch reliably. + * Busses behind bridges are typically not + * listed in the MP-table. In this case we have + * to look up the IRQ based on the parent bus, + * parent slot, and pin number. The SMP code + * detects such bridged busses itself so we + * should get into this branch reliably. */ temp_dev = dev; - while (irq < 0 && dev->bus->parent) { /* go back to the bridge */ - struct pci_dev * bridge = dev->bus->self; + while (irq < 0 && dev->bus->parent) { + /* go back to the bridge */ + struct pci_dev *bridge = dev->bus->self; pin = (pin + PCI_SLOT(dev->devfn)) % 4; - irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, + irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, PCI_SLOT(bridge->devfn), pin); if (irq >= 0) - printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", - pci_name(bridge), 'A' + pin, irq); + printk(KERN_WARNING + "PCI: using PPB %s[%c] to get irq %d\n", + pci_name(bridge), + 'A' + pin, irq); dev = bridge; } dev = temp_dev; if (irq >= 0) { - printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", + printk(KERN_INFO + "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", pci_name(dev), 'A' + pin, irq); dev->irq = irq; return 0; @@ -1226,12 +1298,17 @@ static int pirq_enable_irq(struct pci_dev *dev) else msg = " Please try using pci=biosirq."; - /* With IDE legacy devices the IRQ lookup failure is not a problem.. */ - if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5)) + /* + * With IDE legacy devices the IRQ lookup failure is not + * a problem.. + */ + if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && + !(dev->class & 0x5)) return 0; - printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", - 'A' + pin, pci_name(dev), msg); + printk(KERN_WARNING + "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", + 'A' + pin, pci_name(dev), msg); } return 0; } -- cgit v0.10.2 From e1a2a51e684bfe9d6165992d4a065439617a3107 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 15 May 2008 21:51:31 +0200 Subject: Suspend/Resume bug in PCI layer wrt quirks Some quirks should be called with interrupt disabled, we can't directly call them in .resume_early. Also the patch introduces pci_fixup_resume_early and pci_fixup_suspend, which matches current device core callbacks (.suspend/.resume_early). TBD: Somebody knows why we need quirk resume should double check if a quirk should be called in resume or resume_early. I changed some per my understanding, but can't make sure I fixed all. Signed-off-by: Shaohua Li Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 72cf61e..677fd9d 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -292,6 +292,9 @@ static int pci_device_suspend(struct device * dev, pm_message_t state) if (pci_dev->current_state == PCI_D0) pci_dev->current_state = PCI_UNKNOWN; } + + pci_fixup_device(pci_fixup_suspend, pci_dev); + return i; } @@ -337,6 +340,7 @@ static int pci_device_resume(struct device * dev) error = drv->resume(pci_dev); else error = pci_default_resume(pci_dev); + pci_fixup_device(pci_fixup_resume, pci_dev); return error; } @@ -346,7 +350,7 @@ static int pci_device_resume_early(struct device * dev) struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; - pci_fixup_device(pci_fixup_resume, pci_dev); + pci_fixup_device(pci_fixup_resume_early, pci_dev); if (drv && drv->resume_early) error = drv->resume_early(pci_dev); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index dabb563..44aabb1 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -556,7 +556,7 @@ static void quirk_via_ioapic(struct pci_dev *dev) pci_write_config_byte (dev, 0x58, tmp); } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, quirk_via_ioapic); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, quirk_via_ioapic); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, quirk_via_ioapic); /* * VIA 8237: Some BIOSs don't set the 'Bypass APIC De-Assert Message' Bit. @@ -576,7 +576,7 @@ static void quirk_via_vt8237_bypass_apic_deassert(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, quirk_via_vt8237_bypass_apic_deassert); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, quirk_via_vt8237_bypass_apic_deassert); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, quirk_via_vt8237_bypass_apic_deassert); /* * The AMD io apic can hang the box when an apic irq is masked. @@ -622,7 +622,7 @@ static void quirk_amd_8131_ioapic(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_amd_8131_ioapic); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_amd_8131_ioapic); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_amd_8131_ioapic); #endif /* CONFIG_X86_IO_APIC */ /* @@ -774,7 +774,7 @@ static void quirk_cardbus_legacy(struct pci_dev *dev) pci_write_config_dword(dev, PCI_CB_LEGACY_MODE_BASE, 0); } DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, quirk_cardbus_legacy); -DECLARE_PCI_FIXUP_RESUME(PCI_ANY_ID, PCI_ANY_ID, quirk_cardbus_legacy); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_ANY_ID, PCI_ANY_ID, quirk_cardbus_legacy); /* * Following the PCI ordering rules is optional on the AMD762. I'm not @@ -797,7 +797,7 @@ static void quirk_amd_ordering(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_FE_GATE_700C, quirk_amd_ordering); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_FE_GATE_700C, quirk_amd_ordering); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_FE_GATE_700C, quirk_amd_ordering); /* * DreamWorks provided workaround for Dunord I-3000 problem @@ -865,7 +865,7 @@ static void quirk_disable_pxb(struct pci_dev *pdev) } } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454NX, quirk_disable_pxb); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454NX, quirk_disable_pxb); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454NX, quirk_disable_pxb); static void __devinit quirk_amd_ide_mode(struct pci_dev *pdev) { @@ -885,9 +885,9 @@ static void __devinit quirk_amd_ide_mode(struct pci_dev *pdev) } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP600_SATA, quirk_amd_ide_mode); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP600_SATA, quirk_amd_ide_mode); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP600_SATA, quirk_amd_ide_mode); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP700_SATA, quirk_amd_ide_mode); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP700_SATA, quirk_amd_ide_mode); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP700_SATA, quirk_amd_ide_mode); /* * Serverworks CSB5 IDE does not fully support native mode @@ -1093,31 +1093,61 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, asu DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, asus_hides_smbus_lpc); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12, asus_hides_smbus_lpc); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0, asus_hides_smbus_lpc); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AA_0, asus_hides_smbus_lpc); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0, asus_hides_smbus_lpc); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0, asus_hides_smbus_lpc); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, asus_hides_smbus_lpc); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, asus_hides_smbus_lpc); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12, asus_hides_smbus_lpc); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0, asus_hides_smbus_lpc); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AA_0, asus_hides_smbus_lpc); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0, asus_hides_smbus_lpc); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0, asus_hides_smbus_lpc); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, asus_hides_smbus_lpc); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, asus_hides_smbus_lpc); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12, asus_hides_smbus_lpc); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0, asus_hides_smbus_lpc); -static void asus_hides_smbus_lpc_ich6(struct pci_dev *dev) +/* It appears we just have one such device. If not, we have a warning */ +static void __iomem *asus_rcba_base; +static void asus_hides_smbus_lpc_ich6_suspend(struct pci_dev *dev) { - u32 val, rcba; - void __iomem *base; + u32 rcba; if (likely(!asus_hides_smbus)) return; + WARN_ON(asus_rcba_base); + pci_read_config_dword(dev, 0xF0, &rcba); - base = ioremap_nocache(rcba & 0xFFFFC000, 0x4000); /* use bits 31:14, 16 kB aligned */ - if (base == NULL) return; - val=readl(base + 0x3418); /* read the Function Disable register, dword mode only */ - writel(val & 0xFFFFFFF7, base + 0x3418); /* enable the SMBus device */ - iounmap(base); + /* use bits 31:14, 16 kB aligned */ + asus_rcba_base = ioremap_nocache(rcba & 0xFFFFC000, 0x4000); + if (asus_rcba_base == NULL) + return; +} + +static void asus_hides_smbus_lpc_ich6_resume_early(struct pci_dev *dev) +{ + u32 val; + + if (likely(!asus_hides_smbus || !asus_rcba_base)) + return; + /* read the Function Disable register, dword mode only */ + val = readl(asus_rcba_base + 0x3418); + writel(val & 0xFFFFFFF7, asus_rcba_base + 0x3418); /* enable the SMBus device */ +} + +static void asus_hides_smbus_lpc_ich6_resume(struct pci_dev *dev) +{ + if (likely(!asus_hides_smbus || !asus_rcba_base)) + return; + iounmap(asus_rcba_base); + asus_rcba_base = NULL; dev_info(&dev->dev, "Enabled ICH6/i801 SMBus device\n"); } + +static void asus_hides_smbus_lpc_ich6(struct pci_dev *dev) +{ + asus_hides_smbus_lpc_ich6_suspend(dev); + asus_hides_smbus_lpc_ich6_resume_early(dev); + asus_hides_smbus_lpc_ich6_resume(dev); +} DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, asus_hides_smbus_lpc_ich6); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, asus_hides_smbus_lpc_ich6); +DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, asus_hides_smbus_lpc_ich6_suspend); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, asus_hides_smbus_lpc_ich6_resume); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, asus_hides_smbus_lpc_ich6_resume_early); /* * SiS 96x south bridge: BIOS typically hides SMBus device... @@ -1135,10 +1165,10 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_961, quirk_sis_96x_ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_962, quirk_sis_96x_smbus); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_963, quirk_sis_96x_smbus); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_LPC, quirk_sis_96x_smbus); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_961, quirk_sis_96x_smbus); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_962, quirk_sis_96x_smbus); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_963, quirk_sis_96x_smbus); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_LPC, quirk_sis_96x_smbus); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_961, quirk_sis_96x_smbus); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_962, quirk_sis_96x_smbus); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_963, quirk_sis_96x_smbus); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_LPC, quirk_sis_96x_smbus); /* * ... This is further complicated by the fact that some SiS96x south @@ -1172,7 +1202,7 @@ static void quirk_sis_503(struct pci_dev *dev) quirk_sis_96x_smbus(dev); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_503, quirk_sis_503); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_503, quirk_sis_503); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_503, quirk_sis_503); /* @@ -1205,7 +1235,7 @@ static void asus_hides_ac97_lpc(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, asus_hides_ac97_lpc); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, asus_hides_ac97_lpc); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, asus_hides_ac97_lpc); #if defined(CONFIG_ATA) || defined(CONFIG_ATA_MODULE) @@ -1270,12 +1300,12 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB363, qui DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB365, quirk_jmicron_ata); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB366, quirk_jmicron_ata); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB368, quirk_jmicron_ata); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB360, quirk_jmicron_ata); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB361, quirk_jmicron_ata); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB363, quirk_jmicron_ata); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB365, quirk_jmicron_ata); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB366, quirk_jmicron_ata); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB368, quirk_jmicron_ata); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB360, quirk_jmicron_ata); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB361, quirk_jmicron_ata); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB363, quirk_jmicron_ata); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB365, quirk_jmicron_ata); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB366, quirk_jmicron_ata); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB368, quirk_jmicron_ata); #endif @@ -1521,6 +1551,10 @@ extern struct pci_fixup __start_pci_fixups_enable[]; extern struct pci_fixup __end_pci_fixups_enable[]; extern struct pci_fixup __start_pci_fixups_resume[]; extern struct pci_fixup __end_pci_fixups_resume[]; +extern struct pci_fixup __start_pci_fixups_resume_early[]; +extern struct pci_fixup __end_pci_fixups_resume_early[]; +extern struct pci_fixup __start_pci_fixups_suspend[]; +extern struct pci_fixup __end_pci_fixups_suspend[]; void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) @@ -1553,6 +1587,16 @@ void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) end = __end_pci_fixups_resume; break; + case pci_fixup_resume_early: + start = __start_pci_fixups_resume_early; + end = __end_pci_fixups_resume_early; + break; + + case pci_fixup_suspend: + start = __start_pci_fixups_suspend; + end = __end_pci_fixups_suspend; + break; + default: /* stupid compiler warning, you would think with an enum... */ return; @@ -1629,7 +1673,7 @@ static void quirk_nvidia_ck804_pcie_aer_ext_cap(struct pci_dev *dev) } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_CK804_PCIE, quirk_nvidia_ck804_pcie_aer_ext_cap); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_CK804_PCIE, +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_CK804_PCIE, quirk_nvidia_ck804_pcie_aer_ext_cap); static void __devinit quirk_via_cx700_pci_parking_caching(struct pci_dev *dev) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f054778..cf108a3 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -84,6 +84,12 @@ VMLINUX_SYMBOL(__start_pci_fixups_resume) = .; \ *(.pci_fixup_resume) \ VMLINUX_SYMBOL(__end_pci_fixups_resume) = .; \ + VMLINUX_SYMBOL(__start_pci_fixups_resume_early) = .; \ + *(.pci_fixup_resume_early) \ + VMLINUX_SYMBOL(__end_pci_fixups_resume_early) = .; \ + VMLINUX_SYMBOL(__start_pci_fixups_suspend) = .; \ + *(.pci_fixup_suspend) \ + VMLINUX_SYMBOL(__end_pci_fixups_suspend) = .; \ } \ \ /* RapidIO route ops */ \ diff --git a/include/linux/pci.h b/include/linux/pci.h index aaa9f33..700704e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1013,7 +1013,9 @@ enum pci_fixup_pass { pci_fixup_header, /* After reading configuration header */ pci_fixup_final, /* Final phase of device fixups */ pci_fixup_enable, /* pci_enable_device() time */ - pci_fixup_resume, /* pci_enable_device() time */ + pci_fixup_resume, /* pci_device_resume() */ + pci_fixup_suspend, /* pci_device_suspend */ + pci_fixup_resume_early, /* pci_device_resume_early() */ }; /* Anonymous variables would be nice... */ @@ -1035,6 +1037,12 @@ enum pci_fixup_pass { #define DECLARE_PCI_FIXUP_RESUME(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume, \ resume##vendor##device##hook, vendor, device, hook) +#define DECLARE_PCI_FIXUP_RESUME_EARLY(vendor, device, hook) \ + DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early, \ + resume_early##vendor##device##hook, vendor, device, hook) +#define DECLARE_PCI_FIXUP_SUSPEND(vendor, device, hook) \ + DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend, \ + suspend##vendor##device##hook, vendor, device, hook) void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev); -- cgit v0.10.2 From 49db139955d3392c6c4facf987905d0a9afed581 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 13 May 2008 11:15:05 +0800 Subject: PCI: Disable PME during PCI scan If a device supports #PME and can generate PME events from D0, we may see superfluous events before a driver is loaded (drivers should only enable PME as needed), preventing suspend from working if the corresponding GPE was enabled. Likewise, if the ACPI device has the _PRW object, the _PSW/_DSW object will be called in order to disable the wakeup functionality. But when it is allowed to wake up the sleeping state, OSPM will enable it again. So we should disable PME in the course of scanning PCI devices and enable it again only when PME events are actually required to be generated from the requested PCI state (for example, D3_hot or D3_cold). It is also safe to disable PME again when the PME is disabled for the PCI devices. Signed-off-by: Zhao Yakui Signed-off-by: Li Shaohua Signed-off-by: Jesse Barnes diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 3706ce7..aebab71 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -857,6 +857,49 @@ int pci_cfg_space_size_ext(struct pci_dev *dev) return PCI_CFG_SPACE_SIZE; } +/** + * pci_disable_pme - Disable the PME function of PCI device + * @dev: PCI device affected + * -EINVAL is returned if PCI device doesn't support PME. + * Zero is returned if the PME is supported and can be disabled. + */ +static int pci_disable_pme(struct pci_dev *dev) +{ + int pm; + u16 value; + + /* find PCI PM capability in list */ + pm = pci_find_capability(dev, PCI_CAP_ID_PM); + + /* If device doesn't support PM Capabilities, it means that PME is + * not supported. + */ + if (!pm) + return -EINVAL; + /* Check device's ability to generate PME# */ + pci_read_config_word(dev, pm + PCI_PM_PMC, &value); + + value &= PCI_PM_CAP_PME_MASK; + /* Check if it can generate PME# */ + if (!value) { + /* + * If it is zero, it means that PME is still unsupported + * although there exists the PM capability. + */ + return -EINVAL; + } + + pci_read_config_word(dev, pm + PCI_PM_CTRL, &value); + + /* Clear PME_Status by writing 1 to it */ + value |= PCI_PM_CTRL_PME_STATUS ; + /* Disable PME enable bit */ + value &= ~PCI_PM_CTRL_PME_ENABLE; + pci_write_config_word(dev, pm + PCI_PM_CTRL, value); + + return 0; +} + int pci_cfg_space_size(struct pci_dev *dev) { int pos; @@ -964,6 +1007,7 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) } pci_vpd_pci22_init(dev); + pci_disable_pme(dev); return dev; } -- cgit v0.10.2 From 8a7a4faf96eef8c87421be0d3f33ea036804289b Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Thu, 15 May 2008 15:18:53 +0900 Subject: pci-acpi: remove duplicate code for _OSC Remove the duplicated code in acpi_query_osc() and acpi_run_osc(). It simplifies the code very much. Signed-off-by: Kenji Kaneshige Acked-by: Shaohua Li Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 9d6fc8e..032b326 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -46,40 +46,15 @@ static struct acpi_osc_data *acpi_get_osc_data(acpi_handle handle) static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40, 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66}; -static acpi_status -acpi_query_osc ( - acpi_handle handle, - u32 level, - void *context, - void **retval ) +static acpi_status acpi_run_osc(acpi_handle handle, + struct acpi_osc_data *osc_data) { - acpi_status status; - struct acpi_object_list input; - union acpi_object in_params[4]; - struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; - union acpi_object *out_obj; - u32 osc_dw0; - acpi_status *ret_status = (acpi_status *)retval; - struct acpi_osc_data *osc_data; - u32 flags = (unsigned long)context, temp; - acpi_handle tmp; - - status = acpi_get_handle(handle, "_OSC", &tmp); - if (ACPI_FAILURE(status)) - return status; - - osc_data = acpi_get_osc_data(handle); - if (!osc_data) { - printk(KERN_ERR "acpi osc data array is full\n"); - return AE_ERROR; - } - - osc_data->ctrlset_buf[OSC_SUPPORT_TYPE] |= (flags & OSC_SUPPORT_MASKS); - - /* do _OSC query for all possible controls */ - temp = osc_data->ctrlset_buf[OSC_CONTROL_TYPE]; - osc_data->ctrlset_buf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; - osc_data->ctrlset_buf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS; + acpi_status status; + struct acpi_object_list input; + union acpi_object in_params[4]; + struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; + union acpi_object *out_obj; + u32 osc_dw0, flags = osc_data->ctrlset_buf[OSC_QUERY_TYPE]; /* Setting up input parameters */ input.count = 4; @@ -97,14 +72,13 @@ acpi_query_osc ( status = acpi_evaluate_object(handle, "_OSC", &input, &output); if (ACPI_FAILURE(status)) - goto out_nofree; - out_obj = output.pointer; + return status; + out_obj = output.pointer; if (out_obj->type != ACPI_TYPE_BUFFER) { - printk(KERN_DEBUG - "Evaluate _OSC returns wrong type\n"); + printk(KERN_DEBUG "Evaluate _OSC returns wrong type\n"); status = AE_TYPE; - goto query_osc_out; + goto out_kfree; } osc_dw0 = *((u32 *) out_obj->buffer.pointer); if (osc_dw0) { @@ -115,93 +89,64 @@ acpi_query_osc ( if (osc_dw0 & OSC_INVALID_REVISION_ERROR) printk(KERN_DEBUG "_OSC invalid revision\n"); if (osc_dw0 & OSC_CAPABILITIES_MASK_ERROR) { - /* Update Global Control Set */ - osc_data->global_ctrlsets = - *((u32 *)(out_obj->buffer.pointer + 8)); - status = AE_OK; - goto query_osc_out; + if (flags & OSC_QUERY_ENABLE) + goto out_success; + printk(KERN_DEBUG "_OSC FW not grant req. control\n"); + status = AE_SUPPORT; + goto out_kfree; } status = AE_ERROR; - goto query_osc_out; + goto out_kfree; + } +out_success: + if (flags & OSC_QUERY_ENABLE) { + /* Update Global Control Set */ + osc_data->global_ctrlsets = + *((u32 *)(out_obj->buffer.pointer + 8)); } - - /* Update Global Control Set */ - osc_data->global_ctrlsets = *((u32 *)(out_obj->buffer.pointer + 8)); status = AE_OK; -query_osc_out: +out_kfree: kfree(output.pointer); -out_nofree: - *ret_status = status; - - osc_data->ctrlset_buf[OSC_QUERY_TYPE] = !OSC_QUERY_ENABLE; - osc_data->ctrlset_buf[OSC_CONTROL_TYPE] = temp; - if (ACPI_FAILURE(status)) { - /* no osc support at all */ - osc_data->ctrlset_buf[OSC_SUPPORT_TYPE] = 0; - } - return status; } - -static acpi_status -acpi_run_osc ( - acpi_handle handle, - void *context) +static acpi_status acpi_query_osc(acpi_handle handle, + u32 level, void *context, void **retval) { - acpi_status status; - struct acpi_object_list input; - union acpi_object in_params[4]; - struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; - union acpi_object *out_obj; - u32 osc_dw0; - - /* Setting up input parameters */ - input.count = 4; - input.pointer = in_params; - in_params[0].type = ACPI_TYPE_BUFFER; - in_params[0].buffer.length = 16; - in_params[0].buffer.pointer = OSC_UUID; - in_params[1].type = ACPI_TYPE_INTEGER; - in_params[1].integer.value = 1; - in_params[2].type = ACPI_TYPE_INTEGER; - in_params[2].integer.value = 3; - in_params[3].type = ACPI_TYPE_BUFFER; - in_params[3].buffer.length = 12; - in_params[3].buffer.pointer = (u8 *)context; + acpi_status status; + acpi_status *ret_status = (acpi_status *)retval; + struct acpi_osc_data *osc_data; + u32 flags = (unsigned long)context, temp; + acpi_handle tmp; - status = acpi_evaluate_object(handle, "_OSC", &input, &output); - if (ACPI_FAILURE (status)) + status = acpi_get_handle(handle, "_OSC", &tmp); + if (ACPI_FAILURE(status)) return status; - out_obj = output.pointer; - if (out_obj->type != ACPI_TYPE_BUFFER) { - printk(KERN_DEBUG - "Evaluate _OSC returns wrong type\n"); - status = AE_TYPE; - goto run_osc_out; + osc_data = acpi_get_osc_data(handle); + if (!osc_data) { + printk(KERN_ERR "acpi osc data array is full\n"); + return AE_ERROR; } - osc_dw0 = *((u32 *) out_obj->buffer.pointer); - if (osc_dw0) { - if (osc_dw0 & OSC_REQUEST_ERROR) - printk(KERN_DEBUG "_OSC request fails\n"); - if (osc_dw0 & OSC_INVALID_UUID_ERROR) - printk(KERN_DEBUG "_OSC invalid UUID\n"); - if (osc_dw0 & OSC_INVALID_REVISION_ERROR) - printk(KERN_DEBUG "_OSC invalid revision\n"); - if (osc_dw0 & OSC_CAPABILITIES_MASK_ERROR) { - printk(KERN_DEBUG "_OSC FW not grant req. control\n"); - status = AE_SUPPORT; - goto run_osc_out; - } - status = AE_ERROR; - goto run_osc_out; + + osc_data->ctrlset_buf[OSC_SUPPORT_TYPE] |= (flags & OSC_SUPPORT_MASKS); + + /* do _OSC query for all possible controls */ + temp = osc_data->ctrlset_buf[OSC_CONTROL_TYPE]; + osc_data->ctrlset_buf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; + osc_data->ctrlset_buf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS; + + status = acpi_run_osc(handle, osc_data); + *ret_status = status; + + osc_data->ctrlset_buf[OSC_QUERY_TYPE] = !OSC_QUERY_ENABLE; + osc_data->ctrlset_buf[OSC_CONTROL_TYPE] = temp; + if (ACPI_FAILURE(status)) { + /* no osc support at all */ + osc_data->ctrlset_buf[OSC_SUPPORT_TYPE] = 0; } - status = AE_OK; -run_osc_out: - kfree(output.pointer); return status; } @@ -260,7 +205,7 @@ acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) return AE_SUPPORT; } osc_data->ctrlset_buf[OSC_CONTROL_TYPE] |= ctrlset; - status = acpi_run_osc(handle, osc_data->ctrlset_buf); + status = acpi_run_osc(handle, osc_data); if (ACPI_FAILURE (status)) { osc_data->ctrlset_buf[OSC_CONTROL_TYPE] &= ~ctrlset; } -- cgit v0.10.2 From 5e0b9947452c824d66fafe728a46312cff544a7f Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Thu, 15 May 2008 15:20:11 +0900 Subject: pci-acpi: use local buffer for _OSC Current pci-acpi implementation uses array in osc_data directly to evaluate _OSC. It needs to save the old data and restore it if _OSC evaluation fails. To make it more robust, we should use local array to evaluate _OSC. Signed-off-by: Kenji Kaneshige Acked-by: Shaohua Li Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 032b326..b1bd6e6 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -21,12 +21,18 @@ struct acpi_osc_data { acpi_handle handle; - u32 ctrlset_buf[3]; - u32 global_ctrlsets; + u32 support_set; + u32 control_set; + u32 query_result; struct list_head sibiling; }; static LIST_HEAD(acpi_osc_data_list); +struct acpi_osc_args { + u32 capbuf[3]; + u32 query_result; +}; + static struct acpi_osc_data *acpi_get_osc_data(acpi_handle handle) { struct acpi_osc_data *data; @@ -47,14 +53,14 @@ static struct acpi_osc_data *acpi_get_osc_data(acpi_handle handle) static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40, 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66}; static acpi_status acpi_run_osc(acpi_handle handle, - struct acpi_osc_data *osc_data) + struct acpi_osc_args *osc_args) { acpi_status status; struct acpi_object_list input; union acpi_object in_params[4]; struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; union acpi_object *out_obj; - u32 osc_dw0, flags = osc_data->ctrlset_buf[OSC_QUERY_TYPE]; + u32 osc_dw0, flags = osc_args->capbuf[OSC_QUERY_TYPE]; /* Setting up input parameters */ input.count = 4; @@ -68,7 +74,7 @@ static acpi_status acpi_run_osc(acpi_handle handle, in_params[2].integer.value = 3; in_params[3].type = ACPI_TYPE_BUFFER; in_params[3].buffer.length = 12; - in_params[3].buffer.pointer = (u8 *)osc_data->ctrlset_buf; + in_params[3].buffer.pointer = (u8 *)osc_args->capbuf; status = acpi_evaluate_object(handle, "_OSC", &input, &output); if (ACPI_FAILURE(status)) @@ -99,11 +105,9 @@ static acpi_status acpi_run_osc(acpi_handle handle, goto out_kfree; } out_success: - if (flags & OSC_QUERY_ENABLE) { - /* Update Global Control Set */ - osc_data->global_ctrlsets = + if (flags & OSC_QUERY_ENABLE) + osc_args->query_result = *((u32 *)(out_obj->buffer.pointer + 8)); - } status = AE_OK; out_kfree: @@ -117,8 +121,9 @@ static acpi_status acpi_query_osc(acpi_handle handle, acpi_status status; acpi_status *ret_status = (acpi_status *)retval; struct acpi_osc_data *osc_data; - u32 flags = (unsigned long)context, temp; + u32 flags = (unsigned long)context, support_set; acpi_handle tmp; + struct acpi_osc_args osc_args; status = acpi_get_handle(handle, "_OSC", &tmp); if (ACPI_FAILURE(status)) @@ -130,21 +135,18 @@ static acpi_status acpi_query_osc(acpi_handle handle, return AE_ERROR; } - osc_data->ctrlset_buf[OSC_SUPPORT_TYPE] |= (flags & OSC_SUPPORT_MASKS); - /* do _OSC query for all possible controls */ - temp = osc_data->ctrlset_buf[OSC_CONTROL_TYPE]; - osc_data->ctrlset_buf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; - osc_data->ctrlset_buf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS; + support_set = osc_data->support_set | (flags & OSC_SUPPORT_MASKS); + osc_args.capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; + osc_args.capbuf[OSC_SUPPORT_TYPE] = support_set; + osc_args.capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS; - status = acpi_run_osc(handle, osc_data); + status = acpi_run_osc(handle, &osc_args); *ret_status = status; - osc_data->ctrlset_buf[OSC_QUERY_TYPE] = !OSC_QUERY_ENABLE; - osc_data->ctrlset_buf[OSC_CONTROL_TYPE] = temp; - if (ACPI_FAILURE(status)) { - /* no osc support at all */ - osc_data->ctrlset_buf[OSC_SUPPORT_TYPE] = 0; + if (ACPI_SUCCESS(status)) { + osc_data->support_set = support_set; + osc_data->query_result = osc_args.query_result; } return status; @@ -181,10 +183,11 @@ acpi_status __pci_osc_support_set(u32 flags, const char *hid) **/ acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) { - acpi_status status; - u32 ctrlset; + acpi_status status; + u32 ctrlset, control_set; acpi_handle tmp; struct acpi_osc_data *osc_data; + struct acpi_osc_args osc_args; status = acpi_get_handle(handle, "_OSC", &tmp); if (ACPI_FAILURE(status)) @@ -197,19 +200,21 @@ acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) } ctrlset = (flags & OSC_CONTROL_MASKS); - if (!ctrlset) { + if (!ctrlset) return AE_TYPE; - } - if (osc_data->ctrlset_buf[OSC_SUPPORT_TYPE] && - ((osc_data->global_ctrlsets & ctrlset) != ctrlset)) { + + if (osc_data->support_set && + ((osc_data->query_result & ctrlset) != ctrlset)) return AE_SUPPORT; - } - osc_data->ctrlset_buf[OSC_CONTROL_TYPE] |= ctrlset; - status = acpi_run_osc(handle, osc_data); - if (ACPI_FAILURE (status)) { - osc_data->ctrlset_buf[OSC_CONTROL_TYPE] &= ~ctrlset; - } - + + control_set = osc_data->control_set | ctrlset; + osc_args.capbuf[OSC_QUERY_TYPE] = 0; + osc_args.capbuf[OSC_SUPPORT_TYPE] = osc_data->support_set; + osc_args.capbuf[OSC_CONTROL_TYPE] = control_set; + status = acpi_run_osc(handle, &osc_args); + if (ACPI_SUCCESS(status)) + osc_data->control_set = control_set; + return status; } EXPORT_SYMBOL(pci_osc_control_set); -- cgit v0.10.2 From 681f7d9db58d2b6dc3c3b784d6815f86a53b6ba0 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Thu, 15 May 2008 15:21:16 +0900 Subject: pci-acpi: add flag to indicate query had been done Current pci-acpi implementation checks osc_data->support_stat to see if control bits had been already queried. It is not good from the viewpoint of easy understanding. So this patch adds new 'is_queried' flag to indicate query had been done. Signed-off-by: Kenji Kaneshige Acked-by: Shaohua Li Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index b1bd6e6..9510218 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -23,6 +23,7 @@ struct acpi_osc_data { acpi_handle handle; u32 support_set; u32 control_set; + int is_queried; u32 query_result; struct list_head sibiling; }; @@ -147,6 +148,7 @@ static acpi_status acpi_query_osc(acpi_handle handle, if (ACPI_SUCCESS(status)) { osc_data->support_set = support_set; osc_data->query_result = osc_args.query_result; + osc_data->is_queried = 1; } return status; @@ -203,7 +205,7 @@ acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) if (!ctrlset) return AE_TYPE; - if (osc_data->support_set && + if (osc_data->is_queried && ((osc_data->query_result & ctrlset) != ctrlset)) return AE_SUPPORT; -- cgit v0.10.2 From c155062d6ebed676f62761f90c62894a97816932 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Thu, 15 May 2008 15:22:35 +0900 Subject: pci-acpi: remove unused variable in __pci_osc_support_set The 'retval' variable in __pci_osc_support_set() is no longer used. Remove this unused variable. Signed-off-by: Kenji Kaneshige Acked-by: Shaohua Li Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 9510218..3f27999 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -120,7 +120,6 @@ static acpi_status acpi_query_osc(acpi_handle handle, u32 level, void *context, void **retval) { acpi_status status; - acpi_status *ret_status = (acpi_status *)retval; struct acpi_osc_data *osc_data; u32 flags = (unsigned long)context, support_set; acpi_handle tmp; @@ -143,8 +142,6 @@ static acpi_status acpi_query_osc(acpi_handle handle, osc_args.capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS; status = acpi_run_osc(handle, &osc_args); - *ret_status = status; - if (ACPI_SUCCESS(status)) { osc_data->support_set = support_set; osc_data->query_result = osc_args.query_result; @@ -164,15 +161,11 @@ static acpi_status acpi_query_osc(acpi_handle handle, **/ acpi_status __pci_osc_support_set(u32 flags, const char *hid) { - acpi_status retval = AE_NOT_FOUND; - - if (!(flags & OSC_SUPPORT_MASKS)) { + if (!(flags & OSC_SUPPORT_MASKS)) return AE_TYPE; - } - acpi_get_devices(hid, - acpi_query_osc, - (void *)(unsigned long)flags, - (void **) &retval ); + + acpi_get_devices(hid, acpi_query_osc, + (void *)(unsigned long)flags, NULL); return AE_OK; } -- cgit v0.10.2 From 90a57dab6a2bce7f2ef186fe2d07a84d14a2a625 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Thu, 15 May 2008 15:23:13 +0900 Subject: pci-acpi: formatting cleanups for _OSC Minor cleanup in pci-acpi.c. Signed-off-by: Kenji Kaneshige Acked-by: Shaohua Li Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 3f27999..5f96b86 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -51,7 +51,8 @@ static struct acpi_osc_data *acpi_get_osc_data(acpi_handle handle) return data; } -static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40, 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66}; +static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40, + 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66}; static acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_args *osc_args) @@ -87,7 +88,7 @@ static acpi_status acpi_run_osc(acpi_handle handle, status = AE_TYPE; goto out_kfree; } - osc_dw0 = *((u32 *) out_obj->buffer.pointer); + osc_dw0 = *((u32 *)out_obj->buffer.pointer); if (osc_dw0) { if (osc_dw0 & OSC_REQUEST_ERROR) printk(KERN_DEBUG "_OSC request fails\n"); -- cgit v0.10.2 From cf35e4ad57b4c39a4c74921e20e48ec0dbeb14f4 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 20 May 2008 01:02:06 +0300 Subject: PCI: remove CVS keywords This patch removes CVS keywords that weren't updated for a long time from comments. Signed-off-by: Adrian Bunk Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e4548ab..15beaf4 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1,6 +1,4 @@ /* - * $Id: pci.c,v 1.91 1999/01/21 13:34:01 davem Exp $ - * * PCI Bus Services, see include/linux/pci.h for further explanation. * * Copyright 1993 -- 1997 Drew Eckhardt, Frederic Potter, diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 95eb083..4400dff 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -1,6 +1,4 @@ /* - * $Id: proc.c,v 1.13 1998/05/12 07:36:07 mj Exp $ - * * Procfs interface for the PCI bus. * * Copyright (c) 1997--1999 Martin Mares -- cgit v0.10.2 From 5ca5c02f0e81c094c19d30dc0d13be4e929a994a Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 19 May 2008 13:48:17 +0900 Subject: PCI/MSI: skip calling pci_find_capability from msi_set_mask_bits The position of MSI capability is already cached in the msi_desc when we enter the msi_set_mask_bits(). Use it instead. Signed-off-by: Hidetoshi Seto Acked-by: Arnaldo Carvalho de Melo Signed-off-by: Jesse Barnes diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 8c61304..ccb1974 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -70,12 +70,10 @@ arch_teardown_msi_irqs(struct pci_dev *dev) } } -static void msi_set_enable(struct pci_dev *dev, int enable) +static void __msi_set_enable(struct pci_dev *dev, int pos, int enable) { - int pos; u16 control; - pos = pci_find_capability(dev, PCI_CAP_ID_MSI); if (pos) { pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); control &= ~PCI_MSI_FLAGS_ENABLE; @@ -85,6 +83,11 @@ static void msi_set_enable(struct pci_dev *dev, int enable) } } +static void msi_set_enable(struct pci_dev *dev, int enable) +{ + __msi_set_enable(dev, pci_find_capability(dev, PCI_CAP_ID_MSI), enable); +} + static void msix_set_enable(struct pci_dev *dev, int enable) { int pos; @@ -141,7 +144,8 @@ static void msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag) mask_bits |= flag & mask; pci_write_config_dword(entry->dev, pos, mask_bits); } else { - msi_set_enable(entry->dev, !flag); + __msi_set_enable(entry->dev, entry->msi_attrib.pos, + !flag); } break; case PCI_CAP_ID_MSIX: -- cgit v0.10.2 From bb71ad880204b79d60331d3384103976e086cb9f Mon Sep 17 00:00:00 2001 From: Gary Hade Date: Mon, 12 May 2008 13:57:46 -0700 Subject: PCI: boot parameter to avoid expansion ROM memory allocation Contention for scarce PCI memory resources has been growing due to an increasing number of PCI slots in large multi-node systems. The kernel currently attempts by default to allocate memory for all PCI expansion ROMs so there has also been an increasing number of PCI memory allocation failures seen on these systems. This occurs because the BIOS either (1) provides insufficient PCI memory resource for all the expansion ROMs or (2) provides adequate PCI memory resource for expansion ROMs but provides the space in kernel unexpected BIOS assigned P2P non-prefetch windows. The resulting PCI memory allocation failures may be benign when related to memory requests for expansion ROMs themselves but in some cases they can occur when attempting to allocate space for more critical BARs. This can happen when a successful expansion ROM allocation request consumes memory resource that was intended for a non-ROM BAR. We have seen this happen during PCI hotplug of an adapter that contains a P2P bridge where successful memory allocation for an expansion ROM BAR on device behind the bridge consumed memory that was intended for a non-ROM BAR on the P2P bridge. In all cases the allocation failure messages can be very confusing for users. This patch provides a new 'pci=norom' kernel boot parameter that can be used to disable the default PCI expansion ROM memory resource allocation. This provides a way to avoid the above described issues on systems that do not contain PCI devices for which drivers or user-level applications depend on the default PCI expansion ROM memory resource allocation behavior. Signed-off-by: Gary Hade Signed-off-by: Jesse Barnes diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e07c432..9cf7b34 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1496,6 +1496,9 @@ and is between 256 and 4096 characters. It is defined in the file Use with caution as certain devices share address decoders between ROMs and other resources. + norom [X86-32,X86_64] Do not assign address space to + expansion ROMs that do not already have + BIOS assigned address ranges. irqmask=0xMMMM [X86-32] Set a bit mask of IRQs allowed to be assigned automatically to PCI devices. You can make the kernel exclude IRQs of your ISA cards diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 6e64aaf..3a5261b 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -121,6 +121,21 @@ void __init dmi_check_skip_isa_align(void) dmi_check_system(can_skip_pciprobe_dmi_table); } +static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev) +{ + struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE]; + + if (pci_probe & PCI_NOASSIGN_ROMS) { + if (rom_r->parent) + return; + if (rom_r->start) { + /* we deal with BIOS assigned ROM later */ + return; + } + rom_r->start = rom_r->end = rom_r->flags = 0; + } +} + /* * Called after each bus is probed, but before its children * are examined. @@ -128,7 +143,11 @@ void __init dmi_check_skip_isa_align(void) void __devinit pcibios_fixup_bus(struct pci_bus *b) { + struct pci_dev *dev; + pci_read_bridge_bases(b); + list_for_each_entry(dev, &b->devices, bus_list) + pcibios_fixup_device_resources(dev); } /* @@ -483,6 +502,9 @@ char * __devinit pcibios_setup(char *str) else if (!strcmp(str, "rom")) { pci_probe |= PCI_ASSIGN_ROMS; return NULL; + } else if (!strcmp(str, "norom")) { + pci_probe |= PCI_NOASSIGN_ROMS; + return NULL; } else if (!strcmp(str, "assign-busses")) { pci_probe |= PCI_ASSIGN_ALL_BUSSES; return NULL; diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index 720c4c5..291dafe 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h @@ -27,6 +27,7 @@ #define PCI_CAN_SKIP_ISA_ALIGN 0x8000 #define PCI_USE__CRS 0x10000 #define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 +#define PCI_NOASSIGN_ROMS 0x40000 extern unsigned int pci_probe; extern unsigned long pirq_table_addr; -- cgit v0.10.2 From 1eede070a59e1cc73da51e1aaa00d9ab86572cfc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 20 May 2008 23:00:01 +0200 Subject: Introduce new top level suspend and hibernation callbacks Introduce 'struct pm_ops' and 'struct pm_ext_ops' ('ext' meaning 'extended') representing suspend and hibernation operations for bus types, device classes, device types and device drivers. Modify the PM core to use 'struct pm_ops' and 'struct pm_ext_ops' objects, if defined, instead of the ->suspend(), ->resume(), ->suspend_late(), and ->resume_early() callbacks (the old callbacks will be considered as legacy and gradually phased out). The main purpose of doing this is to separate suspend (aka S2RAM and standby) callbacks from hibernation callbacks in such a way that the new callbacks won't take arguments and the semantics of each of them will be clearly specified. This has been requested for multiple times by many people, including Linus himself, and the reason is that within the current scheme if ->resume() is called, for example, it's difficult to say why it's been called (ie. is it a resume from RAM or from hibernation or a suspend/hibernation failure etc.?). The second purpose is to make the suspend/hibernation callbacks more flexible so that device drivers can handle more than they can within the current scheme. For example, some drivers may need to prevent new children of the device from being registered before their ->suspend() callbacks are executed or they may want to carry out some operations requiring the availability of some other devices, not directly bound via the parent-child relationship, in order to prepare for the execution of ->suspend(), etc. Ultimately, we'd like to stop using the freezing of tasks for suspend and therefore the drivers' suspend/hibernation code will have to take care of the handling of the user space during suspend/hibernation. That, in turn, would be difficult within the current scheme, without the new ->prepare() and ->complete() callbacks. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index bf9290e..c1735f6 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1211,9 +1211,9 @@ static int suspend(int vetoable) if (err != APM_SUCCESS) apm_error("suspend", err); err = (err == APM_SUCCESS) ? 0 : -EIO; - device_power_up(); + device_power_up(PMSG_RESUME); local_irq_enable(); - device_resume(); + device_resume(PMSG_RESUME); queue_event(APM_NORMAL_RESUME, NULL); spin_lock(&user_list_lock); for (as = user_list; as != NULL; as = as->next) { @@ -1238,7 +1238,7 @@ static void standby(void) apm_error("standby", err); local_irq_disable(); - device_power_up(); + device_power_up(PMSG_RESUME); local_irq_enable(); } @@ -1324,7 +1324,7 @@ static void check_events(void) ignore_bounce = 1; if ((event != APM_NORMAL_RESUME) || (ignore_normal_resume == 0)) { - device_resume(); + device_resume(PMSG_RESUME); queue_event(event, NULL); } ignore_normal_resume = 0; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 45cc3d9..d571204 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -12,11 +12,9 @@ * and add it to the list of power-controlled devices. sysfs entries for * controlling device power management will also be added. * - * A different set of lists than the global subsystem list are used to - * keep track of power info because we use different lists to hold - * devices based on what stage of the power management process they - * are in. The power domain dependencies may also differ from the - * ancestral dependencies that the subsystem list maintains. + * A separate list is used for keeping track of power info, because the power + * domain dependencies may differ from the ancestral dependencies that the + * subsystem list maintains. */ #include @@ -30,31 +28,40 @@ #include "power.h" /* - * The entries in the dpm_active list are in a depth first order, simply + * The entries in the dpm_list list are in a depth first order, simply * because children are guaranteed to be discovered after parents, and * are inserted at the back of the list on discovery. * - * All the other lists are kept in the same order, for consistency. - * However the lists aren't always traversed in the same order. - * Semaphores must be acquired from the top (i.e., front) down - * and released in the opposite order. Devices must be suspended - * from the bottom (i.e., end) up and resumed in the opposite order. - * That way no parent will be suspended while it still has an active - * child. - * * Since device_pm_add() may be called with a device semaphore held, * we must never try to acquire a device semaphore while holding * dpm_list_mutex. */ -LIST_HEAD(dpm_active); -static LIST_HEAD(dpm_off); -static LIST_HEAD(dpm_off_irq); +LIST_HEAD(dpm_list); static DEFINE_MUTEX(dpm_list_mtx); -/* 'true' if all devices have been suspended, protected by dpm_list_mtx */ -static bool all_sleeping; +/* + * Set once the preparation of devices for a PM transition has started, reset + * before starting to resume devices. Protected by dpm_list_mtx. + */ +static bool transition_started; + +/** + * device_pm_lock - lock the list of active devices used by the PM core + */ +void device_pm_lock(void) +{ + mutex_lock(&dpm_list_mtx); +} + +/** + * device_pm_unlock - unlock the list of active devices used by the PM core + */ +void device_pm_unlock(void) +{ + mutex_unlock(&dpm_list_mtx); +} /** * device_pm_add - add a device to the list of active devices @@ -68,17 +75,25 @@ int device_pm_add(struct device *dev) dev->bus ? dev->bus->name : "No Bus", kobject_name(&dev->kobj)); mutex_lock(&dpm_list_mtx); - if ((dev->parent && dev->parent->power.sleeping) || all_sleeping) { - if (dev->parent->power.sleeping) - dev_warn(dev, "parent %s is sleeping\n", + if (dev->parent) { + if (dev->parent->power.status >= DPM_SUSPENDING) { + dev_warn(dev, "parent %s is sleeping, will not add\n", dev->parent->bus_id); - else - dev_warn(dev, "all devices are sleeping\n"); + WARN_ON(true); + } + } else if (transition_started) { + /* + * We refuse to register parentless devices while a PM + * transition is in progress in order to avoid leaving them + * unhandled down the road + */ WARN_ON(true); } error = dpm_sysfs_add(dev); - if (!error) - list_add_tail(&dev->power.entry, &dpm_active); + if (!error) { + dev->power.status = DPM_ON; + list_add_tail(&dev->power.entry, &dpm_list); + } mutex_unlock(&dpm_list_mtx); return error; } @@ -100,73 +115,243 @@ void device_pm_remove(struct device *dev) mutex_unlock(&dpm_list_mtx); } +/** + * pm_op - execute the PM operation appropiate for given PM event + * @dev: Device. + * @ops: PM operations to choose from. + * @state: PM transition of the system being carried out. + */ +static int pm_op(struct device *dev, struct pm_ops *ops, pm_message_t state) +{ + int error = 0; + + switch (state.event) { +#ifdef CONFIG_SUSPEND + case PM_EVENT_SUSPEND: + if (ops->suspend) { + error = ops->suspend(dev); + suspend_report_result(ops->suspend, error); + } + break; + case PM_EVENT_RESUME: + if (ops->resume) { + error = ops->resume(dev); + suspend_report_result(ops->resume, error); + } + break; +#endif /* CONFIG_SUSPEND */ +#ifdef CONFIG_HIBERNATION + case PM_EVENT_FREEZE: + case PM_EVENT_QUIESCE: + if (ops->freeze) { + error = ops->freeze(dev); + suspend_report_result(ops->freeze, error); + } + break; + case PM_EVENT_HIBERNATE: + if (ops->poweroff) { + error = ops->poweroff(dev); + suspend_report_result(ops->poweroff, error); + } + break; + case PM_EVENT_THAW: + case PM_EVENT_RECOVER: + if (ops->thaw) { + error = ops->thaw(dev); + suspend_report_result(ops->thaw, error); + } + break; + case PM_EVENT_RESTORE: + if (ops->restore) { + error = ops->restore(dev); + suspend_report_result(ops->restore, error); + } + break; +#endif /* CONFIG_HIBERNATION */ + default: + error = -EINVAL; + } + return error; +} + +/** + * pm_noirq_op - execute the PM operation appropiate for given PM event + * @dev: Device. + * @ops: PM operations to choose from. + * @state: PM transition of the system being carried out. + * + * The operation is executed with interrupts disabled by the only remaining + * functional CPU in the system. + */ +static int pm_noirq_op(struct device *dev, struct pm_ext_ops *ops, + pm_message_t state) +{ + int error = 0; + + switch (state.event) { +#ifdef CONFIG_SUSPEND + case PM_EVENT_SUSPEND: + if (ops->suspend_noirq) { + error = ops->suspend_noirq(dev); + suspend_report_result(ops->suspend_noirq, error); + } + break; + case PM_EVENT_RESUME: + if (ops->resume_noirq) { + error = ops->resume_noirq(dev); + suspend_report_result(ops->resume_noirq, error); + } + break; +#endif /* CONFIG_SUSPEND */ +#ifdef CONFIG_HIBERNATION + case PM_EVENT_FREEZE: + case PM_EVENT_QUIESCE: + if (ops->freeze_noirq) { + error = ops->freeze_noirq(dev); + suspend_report_result(ops->freeze_noirq, error); + } + break; + case PM_EVENT_HIBERNATE: + if (ops->poweroff_noirq) { + error = ops->poweroff_noirq(dev); + suspend_report_result(ops->poweroff_noirq, error); + } + break; + case PM_EVENT_THAW: + case PM_EVENT_RECOVER: + if (ops->thaw_noirq) { + error = ops->thaw_noirq(dev); + suspend_report_result(ops->thaw_noirq, error); + } + break; + case PM_EVENT_RESTORE: + if (ops->restore_noirq) { + error = ops->restore_noirq(dev); + suspend_report_result(ops->restore_noirq, error); + } + break; +#endif /* CONFIG_HIBERNATION */ + default: + error = -EINVAL; + } + return error; +} + +static char *pm_verb(int event) +{ + switch (event) { + case PM_EVENT_SUSPEND: + return "suspend"; + case PM_EVENT_RESUME: + return "resume"; + case PM_EVENT_FREEZE: + return "freeze"; + case PM_EVENT_QUIESCE: + return "quiesce"; + case PM_EVENT_HIBERNATE: + return "hibernate"; + case PM_EVENT_THAW: + return "thaw"; + case PM_EVENT_RESTORE: + return "restore"; + case PM_EVENT_RECOVER: + return "recover"; + default: + return "(unknown PM event)"; + } +} + +static void pm_dev_dbg(struct device *dev, pm_message_t state, char *info) +{ + dev_dbg(dev, "%s%s%s\n", info, pm_verb(state.event), + ((state.event & PM_EVENT_SLEEP) && device_may_wakeup(dev)) ? + ", may wakeup" : ""); +} + +static void pm_dev_err(struct device *dev, pm_message_t state, char *info, + int error) +{ + printk(KERN_ERR "PM: Device %s failed to %s%s: error %d\n", + kobject_name(&dev->kobj), pm_verb(state.event), info, error); +} + /*------------------------- Resume routines -------------------------*/ /** - * resume_device_early - Power on one device (early resume). + * resume_device_noirq - Power on one device (early resume). * @dev: Device. + * @state: PM transition of the system being carried out. * * Must be called with interrupts disabled. */ -static int resume_device_early(struct device *dev) +static int resume_device_noirq(struct device *dev, pm_message_t state) { int error = 0; TRACE_DEVICE(dev); TRACE_RESUME(0); - if (dev->bus && dev->bus->resume_early) { - dev_dbg(dev, "EARLY resume\n"); + if (!dev->bus) + goto End; + + if (dev->bus->pm) { + pm_dev_dbg(dev, state, "EARLY "); + error = pm_noirq_op(dev, dev->bus->pm, state); + } else if (dev->bus->resume_early) { + pm_dev_dbg(dev, state, "legacy EARLY "); error = dev->bus->resume_early(dev); } - + End: TRACE_RESUME(error); return error; } /** * dpm_power_up - Power on all regular (non-sysdev) devices. + * @state: PM transition of the system being carried out. * - * Walk the dpm_off_irq list and power each device up. This - * is used for devices that required they be powered down with - * interrupts disabled. As devices are powered on, they are moved - * to the dpm_off list. + * Execute the appropriate "noirq resume" callback for all devices marked + * as DPM_OFF_IRQ. * * Must be called with interrupts disabled and only one CPU running. */ -static void dpm_power_up(void) +static void dpm_power_up(pm_message_t state) { + struct device *dev; - while (!list_empty(&dpm_off_irq)) { - struct list_head *entry = dpm_off_irq.next; - struct device *dev = to_device(entry); + list_for_each_entry(dev, &dpm_list, power.entry) + if (dev->power.status > DPM_OFF) { + int error; - list_move_tail(entry, &dpm_off); - resume_device_early(dev); - } + dev->power.status = DPM_OFF; + error = resume_device_noirq(dev, state); + if (error) + pm_dev_err(dev, state, " early", error); + } } /** * device_power_up - Turn on all devices that need special attention. + * @state: PM transition of the system being carried out. * * Power on system devices, then devices that required we shut them down * with interrupts disabled. * * Must be called with interrupts disabled. */ -void device_power_up(void) +void device_power_up(pm_message_t state) { sysdev_resume(); - dpm_power_up(); + dpm_power_up(state); } EXPORT_SYMBOL_GPL(device_power_up); /** * resume_device - Restore state for one device. * @dev: Device. - * + * @state: PM transition of the system being carried out. */ -static int resume_device(struct device *dev) +static int resume_device(struct device *dev, pm_message_t state) { int error = 0; @@ -175,21 +360,40 @@ static int resume_device(struct device *dev) down(&dev->sem); - if (dev->bus && dev->bus->resume) { - dev_dbg(dev,"resuming\n"); - error = dev->bus->resume(dev); + if (dev->bus) { + if (dev->bus->pm) { + pm_dev_dbg(dev, state, ""); + error = pm_op(dev, &dev->bus->pm->base, state); + } else if (dev->bus->resume) { + pm_dev_dbg(dev, state, "legacy "); + error = dev->bus->resume(dev); + } + if (error) + goto End; } - if (!error && dev->type && dev->type->resume) { - dev_dbg(dev,"resuming\n"); - error = dev->type->resume(dev); + if (dev->type) { + if (dev->type->pm) { + pm_dev_dbg(dev, state, "type "); + error = pm_op(dev, dev->type->pm, state); + } else if (dev->type->resume) { + pm_dev_dbg(dev, state, "legacy type "); + error = dev->type->resume(dev); + } + if (error) + goto End; } - if (!error && dev->class && dev->class->resume) { - dev_dbg(dev,"class resume\n"); - error = dev->class->resume(dev); + if (dev->class) { + if (dev->class->pm) { + pm_dev_dbg(dev, state, "class "); + error = pm_op(dev, dev->class->pm, state); + } else if (dev->class->resume) { + pm_dev_dbg(dev, state, "legacy class "); + error = dev->class->resume(dev); + } } - + End: up(&dev->sem); TRACE_RESUME(error); @@ -198,78 +402,161 @@ static int resume_device(struct device *dev) /** * dpm_resume - Resume every device. + * @state: PM transition of the system being carried out. * - * Resume the devices that have either not gone through - * the late suspend, or that did go through it but also - * went through the early resume. + * Execute the appropriate "resume" callback for all devices the status of + * which indicates that they are inactive. + */ +static void dpm_resume(pm_message_t state) +{ + struct list_head list; + + INIT_LIST_HEAD(&list); + mutex_lock(&dpm_list_mtx); + transition_started = false; + while (!list_empty(&dpm_list)) { + struct device *dev = to_device(dpm_list.next); + + get_device(dev); + if (dev->power.status >= DPM_OFF) { + int error; + + dev->power.status = DPM_RESUMING; + mutex_unlock(&dpm_list_mtx); + + error = resume_device(dev, state); + + mutex_lock(&dpm_list_mtx); + if (error) + pm_dev_err(dev, state, "", error); + } else if (dev->power.status == DPM_SUSPENDING) { + /* Allow new children of the device to be registered */ + dev->power.status = DPM_RESUMING; + } + if (!list_empty(&dev->power.entry)) + list_move_tail(&dev->power.entry, &list); + put_device(dev); + } + list_splice(&list, &dpm_list); + mutex_unlock(&dpm_list_mtx); +} + +/** + * complete_device - Complete a PM transition for given device + * @dev: Device. + * @state: PM transition of the system being carried out. + */ +static void complete_device(struct device *dev, pm_message_t state) +{ + down(&dev->sem); + + if (dev->class && dev->class->pm && dev->class->pm->complete) { + pm_dev_dbg(dev, state, "completing class "); + dev->class->pm->complete(dev); + } + + if (dev->type && dev->type->pm && dev->type->pm->complete) { + pm_dev_dbg(dev, state, "completing type "); + dev->type->pm->complete(dev); + } + + if (dev->bus && dev->bus->pm && dev->bus->pm->base.complete) { + pm_dev_dbg(dev, state, "completing "); + dev->bus->pm->base.complete(dev); + } + + up(&dev->sem); +} + +/** + * dpm_complete - Complete a PM transition for all devices. + * @state: PM transition of the system being carried out. * - * Take devices from the dpm_off_list, resume them, - * and put them on the dpm_locked list. + * Execute the ->complete() callbacks for all devices that are not marked + * as DPM_ON. */ -static void dpm_resume(void) +static void dpm_complete(pm_message_t state) { + struct list_head list; + + INIT_LIST_HEAD(&list); mutex_lock(&dpm_list_mtx); - all_sleeping = false; - while(!list_empty(&dpm_off)) { - struct list_head *entry = dpm_off.next; - struct device *dev = to_device(entry); + while (!list_empty(&dpm_list)) { + struct device *dev = to_device(dpm_list.prev); - list_move_tail(entry, &dpm_active); - dev->power.sleeping = false; - mutex_unlock(&dpm_list_mtx); - resume_device(dev); - mutex_lock(&dpm_list_mtx); + get_device(dev); + if (dev->power.status > DPM_ON) { + dev->power.status = DPM_ON; + mutex_unlock(&dpm_list_mtx); + + complete_device(dev, state); + + mutex_lock(&dpm_list_mtx); + } + if (!list_empty(&dev->power.entry)) + list_move(&dev->power.entry, &list); + put_device(dev); } + list_splice(&list, &dpm_list); mutex_unlock(&dpm_list_mtx); } /** * device_resume - Restore state of each device in system. + * @state: PM transition of the system being carried out. * * Resume all the devices, unlock them all, and allow new * devices to be registered once again. */ -void device_resume(void) +void device_resume(pm_message_t state) { might_sleep(); - dpm_resume(); + dpm_resume(state); + dpm_complete(state); } EXPORT_SYMBOL_GPL(device_resume); /*------------------------- Suspend routines -------------------------*/ -static inline char *suspend_verb(u32 event) +/** + * resume_event - return a PM message representing the resume event + * corresponding to given sleep state. + * @sleep_state: PM message representing a sleep state. + */ +static pm_message_t resume_event(pm_message_t sleep_state) { - switch (event) { - case PM_EVENT_SUSPEND: return "suspend"; - case PM_EVENT_FREEZE: return "freeze"; - case PM_EVENT_PRETHAW: return "prethaw"; - default: return "(unknown suspend event)"; + switch (sleep_state.event) { + case PM_EVENT_SUSPEND: + return PMSG_RESUME; + case PM_EVENT_FREEZE: + case PM_EVENT_QUIESCE: + return PMSG_RECOVER; + case PM_EVENT_HIBERNATE: + return PMSG_RESTORE; } -} - -static void -suspend_device_dbg(struct device *dev, pm_message_t state, char *info) -{ - dev_dbg(dev, "%s%s%s\n", info, suspend_verb(state.event), - ((state.event == PM_EVENT_SUSPEND) && device_may_wakeup(dev)) ? - ", may wakeup" : ""); + return PMSG_ON; } /** - * suspend_device_late - Shut down one device (late suspend). + * suspend_device_noirq - Shut down one device (late suspend). * @dev: Device. - * @state: Power state device is entering. + * @state: PM transition of the system being carried out. * * This is called with interrupts off and only a single CPU running. */ -static int suspend_device_late(struct device *dev, pm_message_t state) +static int suspend_device_noirq(struct device *dev, pm_message_t state) { int error = 0; - if (dev->bus && dev->bus->suspend_late) { - suspend_device_dbg(dev, state, "LATE "); + if (!dev->bus) + return 0; + + if (dev->bus->pm) { + pm_dev_dbg(dev, state, "LATE "); + error = pm_noirq_op(dev, dev->bus->pm, state); + } else if (dev->bus->suspend_late) { + pm_dev_dbg(dev, state, "legacy LATE "); error = dev->bus->suspend_late(dev, state); suspend_report_result(dev->bus->suspend_late, error); } @@ -278,37 +565,30 @@ static int suspend_device_late(struct device *dev, pm_message_t state) /** * device_power_down - Shut down special devices. - * @state: Power state to enter. + * @state: PM transition of the system being carried out. * - * Power down devices that require interrupts to be disabled - * and move them from the dpm_off list to the dpm_off_irq list. + * Power down devices that require interrupts to be disabled. * Then power down system devices. * * Must be called with interrupts disabled and only one CPU running. */ int device_power_down(pm_message_t state) { + struct device *dev; int error = 0; - while (!list_empty(&dpm_off)) { - struct list_head *entry = dpm_off.prev; - struct device *dev = to_device(entry); - - error = suspend_device_late(dev, state); + list_for_each_entry_reverse(dev, &dpm_list, power.entry) { + error = suspend_device_noirq(dev, state); if (error) { - printk(KERN_ERR "Could not power down device %s: " - "error %d\n", - kobject_name(&dev->kobj), error); + pm_dev_err(dev, state, " late", error); break; } - if (!list_empty(&dev->power.entry)) - list_move(&dev->power.entry, &dpm_off_irq); + dev->power.status = DPM_OFF_IRQ; } - if (!error) error = sysdev_suspend(state); if (error) - dpm_power_up(); + dpm_power_up(resume_event(state)); return error; } EXPORT_SYMBOL_GPL(device_power_down); @@ -316,7 +596,7 @@ EXPORT_SYMBOL_GPL(device_power_down); /** * suspend_device - Save state of one device. * @dev: Device. - * @state: Power state device is entering. + * @state: PM transition of the system being carried out. */ static int suspend_device(struct device *dev, pm_message_t state) { @@ -324,24 +604,43 @@ static int suspend_device(struct device *dev, pm_message_t state) down(&dev->sem); - if (dev->class && dev->class->suspend) { - suspend_device_dbg(dev, state, "class "); - error = dev->class->suspend(dev, state); - suspend_report_result(dev->class->suspend, error); + if (dev->class) { + if (dev->class->pm) { + pm_dev_dbg(dev, state, "class "); + error = pm_op(dev, dev->class->pm, state); + } else if (dev->class->suspend) { + pm_dev_dbg(dev, state, "legacy class "); + error = dev->class->suspend(dev, state); + suspend_report_result(dev->class->suspend, error); + } + if (error) + goto End; } - if (!error && dev->type && dev->type->suspend) { - suspend_device_dbg(dev, state, "type "); - error = dev->type->suspend(dev, state); - suspend_report_result(dev->type->suspend, error); + if (dev->type) { + if (dev->type->pm) { + pm_dev_dbg(dev, state, "type "); + error = pm_op(dev, dev->type->pm, state); + } else if (dev->type->suspend) { + pm_dev_dbg(dev, state, "legacy type "); + error = dev->type->suspend(dev, state); + suspend_report_result(dev->type->suspend, error); + } + if (error) + goto End; } - if (!error && dev->bus && dev->bus->suspend) { - suspend_device_dbg(dev, state, ""); - error = dev->bus->suspend(dev, state); - suspend_report_result(dev->bus->suspend, error); + if (dev->bus) { + if (dev->bus->pm) { + pm_dev_dbg(dev, state, ""); + error = pm_op(dev, &dev->bus->pm->base, state); + } else if (dev->bus->suspend) { + pm_dev_dbg(dev, state, "legacy "); + error = dev->bus->suspend(dev, state); + suspend_report_result(dev->bus->suspend, error); + } } - + End: up(&dev->sem); return error; @@ -349,67 +648,141 @@ static int suspend_device(struct device *dev, pm_message_t state) /** * dpm_suspend - Suspend every device. - * @state: Power state to put each device in. + * @state: PM transition of the system being carried out. * - * Walk the dpm_locked list. Suspend each device and move it - * to the dpm_off list. - * - * (For historical reasons, if it returns -EAGAIN, that used to mean - * that the device would be called again with interrupts disabled. - * These days, we use the "suspend_late()" callback for that, so we - * print a warning and consider it an error). + * Execute the appropriate "suspend" callbacks for all devices. */ static int dpm_suspend(pm_message_t state) { + struct list_head list; int error = 0; + INIT_LIST_HEAD(&list); mutex_lock(&dpm_list_mtx); - while (!list_empty(&dpm_active)) { - struct list_head *entry = dpm_active.prev; - struct device *dev = to_device(entry); - - WARN_ON(dev->parent && dev->parent->power.sleeping); + while (!list_empty(&dpm_list)) { + struct device *dev = to_device(dpm_list.prev); - dev->power.sleeping = true; + get_device(dev); mutex_unlock(&dpm_list_mtx); + error = suspend_device(dev, state); + mutex_lock(&dpm_list_mtx); if (error) { - printk(KERN_ERR "Could not suspend device %s: " - "error %d%s\n", - kobject_name(&dev->kobj), - error, - (error == -EAGAIN ? - " (please convert to suspend_late)" : - "")); - dev->power.sleeping = false; + pm_dev_err(dev, state, "", error); + put_device(dev); break; } + dev->power.status = DPM_OFF; if (!list_empty(&dev->power.entry)) - list_move(&dev->power.entry, &dpm_off); + list_move(&dev->power.entry, &list); + put_device(dev); } - if (!error) - all_sleeping = true; + list_splice(&list, dpm_list.prev); mutex_unlock(&dpm_list_mtx); + return error; +} + +/** + * prepare_device - Execute the ->prepare() callback(s) for given device. + * @dev: Device. + * @state: PM transition of the system being carried out. + */ +static int prepare_device(struct device *dev, pm_message_t state) +{ + int error = 0; + + down(&dev->sem); + + if (dev->bus && dev->bus->pm && dev->bus->pm->base.prepare) { + pm_dev_dbg(dev, state, "preparing "); + error = dev->bus->pm->base.prepare(dev); + suspend_report_result(dev->bus->pm->base.prepare, error); + if (error) + goto End; + } + + if (dev->type && dev->type->pm && dev->type->pm->prepare) { + pm_dev_dbg(dev, state, "preparing type "); + error = dev->type->pm->prepare(dev); + suspend_report_result(dev->type->pm->prepare, error); + if (error) + goto End; + } + + if (dev->class && dev->class->pm && dev->class->pm->prepare) { + pm_dev_dbg(dev, state, "preparing class "); + error = dev->class->pm->prepare(dev); + suspend_report_result(dev->class->pm->prepare, error); + } + End: + up(&dev->sem); + + return error; +} +/** + * dpm_prepare - Prepare all devices for a PM transition. + * @state: PM transition of the system being carried out. + * + * Execute the ->prepare() callback for all devices. + */ +static int dpm_prepare(pm_message_t state) +{ + struct list_head list; + int error = 0; + + INIT_LIST_HEAD(&list); + mutex_lock(&dpm_list_mtx); + transition_started = true; + while (!list_empty(&dpm_list)) { + struct device *dev = to_device(dpm_list.next); + + get_device(dev); + dev->power.status = DPM_PREPARING; + mutex_unlock(&dpm_list_mtx); + + error = prepare_device(dev, state); + + mutex_lock(&dpm_list_mtx); + if (error) { + dev->power.status = DPM_ON; + if (error == -EAGAIN) { + put_device(dev); + continue; + } + printk(KERN_ERR "PM: Failed to prepare device %s " + "for power transition: error %d\n", + kobject_name(&dev->kobj), error); + put_device(dev); + break; + } + dev->power.status = DPM_SUSPENDING; + if (!list_empty(&dev->power.entry)) + list_move_tail(&dev->power.entry, &list); + put_device(dev); + } + list_splice(&list, &dpm_list); + mutex_unlock(&dpm_list_mtx); return error; } /** * device_suspend - Save state and stop all devices in system. - * @state: new power management state + * @state: PM transition of the system being carried out. * - * Prevent new devices from being registered, then lock all devices - * and suspend them. + * Prepare and suspend all devices. */ int device_suspend(pm_message_t state) { int error; might_sleep(); - error = dpm_suspend(state); + error = dpm_prepare(state); + if (!error) + error = dpm_suspend(state); if (error) - device_resume(); + device_resume(resume_event(state)); return error; } EXPORT_SYMBOL_GPL(device_suspend); diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index a6894f2..a3252c0 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -4,7 +4,7 @@ * main.c */ -extern struct list_head dpm_active; /* The active device list */ +extern struct list_head dpm_list; /* The active device list */ static inline struct device *to_device(struct list_head *entry) { diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index 2b4b392..8c1e656 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -188,9 +188,9 @@ static int show_file_hash(unsigned int value) static int show_dev_hash(unsigned int value) { int match = 0; - struct list_head * entry = dpm_active.prev; + struct list_head *entry = dpm_list.prev; - while (entry != &dpm_active) { + while (entry != &dpm_list) { struct device * dev = to_device(entry); unsigned int hash = hash_string(DEVSEED, dev->bus_id, DEVHASH); if (hash == value) { diff --git a/include/linux/device.h b/include/linux/device.h index 6a2d04c..f71a78d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -68,6 +68,8 @@ struct bus_type { int (*resume_early)(struct device *dev); int (*resume)(struct device *dev); + struct pm_ext_ops *pm; + struct bus_type_private *p; }; @@ -131,6 +133,8 @@ struct device_driver { int (*resume) (struct device *dev); struct attribute_group **groups; + struct pm_ops *pm; + struct driver_private *p; }; @@ -197,6 +201,8 @@ struct class { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); + + struct pm_ops *pm; }; extern int __must_check class_register(struct class *class); @@ -248,8 +254,11 @@ struct device_type { struct attribute_group **groups; int (*uevent)(struct device *dev, struct kobj_uevent_env *env); void (*release)(struct device *dev); + int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); + + struct pm_ops *pm; }; /* interface for exporting device attributes */ diff --git a/include/linux/pm.h b/include/linux/pm.h index 39a7ee8..4ad9de9 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -112,7 +112,9 @@ typedef struct pm_message { int event; } pm_message_t; -/* +/** + * struct pm_ops - device PM callbacks + * * Several driver power state transitions are externally visible, affecting * the state of pending I/O queues and (for drivers that touch hardware) * interrupts, wakeups, DMA, and other hardware state. There may also be @@ -120,6 +122,284 @@ typedef struct pm_message { * to the rest of the driver stack (such as a driver that's ON gating off * clocks which are not in active use). * + * The externally visible transitions are handled with the help of the following + * callbacks included in this structure: + * + * @prepare: Prepare the device for the upcoming transition, but do NOT change + * its hardware state. Prevent new children of the device from being + * registered after @prepare() returns (the driver's subsystem and + * generally the rest of the kernel is supposed to prevent new calls to the + * probe method from being made too once @prepare() has succeeded). If + * @prepare() detects a situation it cannot handle (e.g. registration of a + * child already in progress), it may return -EAGAIN, so that the PM core + * can execute it once again (e.g. after the new child has been registered) + * to recover from the race condition. This method is executed for all + * kinds of suspend transitions and is followed by one of the suspend + * callbacks: @suspend(), @freeze(), or @poweroff(). + * The PM core executes @prepare() for all devices before starting to + * execute suspend callbacks for any of them, so drivers may assume all of + * the other devices to be present and functional while @prepare() is being + * executed. In particular, it is safe to make GFP_KERNEL memory + * allocations from within @prepare(). However, drivers may NOT assume + * anything about the availability of the user space at that time and it + * is not correct to request firmware from within @prepare() (it's too + * late to do that). [To work around this limitation, drivers may + * register suspend and hibernation notifiers that are executed before the + * freezing of tasks.] + * + * @complete: Undo the changes made by @prepare(). This method is executed for + * all kinds of resume transitions, following one of the resume callbacks: + * @resume(), @thaw(), @restore(). Also called if the state transition + * fails before the driver's suspend callback (@suspend(), @freeze(), + * @poweroff()) can be executed (e.g. if the suspend callback fails for one + * of the other devices that the PM core has unsuccessfully attempted to + * suspend earlier). + * The PM core executes @complete() after it has executed the appropriate + * resume callback for all devices. + * + * @suspend: Executed before putting the system into a sleep state in which the + * contents of main memory are preserved. Quiesce the device, put it into + * a low power state appropriate for the upcoming system state (such as + * PCI_D3hot), and enable wakeup events as appropriate. + * + * @resume: Executed after waking the system up from a sleep state in which the + * contents of main memory were preserved. Put the device into the + * appropriate state, according to the information saved in memory by the + * preceding @suspend(). The driver starts working again, responding to + * hardware events and software requests. The hardware may have gone + * through a power-off reset, or it may have maintained state from the + * previous suspend() which the driver may rely on while resuming. On most + * platforms, there are no restrictions on availability of resources like + * clocks during @resume(). + * + * @freeze: Hibernation-specific, executed before creating a hibernation image. + * Quiesce operations so that a consistent image can be created, but do NOT + * otherwise put the device into a low power device state and do NOT emit + * system wakeup events. Save in main memory the device settings to be + * used by @restore() during the subsequent resume from hibernation or by + * the subsequent @thaw(), if the creation of the image or the restoration + * of main memory contents from it fails. + * + * @thaw: Hibernation-specific, executed after creating a hibernation image OR + * if the creation of the image fails. Also executed after a failing + * attempt to restore the contents of main memory from such an image. + * Undo the changes made by the preceding @freeze(), so the device can be + * operated in the same way as immediately before the call to @freeze(). + * + * @poweroff: Hibernation-specific, executed after saving a hibernation image. + * Quiesce the device, put it into a low power state appropriate for the + * upcoming system state (such as PCI_D3hot), and enable wakeup events as + * appropriate. + * + * @restore: Hibernation-specific, executed after restoring the contents of main + * memory from a hibernation image. Driver starts working again, + * responding to hardware events and software requests. Drivers may NOT + * make ANY assumptions about the hardware state right prior to @restore(). + * On most platforms, there are no restrictions on availability of + * resources like clocks during @restore(). + * + * All of the above callbacks, except for @complete(), return error codes. + * However, the error codes returned by the resume operations, @resume(), + * @thaw(), and @restore(), do not cause the PM core to abort the resume + * transition during which they are returned. The error codes returned in + * that cases are only printed by the PM core to the system logs for debugging + * purposes. Still, it is recommended that drivers only return error codes + * from their resume methods in case of an unrecoverable failure (i.e. when the + * device being handled refuses to resume and becomes unusable) to allow us to + * modify the PM core in the future, so that it can avoid attempting to handle + * devices that failed to resume and their children. + * + * It is allowed to unregister devices while the above callbacks are being + * executed. However, it is not allowed to unregister a device from within any + * of its own callbacks. + */ + +struct pm_ops { + int (*prepare)(struct device *dev); + void (*complete)(struct device *dev); + int (*suspend)(struct device *dev); + int (*resume)(struct device *dev); + int (*freeze)(struct device *dev); + int (*thaw)(struct device *dev); + int (*poweroff)(struct device *dev); + int (*restore)(struct device *dev); +}; + +/** + * struct pm_ext_ops - extended device PM callbacks + * + * Some devices require certain operations related to suspend and hibernation + * to be carried out with interrupts disabled. Thus, 'struct pm_ext_ops' below + * is defined, adding callbacks to be executed with interrupts disabled to + * 'struct pm_ops'. + * + * The following callbacks included in 'struct pm_ext_ops' are executed with + * the nonboot CPUs switched off and with interrupts disabled on the only + * functional CPU. They also are executed with the PM core list of devices + * locked, so they must NOT unregister any devices. + * + * @suspend_noirq: Complete the operations of ->suspend() by carrying out any + * actions required for suspending the device that need interrupts to be + * disabled + * + * @resume_noirq: Prepare for the execution of ->resume() by carrying out any + * actions required for resuming the device that need interrupts to be + * disabled + * + * @freeze_noirq: Complete the operations of ->freeze() by carrying out any + * actions required for freezing the device that need interrupts to be + * disabled + * + * @thaw_noirq: Prepare for the execution of ->thaw() by carrying out any + * actions required for thawing the device that need interrupts to be + * disabled + * + * @poweroff_noirq: Complete the operations of ->poweroff() by carrying out any + * actions required for handling the device that need interrupts to be + * disabled + * + * @restore_noirq: Prepare for the execution of ->restore() by carrying out any + * actions required for restoring the operations of the device that need + * interrupts to be disabled + * + * All of the above callbacks return error codes, but the error codes returned + * by the resume operations, @resume_noirq(), @thaw_noirq(), and + * @restore_noirq(), do not cause the PM core to abort the resume transition + * during which they are returned. The error codes returned in that cases are + * only printed by the PM core to the system logs for debugging purposes. + * Still, as stated above, it is recommended that drivers only return error + * codes from their resume methods if the device being handled fails to resume + * and is not usable any more. + */ + +struct pm_ext_ops { + struct pm_ops base; + int (*suspend_noirq)(struct device *dev); + int (*resume_noirq)(struct device *dev); + int (*freeze_noirq)(struct device *dev); + int (*thaw_noirq)(struct device *dev); + int (*poweroff_noirq)(struct device *dev); + int (*restore_noirq)(struct device *dev); +}; + +/** + * PM_EVENT_ messages + * + * The following PM_EVENT_ messages are defined for the internal use of the PM + * core, in order to provide a mechanism allowing the high level suspend and + * hibernation code to convey the necessary information to the device PM core + * code: + * + * ON No transition. + * + * FREEZE System is going to hibernate, call ->prepare() and ->freeze() + * for all devices. + * + * SUSPEND System is going to suspend, call ->prepare() and ->suspend() + * for all devices. + * + * HIBERNATE Hibernation image has been saved, call ->prepare() and + * ->poweroff() for all devices. + * + * QUIESCE Contents of main memory are going to be restored from a (loaded) + * hibernation image, call ->prepare() and ->freeze() for all + * devices. + * + * RESUME System is resuming, call ->resume() and ->complete() for all + * devices. + * + * THAW Hibernation image has been created, call ->thaw() and + * ->complete() for all devices. + * + * RESTORE Contents of main memory have been restored from a hibernation + * image, call ->restore() and ->complete() for all devices. + * + * RECOVER Creation of a hibernation image or restoration of the main + * memory contents from a hibernation image has failed, call + * ->thaw() and ->complete() for all devices. + */ + +#define PM_EVENT_ON 0x0000 +#define PM_EVENT_FREEZE 0x0001 +#define PM_EVENT_SUSPEND 0x0002 +#define PM_EVENT_HIBERNATE 0x0004 +#define PM_EVENT_QUIESCE 0x0008 +#define PM_EVENT_RESUME 0x0010 +#define PM_EVENT_THAW 0x0020 +#define PM_EVENT_RESTORE 0x0040 +#define PM_EVENT_RECOVER 0x0080 + +#define PM_EVENT_SLEEP (PM_EVENT_SUSPEND | PM_EVENT_HIBERNATE) + +#define PMSG_FREEZE ((struct pm_message){ .event = PM_EVENT_FREEZE, }) +#define PMSG_QUIESCE ((struct pm_message){ .event = PM_EVENT_QUIESCE, }) +#define PMSG_SUSPEND ((struct pm_message){ .event = PM_EVENT_SUSPEND, }) +#define PMSG_HIBERNATE ((struct pm_message){ .event = PM_EVENT_HIBERNATE, }) +#define PMSG_RESUME ((struct pm_message){ .event = PM_EVENT_RESUME, }) +#define PMSG_THAW ((struct pm_message){ .event = PM_EVENT_THAW, }) +#define PMSG_RESTORE ((struct pm_message){ .event = PM_EVENT_RESTORE, }) +#define PMSG_RECOVER ((struct pm_message){ .event = PM_EVENT_RECOVER, }) +#define PMSG_ON ((struct pm_message){ .event = PM_EVENT_ON, }) + +/** + * Device power management states + * + * These state labels are used internally by the PM core to indicate the current + * status of a device with respect to the PM core operations. + * + * DPM_ON Device is regarded as operational. Set this way + * initially and when ->complete() is about to be called. + * Also set when ->prepare() fails. + * + * DPM_PREPARING Device is going to be prepared for a PM transition. Set + * when ->prepare() is about to be called. + * + * DPM_RESUMING Device is going to be resumed. Set when ->resume(), + * ->thaw(), or ->restore() is about to be called. + * + * DPM_SUSPENDING Device has been prepared for a power transition. Set + * when ->prepare() has just succeeded. + * + * DPM_OFF Device is regarded as inactive. Set immediately after + * ->suspend(), ->freeze(), or ->poweroff() has succeeded. + * Also set when ->resume()_noirq, ->thaw_noirq(), or + * ->restore_noirq() is about to be called. + * + * DPM_OFF_IRQ Device is in a "deep sleep". Set immediately after + * ->suspend_noirq(), ->freeze_noirq(), or + * ->poweroff_noirq() has just succeeded. + */ + +enum dpm_state { + DPM_INVALID, + DPM_ON, + DPM_PREPARING, + DPM_RESUMING, + DPM_SUSPENDING, + DPM_OFF, + DPM_OFF_IRQ, +}; + +struct dev_pm_info { + pm_message_t power_state; + unsigned can_wakeup:1; + unsigned should_wakeup:1; + enum dpm_state status; /* Owned by the PM core */ +#ifdef CONFIG_PM_SLEEP + struct list_head entry; +#endif +}; + +/* + * The PM_EVENT_ messages are also used by drivers implementing the legacy + * suspend framework, based on the ->suspend() and ->resume() callbacks common + * for suspend and hibernation transitions, according to the rules below. + */ + +/* Necessary, because several drivers use PM_EVENT_PRETHAW */ +#define PM_EVENT_PRETHAW PM_EVENT_QUIESCE + +/* * One transition is triggered by resume(), after a suspend() call; the * message is implicit: * @@ -164,35 +444,13 @@ typedef struct pm_message { * or from system low-power states such as standby or suspend-to-RAM. */ -#define PM_EVENT_ON 0 -#define PM_EVENT_FREEZE 1 -#define PM_EVENT_SUSPEND 2 -#define PM_EVENT_HIBERNATE 4 -#define PM_EVENT_PRETHAW 8 - -#define PM_EVENT_SLEEP (PM_EVENT_SUSPEND | PM_EVENT_HIBERNATE) - -#define PMSG_FREEZE ((struct pm_message){ .event = PM_EVENT_FREEZE, }) -#define PMSG_PRETHAW ((struct pm_message){ .event = PM_EVENT_PRETHAW, }) -#define PMSG_SUSPEND ((struct pm_message){ .event = PM_EVENT_SUSPEND, }) -#define PMSG_HIBERNATE ((struct pm_message){ .event = PM_EVENT_HIBERNATE, }) -#define PMSG_ON ((struct pm_message){ .event = PM_EVENT_ON, }) - -struct dev_pm_info { - pm_message_t power_state; - unsigned can_wakeup:1; - unsigned should_wakeup:1; - bool sleeping:1; /* Owned by the PM core */ -#ifdef CONFIG_PM_SLEEP - struct list_head entry; -#endif -}; +#ifdef CONFIG_PM_SLEEP +extern void device_pm_lock(void); +extern void device_power_up(pm_message_t state); +extern void device_resume(pm_message_t state); +extern void device_pm_unlock(void); extern int device_power_down(pm_message_t state); -extern void device_power_up(void); -extern void device_resume(void); - -#ifdef CONFIG_PM_SLEEP extern int device_suspend(pm_message_t state); extern int device_prepare_suspend(pm_message_t state); diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 14a656c..d416be0 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -193,6 +193,7 @@ static int create_image(int platform_mode) if (error) return error; + device_pm_lock(); local_irq_disable(); /* At this point, device_suspend() has been called, but *not* * device_power_down(). We *must* call device_power_down() now. @@ -224,9 +225,11 @@ static int create_image(int platform_mode) /* NOTE: device_power_up() is just a resume() for devices * that suspended with irqs off ... no overall powerup. */ - device_power_up(); + device_power_up(in_suspend ? + (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); Enable_irqs: local_irq_enable(); + device_pm_unlock(); return error; } @@ -280,7 +283,8 @@ int hibernation_snapshot(int platform_mode) Finish: platform_finish(platform_mode); Resume_devices: - device_resume(); + device_resume(in_suspend ? + (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); Resume_console: resume_console(); Close: @@ -300,8 +304,9 @@ static int resume_target_kernel(void) { int error; + device_pm_lock(); local_irq_disable(); - error = device_power_down(PMSG_PRETHAW); + error = device_power_down(PMSG_QUIESCE); if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting resume\n"); @@ -329,9 +334,10 @@ static int resume_target_kernel(void) swsusp_free(); restore_processor_state(); touch_softlockup_watchdog(); - device_power_up(); + device_power_up(PMSG_RECOVER); Enable_irqs: local_irq_enable(); + device_pm_unlock(); return error; } @@ -350,7 +356,7 @@ int hibernation_restore(int platform_mode) pm_prepare_console(); suspend_console(); - error = device_suspend(PMSG_PRETHAW); + error = device_suspend(PMSG_QUIESCE); if (error) goto Finish; @@ -362,7 +368,7 @@ int hibernation_restore(int platform_mode) enable_nonboot_cpus(); } platform_restore_cleanup(platform_mode); - device_resume(); + device_resume(PMSG_RECOVER); Finish: resume_console(); pm_restore_console(); @@ -403,6 +409,7 @@ int hibernation_platform_enter(void) if (error) goto Finish; + device_pm_lock(); local_irq_disable(); error = device_power_down(PMSG_HIBERNATE); if (!error) { @@ -411,6 +418,7 @@ int hibernation_platform_enter(void) while (1); } local_irq_enable(); + device_pm_unlock(); /* * We don't need to reenable the nonboot CPUs or resume consoles, since @@ -419,7 +427,7 @@ int hibernation_platform_enter(void) Finish: hibernation_ops->finish(); Resume_devices: - device_resume(); + device_resume(PMSG_RESTORE); Resume_console: resume_console(); Close: diff --git a/kernel/power/main.c b/kernel/power/main.c index 6a6d5eb..d023b6b 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -228,6 +228,7 @@ static int suspend_enter(suspend_state_t state) { int error = 0; + device_pm_lock(); arch_suspend_disable_irqs(); BUG_ON(!irqs_disabled()); @@ -239,10 +240,11 @@ static int suspend_enter(suspend_state_t state) if (!suspend_test(TEST_CORE)) error = suspend_ops->enter(state); - device_power_up(); + device_power_up(PMSG_RESUME); Done: arch_suspend_enable_irqs(); BUG_ON(irqs_disabled()); + device_pm_unlock(); return error; } @@ -291,7 +293,7 @@ int suspend_devices_and_enter(suspend_state_t state) if (suspend_ops->finish) suspend_ops->finish(); Resume_devices: - device_resume(); + device_resume(PMSG_RESUME); Resume_console: resume_console(); Close: -- cgit v0.10.2 From bbb44d9f23d868a2837c6b22b8dfb123d8e7800c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 20 May 2008 00:49:04 +0200 Subject: PCI: implement new suspend/resume callbacks Implement new suspend and hibernation callbacks for the PCI bus type. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 677fd9d..8eb8a30 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -274,7 +274,57 @@ static int pci_device_remove(struct device * dev) return 0; } -static int pci_device_suspend(struct device * dev, pm_message_t state) +static void pci_device_shutdown(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct pci_driver *drv = pci_dev->driver; + + if (drv && drv->shutdown) + drv->shutdown(pci_dev); + pci_msi_shutdown(pci_dev); + pci_msix_shutdown(pci_dev); +} + +#ifdef CONFIG_PM_SLEEP + +/* + * Default "suspend" method for devices that have no driver provided suspend, + * or not even a driver at all. + */ +static void pci_default_pm_suspend(struct pci_dev *pci_dev) +{ + pci_save_state(pci_dev); + /* + * mark its power state as "unknown", since we don't know if + * e.g. the BIOS will change its device state when we suspend. + */ + if (pci_dev->current_state == PCI_D0) + pci_dev->current_state = PCI_UNKNOWN; +} + +/* + * Default "resume" method for devices that have no driver provided resume, + * or not even a driver at all. + */ +static int pci_default_pm_resume(struct pci_dev *pci_dev) +{ + int retval = 0; + + /* restore the PCI config space */ + pci_restore_state(pci_dev); + /* if the device was enabled before suspend, reenable */ + retval = pci_reenable_device(pci_dev); + /* + * if the device was busmaster before the suspend, make it busmaster + * again + */ + if (pci_dev->is_busmaster) + pci_set_master(pci_dev); + + return retval; +} + +static int pci_legacy_suspend(struct device *dev, pm_message_t state) { struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; @@ -284,21 +334,12 @@ static int pci_device_suspend(struct device * dev, pm_message_t state) i = drv->suspend(pci_dev, state); suspend_report_result(drv->suspend, i); } else { - pci_save_state(pci_dev); - /* - * mark its power state as "unknown", since we don't know if - * e.g. the BIOS will change its device state when we suspend. - */ - if (pci_dev->current_state == PCI_D0) - pci_dev->current_state = PCI_UNKNOWN; + pci_default_pm_suspend(pci_dev); } - - pci_fixup_device(pci_fixup_suspend, pci_dev); - return i; } -static int pci_device_suspend_late(struct device * dev, pm_message_t state) +static int pci_legacy_suspend_late(struct device *dev, pm_message_t state) { struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; @@ -311,26 +352,7 @@ static int pci_device_suspend_late(struct device * dev, pm_message_t state) return i; } -/* - * Default resume method for devices that have no driver provided resume, - * or not even a driver at all. - */ -static int pci_default_resume(struct pci_dev *pci_dev) -{ - int retval = 0; - - /* restore the PCI config space */ - pci_restore_state(pci_dev); - /* if the device was enabled before suspend, reenable */ - retval = pci_reenable_device(pci_dev); - /* if the device was busmaster before the suspend, make it busmaster again */ - if (pci_dev->is_busmaster) - pci_set_master(pci_dev); - - return retval; -} - -static int pci_device_resume(struct device * dev) +static int pci_legacy_resume(struct device *dev) { int error; struct pci_dev * pci_dev = to_pci_dev(dev); @@ -339,35 +361,313 @@ static int pci_device_resume(struct device * dev) if (drv && drv->resume) error = drv->resume(pci_dev); else - error = pci_default_resume(pci_dev); - pci_fixup_device(pci_fixup_resume, pci_dev); + error = pci_default_pm_resume(pci_dev); return error; } -static int pci_device_resume_early(struct device * dev) +static int pci_legacy_resume_early(struct device *dev) { int error = 0; struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; - pci_fixup_device(pci_fixup_resume_early, pci_dev); - if (drv && drv->resume_early) error = drv->resume_early(pci_dev); return error; } -static void pci_device_shutdown(struct device *dev) +static int pci_pm_prepare(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int error = 0; + + if (drv && drv->pm && drv->pm->prepare) + error = drv->pm->prepare(dev); + + return error; +} + +static void pci_pm_complete(struct device *dev) +{ + struct device_driver *drv = dev->driver; + + if (drv && drv->pm && drv->pm->complete) + drv->pm->complete(dev); +} + +#ifdef CONFIG_SUSPEND + +static int pci_pm_suspend(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct device_driver *drv = dev->driver; + int error = 0; + + if (drv && drv->pm) { + if (drv->pm->suspend) { + error = drv->pm->suspend(dev); + suspend_report_result(drv->pm->suspend, error); + } else { + pci_default_pm_suspend(pci_dev); + } + } else { + error = pci_legacy_suspend(dev, PMSG_SUSPEND); + } + pci_fixup_device(pci_fixup_suspend, pci_dev); + + return error; +} + +static int pci_pm_suspend_noirq(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct pci_driver *drv = pci_dev->driver; + int error = 0; - if (drv && drv->shutdown) - drv->shutdown(pci_dev); - pci_msi_shutdown(pci_dev); - pci_msix_shutdown(pci_dev); + if (drv && drv->pm) { + if (drv->pm->suspend_noirq) { + error = drv->pm->suspend_noirq(dev); + suspend_report_result(drv->pm->suspend_noirq, error); + } + } else { + error = pci_legacy_suspend_late(dev, PMSG_SUSPEND); + } + + return error; +} + +static int pci_pm_resume(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct device_driver *drv = dev->driver; + int error; + + pci_fixup_device(pci_fixup_resume, pci_dev); + + if (drv && drv->pm) { + error = drv->pm->resume ? drv->pm->resume(dev) : + pci_default_pm_resume(pci_dev); + } else { + error = pci_legacy_resume(dev); + } + + return error; +} + +static int pci_pm_resume_noirq(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct pci_driver *drv = pci_dev->driver; + int error = 0; + + pci_fixup_device(pci_fixup_resume_early, pci_dev); + + if (drv && drv->pm) { + if (drv->pm->resume_noirq) + error = drv->pm->resume_noirq(dev); + } else { + error = pci_legacy_resume_early(dev); + } + + return error; +} + +#else /* !CONFIG_SUSPEND */ + +#define pci_pm_suspend NULL +#define pci_pm_suspend_noirq NULL +#define pci_pm_resume NULL +#define pci_pm_resume_noirq NULL + +#endif /* !CONFIG_SUSPEND */ + +#ifdef CONFIG_HIBERNATION + +static int pci_pm_freeze(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct device_driver *drv = dev->driver; + int error = 0; + + if (drv && drv->pm) { + if (drv->pm->freeze) { + error = drv->pm->freeze(dev); + suspend_report_result(drv->pm->freeze, error); + } else { + pci_default_pm_suspend(pci_dev); + } + } else { + error = pci_legacy_suspend(dev, PMSG_FREEZE); + pci_fixup_device(pci_fixup_suspend, pci_dev); + } + + return error; +} + +static int pci_pm_freeze_noirq(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct pci_driver *drv = pci_dev->driver; + int error = 0; + + if (drv && drv->pm) { + if (drv->pm->freeze_noirq) { + error = drv->pm->freeze_noirq(dev); + suspend_report_result(drv->pm->freeze_noirq, error); + } + } else { + error = pci_legacy_suspend_late(dev, PMSG_FREEZE); + } + + return error; +} + +static int pci_pm_thaw(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int error = 0; + + if (drv && drv->pm) { + if (drv->pm->thaw) + error = drv->pm->thaw(dev); + } else { + pci_fixup_device(pci_fixup_resume, to_pci_dev(dev)); + error = pci_legacy_resume(dev); + } + + return error; +} + +static int pci_pm_thaw_noirq(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct pci_driver *drv = pci_dev->driver; + int error = 0; + + if (drv && drv->pm) { + if (drv->pm->thaw_noirq) + error = drv->pm->thaw_noirq(dev); + } else { + pci_fixup_device(pci_fixup_resume_early, pci_dev); + error = pci_legacy_resume_early(dev); + } + + return error; +} + +static int pci_pm_poweroff(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int error = 0; + + pci_fixup_device(pci_fixup_suspend, to_pci_dev(dev)); + + if (drv && drv->pm) { + if (drv->pm->poweroff) { + error = drv->pm->poweroff(dev); + suspend_report_result(drv->pm->poweroff, error); + } + } else { + error = pci_legacy_suspend(dev, PMSG_HIBERNATE); + } + + return error; +} + +static int pci_pm_poweroff_noirq(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct pci_driver *drv = pci_dev->driver; + int error = 0; + + if (drv && drv->pm) { + if (drv->pm->poweroff_noirq) { + error = drv->pm->poweroff_noirq(dev); + suspend_report_result(drv->pm->poweroff_noirq, error); + } + } else { + error = pci_legacy_suspend_late(dev, PMSG_HIBERNATE); + } + + return error; +} + +static int pci_pm_restore(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct device_driver *drv = dev->driver; + int error; + + if (drv && drv->pm) { + error = drv->pm->restore ? drv->pm->restore(dev) : + pci_default_pm_resume(pci_dev); + } else { + error = pci_legacy_resume(dev); + } + pci_fixup_device(pci_fixup_resume, pci_dev); + + return error; +} + +static int pci_pm_restore_noirq(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct pci_driver *drv = pci_dev->driver; + int error = 0; + + pci_fixup_device(pci_fixup_resume, pci_dev); + + if (drv && drv->pm) { + if (drv->pm->restore_noirq) + error = drv->pm->restore_noirq(dev); + } else { + error = pci_legacy_resume_early(dev); + } + pci_fixup_device(pci_fixup_resume_early, pci_dev); + + return error; } +#else /* !CONFIG_HIBERNATION */ + +#define pci_pm_freeze NULL +#define pci_pm_freeze_noirq NULL +#define pci_pm_thaw NULL +#define pci_pm_thaw_noirq NULL +#define pci_pm_poweroff NULL +#define pci_pm_poweroff_noirq NULL +#define pci_pm_restore NULL +#define pci_pm_restore_noirq NULL + +#endif /* !CONFIG_HIBERNATION */ + +struct pm_ext_ops pci_pm_ops = { + .base = { + .prepare = pci_pm_prepare, + .complete = pci_pm_complete, + .suspend = pci_pm_suspend, + .resume = pci_pm_resume, + .freeze = pci_pm_freeze, + .thaw = pci_pm_thaw, + .poweroff = pci_pm_poweroff, + .restore = pci_pm_restore, + }, + .suspend_noirq = pci_pm_suspend_noirq, + .resume_noirq = pci_pm_resume_noirq, + .freeze_noirq = pci_pm_freeze_noirq, + .thaw_noirq = pci_pm_thaw_noirq, + .poweroff_noirq = pci_pm_poweroff_noirq, + .restore_noirq = pci_pm_restore_noirq, +}; + +#define PCI_PM_OPS_PTR &pci_pm_ops + +#else /* !CONFIG_PM_SLEEP */ + +#define PCI_PM_OPS_PTR NULL + +#endif /* !CONFIG_PM_SLEEP */ + /** * __pci_register_driver - register a new pci driver * @drv: the driver structure to register @@ -390,6 +690,9 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner, drv->driver.owner = owner; drv->driver.mod_name = mod_name; + if (drv->pm) + drv->driver.pm = &drv->pm->base; + spin_lock_init(&drv->dynids.lock); INIT_LIST_HEAD(&drv->dynids.list); @@ -515,12 +818,9 @@ struct bus_type pci_bus_type = { .uevent = pci_uevent, .probe = pci_device_probe, .remove = pci_device_remove, - .suspend = pci_device_suspend, - .suspend_late = pci_device_suspend_late, - .resume_early = pci_device_resume_early, - .resume = pci_device_resume, .shutdown = pci_device_shutdown, .dev_attrs = pci_dev_attrs, + .pm = PCI_PM_OPS_PTR, }; static int __init pci_driver_init(void) diff --git a/include/linux/pci.h b/include/linux/pci.h index 700704e..507ee52 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -389,7 +389,7 @@ struct pci_driver { int (*resume_early) (struct pci_dev *dev); int (*resume) (struct pci_dev *dev); /* Device woken up */ void (*shutdown) (struct pci_dev *dev); - + struct pm_ext_ops *pm; struct pci_error_handlers *err_handler; struct device_driver driver; struct pci_dynids dynids; -- cgit v0.10.2 From 25e18499e08cb097cbbfeab5de25d094d5312ee5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 21 May 2008 01:40:43 +0200 Subject: Implement new suspend and hibernation callbacks for platform busses Implement new suspend and hibernation callbacks for the platform bus type. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Greg KH Signed-off-by: Jesse Barnes diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 911ec60..3f94039 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -453,6 +453,8 @@ int platform_driver_register(struct platform_driver *drv) drv->driver.suspend = platform_drv_suspend; if (drv->resume) drv->driver.resume = platform_drv_resume; + if (drv->pm) + drv->driver.pm = &drv->pm->base; return driver_register(&drv->driver); } EXPORT_SYMBOL_GPL(platform_driver_register); @@ -560,7 +562,9 @@ static int platform_match(struct device *dev, struct device_driver *drv) return (strncmp(pdev->name, drv->name, BUS_ID_SIZE) == 0); } -static int platform_suspend(struct device *dev, pm_message_t mesg) +#ifdef CONFIG_PM_SLEEP + +static int platform_legacy_suspend(struct device *dev, pm_message_t mesg) { int ret = 0; @@ -570,7 +574,7 @@ static int platform_suspend(struct device *dev, pm_message_t mesg) return ret; } -static int platform_suspend_late(struct device *dev, pm_message_t mesg) +static int platform_legacy_suspend_late(struct device *dev, pm_message_t mesg) { struct platform_driver *drv = to_platform_driver(dev->driver); struct platform_device *pdev; @@ -583,7 +587,7 @@ static int platform_suspend_late(struct device *dev, pm_message_t mesg) return ret; } -static int platform_resume_early(struct device *dev) +static int platform_legacy_resume_early(struct device *dev) { struct platform_driver *drv = to_platform_driver(dev->driver); struct platform_device *pdev; @@ -596,7 +600,7 @@ static int platform_resume_early(struct device *dev) return ret; } -static int platform_resume(struct device *dev) +static int platform_legacy_resume(struct device *dev) { int ret = 0; @@ -606,15 +610,291 @@ static int platform_resume(struct device *dev) return ret; } +static int platform_pm_prepare(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (drv && drv->pm && drv->pm->prepare) + ret = drv->pm->prepare(dev); + + return ret; +} + +static void platform_pm_complete(struct device *dev) +{ + struct device_driver *drv = dev->driver; + + if (drv && drv->pm && drv->pm->complete) + drv->pm->complete(dev); +} + +#ifdef CONFIG_SUSPEND + +static int platform_pm_suspend(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (drv && drv->pm) { + if (drv->pm->suspend) + ret = drv->pm->suspend(dev); + } else { + ret = platform_legacy_suspend(dev, PMSG_SUSPEND); + } + + return ret; +} + +static int platform_pm_suspend_noirq(struct device *dev) +{ + struct platform_driver *pdrv; + int ret = 0; + + if (!dev->driver) + return 0; + + pdrv = to_platform_driver(dev->driver); + if (pdrv->pm) { + if (pdrv->pm->suspend_noirq) + ret = pdrv->pm->suspend_noirq(dev); + } else { + ret = platform_legacy_suspend_late(dev, PMSG_SUSPEND); + } + + return ret; +} + +static int platform_pm_resume(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (drv && drv->pm) { + if (drv->pm->resume) + ret = drv->pm->resume(dev); + } else { + ret = platform_legacy_resume(dev); + } + + return ret; +} + +static int platform_pm_resume_noirq(struct device *dev) +{ + struct platform_driver *pdrv; + int ret = 0; + + if (!dev->driver) + return 0; + + pdrv = to_platform_driver(dev->driver); + if (pdrv->pm) { + if (pdrv->pm->resume_noirq) + ret = pdrv->pm->resume_noirq(dev); + } else { + ret = platform_legacy_resume_early(dev); + } + + return ret; +} + +#else /* !CONFIG_SUSPEND */ + +#define platform_pm_suspend NULL +#define platform_pm_resume NULL +#define platform_pm_suspend_noirq NULL +#define platform_pm_resume_noirq NULL + +#endif /* !CONFIG_SUSPEND */ + +#ifdef CONFIG_HIBERNATION + +static int platform_pm_freeze(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->freeze) + ret = drv->pm->freeze(dev); + } else { + ret = platform_legacy_suspend(dev, PMSG_FREEZE); + } + + return ret; +} + +static int platform_pm_freeze_noirq(struct device *dev) +{ + struct platform_driver *pdrv; + int ret = 0; + + if (!dev->driver) + return 0; + + pdrv = to_platform_driver(dev->driver); + if (pdrv->pm) { + if (pdrv->pm->freeze_noirq) + ret = pdrv->pm->freeze_noirq(dev); + } else { + ret = platform_legacy_suspend_late(dev, PMSG_FREEZE); + } + + return ret; +} + +static int platform_pm_thaw(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (drv && drv->pm) { + if (drv->pm->thaw) + ret = drv->pm->thaw(dev); + } else { + ret = platform_legacy_resume(dev); + } + + return ret; +} + +static int platform_pm_thaw_noirq(struct device *dev) +{ + struct platform_driver *pdrv; + int ret = 0; + + if (!dev->driver) + return 0; + + pdrv = to_platform_driver(dev->driver); + if (pdrv->pm) { + if (pdrv->pm->thaw_noirq) + ret = pdrv->pm->thaw_noirq(dev); + } else { + ret = platform_legacy_resume_early(dev); + } + + return ret; +} + +static int platform_pm_poweroff(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (drv && drv->pm) { + if (drv->pm->poweroff) + ret = drv->pm->poweroff(dev); + } else { + ret = platform_legacy_suspend(dev, PMSG_HIBERNATE); + } + + return ret; +} + +static int platform_pm_poweroff_noirq(struct device *dev) +{ + struct platform_driver *pdrv; + int ret = 0; + + if (!dev->driver) + return 0; + + pdrv = to_platform_driver(dev->driver); + if (pdrv->pm) { + if (pdrv->pm->poweroff_noirq) + ret = pdrv->pm->poweroff_noirq(dev); + } else { + ret = platform_legacy_suspend_late(dev, PMSG_HIBERNATE); + } + + return ret; +} + +static int platform_pm_restore(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (drv && drv->pm) { + if (drv->pm->restore) + ret = drv->pm->restore(dev); + } else { + ret = platform_legacy_resume(dev); + } + + return ret; +} + +static int platform_pm_restore_noirq(struct device *dev) +{ + struct platform_driver *pdrv; + int ret = 0; + + if (!dev->driver) + return 0; + + pdrv = to_platform_driver(dev->driver); + if (pdrv->pm) { + if (pdrv->pm->restore_noirq) + ret = pdrv->pm->restore_noirq(dev); + } else { + ret = platform_legacy_resume_early(dev); + } + + return ret; +} + +#else /* !CONFIG_HIBERNATION */ + +#define platform_pm_freeze NULL +#define platform_pm_thaw NULL +#define platform_pm_poweroff NULL +#define platform_pm_restore NULL +#define platform_pm_freeze_noirq NULL +#define platform_pm_thaw_noirq NULL +#define platform_pm_poweroff_noirq NULL +#define platform_pm_restore_noirq NULL + +#endif /* !CONFIG_HIBERNATION */ + +struct pm_ext_ops platform_pm_ops = { + .base = { + .prepare = platform_pm_prepare, + .complete = platform_pm_complete, + .suspend = platform_pm_suspend, + .resume = platform_pm_resume, + .freeze = platform_pm_freeze, + .thaw = platform_pm_thaw, + .poweroff = platform_pm_poweroff, + .restore = platform_pm_restore, + }, + .suspend_noirq = platform_pm_suspend_noirq, + .resume_noirq = platform_pm_resume_noirq, + .freeze_noirq = platform_pm_freeze_noirq, + .thaw_noirq = platform_pm_thaw_noirq, + .poweroff_noirq = platform_pm_poweroff_noirq, + .restore_noirq = platform_pm_restore_noirq, +}; + +#define PLATFORM_PM_OPS_PTR &platform_pm_ops + +#else /* !CONFIG_PM_SLEEP */ + +#define PLATFORM_PM_OPS_PTR NULL + +#endif /* !CONFIG_PM_SLEEP */ + struct bus_type platform_bus_type = { .name = "platform", .dev_attrs = platform_dev_attrs, .match = platform_match, .uevent = platform_uevent, - .suspend = platform_suspend, - .suspend_late = platform_suspend_late, - .resume_early = platform_resume_early, - .resume = platform_resume, + .pm = PLATFORM_PM_OPS_PTR, }; EXPORT_SYMBOL_GPL(platform_bus_type); diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 3261681..95ac21a 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -53,6 +53,7 @@ struct platform_driver { int (*suspend_late)(struct platform_device *, pm_message_t state); int (*resume_early)(struct platform_device *); int (*resume)(struct platform_device *); + struct pm_ext_ops *pm; struct device_driver driver; }; -- cgit v0.10.2 From e7891c733f9b26c851edde50cf886a30bd133dbd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 22 May 2008 14:35:21 -0700 Subject: PCI/x86: write_pci_config_byte fix offset also add write_pci_config_16 Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index 42df4b6..8e2821e 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c @@ -49,7 +49,14 @@ void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val) { PDprintk("%x writing to %x: %x\n", slot, offset, val); outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); - outb(val, 0xcfc); + outb(val, 0xcfc + (offset&3)); +} + +void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val) +{ + PDprintk("%x writing to %x: %x\n", slot, offset, val); + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + outw(val, 0xcfc + (offset&2)); } int early_pci_allowed(void) diff --git a/include/asm-x86/pci-direct.h b/include/asm-x86/pci-direct.h index 5b21485..7bd40b4 100644 --- a/include/asm-x86/pci-direct.h +++ b/include/asm-x86/pci-direct.h @@ -11,6 +11,7 @@ extern u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset); extern u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset); extern void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val); extern void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val); +extern void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val); extern int early_pci_allowed(void); -- cgit v0.10.2 From e3f2baebf4209b5927e23fa65d5977d31db936b3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 22 May 2008 14:35:11 -0700 Subject: PCI/x86: early dump pci conf space v2 Allows us to dump PCI space before any kernel changes have been made. Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 6dff128..524b685 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -361,6 +361,11 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); +#ifdef CONFIG_PCI + if (pci_early_dump_regs) + early_dump_pci_devices(); +#endif + #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT if (init_ohci1394_dma_early) init_ohci1394_dma_on_all_controllers(); diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 3a5261b..d19fd07 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -20,6 +20,7 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | PCI_PROBE_MMCONF; +unsigned int pci_early_dump_regs; static int pci_bf_sort; int pci_routeirq; int pcibios_last_bus = -1; @@ -511,6 +512,9 @@ char * __devinit pcibios_setup(char *str) } else if (!strcmp(str, "use_crs")) { pci_probe |= PCI_USE__CRS; return NULL; + } else if (!strcmp(str, "earlydump")) { + pci_early_dump_regs = 1; + return NULL; } else if (!strcmp(str, "routeirq")) { pci_routeirq = 1; return NULL; diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index 8e2821e..858dbe3 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c @@ -64,3 +64,54 @@ int early_pci_allowed(void) return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) == PCI_PROBE_CONF1; } + +void early_dump_pci_device(u8 bus, u8 slot, u8 func) +{ + int i; + int j; + u32 val; + + printk("PCI: %02x:%02x:%02x", bus, slot, func); + + for (i = 0; i < 256; i += 4) { + if (!(i & 0x0f)) + printk("\n%04x:",i); + + val = read_pci_config(bus, slot, func, i); + for (j = 0; j < 4; j++) { + printk(" %02x", val & 0xff); + val >>= 8; + } + } + printk("\n"); +} + +void early_dump_pci_devices(void) +{ + unsigned bus, slot, func; + + if (!early_pci_allowed()) + return; + + for (bus = 0; bus < 256; bus++) { + for (slot = 0; slot < 32; slot++) { + for (func = 0; func < 8; func++) { + u32 class; + u8 type; + class = read_pci_config(bus, slot, func, + PCI_CLASS_REVISION); + if (class == 0xffffffff) + break; + + early_dump_pci_device(bus, slot, func); + + /* No multi-function device? */ + type = read_pci_config_byte(bus, slot, func, + PCI_HEADER_TYPE); + if (!(type & 0x80)) + break; + } + } + } +} + diff --git a/include/asm-x86/pci-direct.h b/include/asm-x86/pci-direct.h index 7bd40b4..80c775d 100644 --- a/include/asm-x86/pci-direct.h +++ b/include/asm-x86/pci-direct.h @@ -15,4 +15,7 @@ extern void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val); extern int early_pci_allowed(void); +extern unsigned int pci_early_dump_regs; +extern void early_dump_pci_device(u8 bus, u8 slot, u8 func); +extern void early_dump_pci_devices(void); #endif -- cgit v0.10.2 From b143b3cc82fac459feb1abdffb1d77be9805adaa Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Wed, 28 May 2008 14:56:00 +0900 Subject: pciehp: remove redundant pci_dev initialization Remove the redundant initialization of pci_dev member of struct controller in pciehp_probe(). It is initialized in pcie_init(). Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 86d04c6..54553b1 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -454,7 +454,6 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_ INIT_LIST_HEAD(&ctrl->slot_list); pdev = dev->port; - ctrl->pci_dev = pdev; rc = pcie_init(ctrl, dev); if (rc) { -- cgit v0.10.2 From 125c39f7d233de28f342d80858025ffed0c4b7f4 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Wed, 28 May 2008 14:57:30 +0900 Subject: pciehp: evaluate _OSC/OSHP before controller init Current pciehp evaluates _OSC/OSHP method after some controller initialization is done. So if evaluating _OSC/OSHP is failed, we need to cleanup already initialized data structures or hardware. This clearly is not robust way. With this patch, _OSC/OSHP evaluation is done first. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 79c9dda..084b73e 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -202,6 +202,7 @@ struct hpc_ops { #include #include +extern int pciehp_acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev); #define pciehp_get_hp_hw_control_from_firmware(dev) \ pciehp_acpi_get_hp_hw_control_from_firmware(dev) static inline int pciehp_get_hp_params_from_firmware(struct pci_dev *dev, diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 54553b1..49414e9 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -444,7 +444,13 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_ struct controller *ctrl; struct slot *t_slot; u8 value; - struct pci_dev *pdev; + struct pci_dev *pdev = dev->port; + + if (pciehp_force) + dbg("Bypassing BIOS check for pciehp use on %s\n", + pci_name(pdev)); + else if (pciehp_get_hp_hw_control_from_firmware(pdev)) + goto err_out_none; ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); if (!ctrl) { @@ -453,8 +459,6 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_ } INIT_LIST_HEAD(&ctrl->slot_list); - pdev = dev->port; - rc = pcie_init(ctrl, dev); if (rc) { dbg("%s: controller initialization failed\n", PCIE_MODULE_NAME); diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 79f1049..6339c63 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -1018,7 +1018,7 @@ static struct hpc_ops pciehp_hpc_ops = { }; #ifdef CONFIG_ACPI -static int pciehp_acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev) +int pciehp_acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev) { acpi_status status; acpi_handle chandle, handle = DEVICE_ACPI_HANDLE(&(dev->dev)); @@ -1122,23 +1122,10 @@ int pcie_init_hardware_part2(struct controller *ctrl, struct pcie_device *dev) if (pcie_write_cmd(ctrl, cmd, mask)) { err("%s: Cannot enable software notification\n", __func__); - goto abort; + return -1; } - if (pciehp_force) - dbg("Bypassing BIOS check for pciehp use on %s\n", - pci_name(ctrl->pci_dev)); - else if (pciehp_get_hp_hw_control_from_firmware(ctrl->pci_dev)) - goto abort_disable_intr; - return 0; - - /* We end up here for the many possible ways to fail this API. */ -abort_disable_intr: - if (pcie_write_cmd(ctrl, 0, HP_INTR_ENABLE)) - err("%s : disabling interrupts failed\n", __func__); -abort: - return -1; } static inline void dbg_ctrl(struct controller *ctrl) -- cgit v0.10.2 From d737bdc141f0f040171fffbb7f9e08a825b27aab Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Wed, 28 May 2008 14:59:44 +0900 Subject: pciehp: block signals while waiting for command completion Since we need to wait for command completion for muximum 1sec, waiting command should not be interrupted by a signal. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 6339c63..7e3a3d1 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -268,9 +268,8 @@ completed: return timeout; } -static inline int pcie_wait_cmd(struct controller *ctrl, int poll) +static inline void pcie_wait_cmd(struct controller *ctrl, int poll) { - int retval = 0; unsigned int msecs = pciehp_poll_mode ? 2500 : 1000; unsigned long timeout = msecs_to_jiffies(msecs); int rc; @@ -278,16 +277,9 @@ static inline int pcie_wait_cmd(struct controller *ctrl, int poll) if (poll) rc = pcie_poll_cmd(ctrl); else - rc = wait_event_interruptible_timeout(ctrl->queue, - !ctrl->cmd_busy, timeout); + rc = wait_event_timeout(ctrl->queue, !ctrl->cmd_busy, timeout); if (!rc) dbg("Command not completed in 1000 msec\n"); - else if (rc < 0) { - retval = -EINTR; - info("Command was interrupted by a signal\n"); - } - - return retval; } /** @@ -365,7 +357,7 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) if (!(slot_ctrl & HP_INTR_ENABLE) || !(slot_ctrl & CMD_CMPL_INTR_ENABLE)) poll = 1; - retval = pcie_wait_cmd(ctrl, poll); + pcie_wait_cmd(ctrl, poll); } out: mutex_unlock(&ctrl->ctrl_lock); @@ -797,7 +789,7 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) if (intr_loc & CMD_COMPLETED) { ctrl->cmd_busy = 0; smp_mb(); - wake_up_interruptible(&ctrl->queue); + wake_up(&ctrl->queue); } if (!(intr_loc & ~CMD_COMPLETED)) -- cgit v0.10.2 From ac9c052d10d8d6f46a30cb46c0d6d753997c299f Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Wed, 28 May 2008 15:01:03 +0900 Subject: shpchp: check firmware before taking control Fix the following problems of shpchp driver about getting hotplug control from firmware. - The shpchp driver must not control the hotplug controller if it fails to get control from the firmware. But current shpchp controls the hotplug controller regardless the result, because it doesn't check the return value of get_hp_hw_control_from_firmware(). - Current shpchp driver doesn't support _OSC. The pciehp driver already have the code for evaluating _OSC and OSHP and shpchp and pciehp can share it. So this patch move that code from pciehp to acpi_pcihp.c. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c index f8c187a..93e37f0 100644 --- a/drivers/pci/hotplug/acpi_pcihp.c +++ b/drivers/pci/hotplug/acpi_pcihp.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -299,7 +300,7 @@ free_and_return: * * @handle - the handle of the hotplug controller. */ -acpi_status acpi_run_oshp(acpi_handle handle) +static acpi_status acpi_run_oshp(acpi_handle handle) { acpi_status status; struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL }; @@ -322,9 +323,6 @@ acpi_status acpi_run_oshp(acpi_handle handle) kfree(string.pointer); return status; } -EXPORT_SYMBOL_GPL(acpi_run_oshp); - - /* acpi_get_hp_params_from_firmware * @@ -374,6 +372,85 @@ acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus, } EXPORT_SYMBOL_GPL(acpi_get_hp_params_from_firmware); +/** + * acpi_get_hp_hw_control_from_firmware + * @dev: the pci_dev of the bridge that has a hotplug controller + * @flags: requested control bits for _OSC + * + * Attempt to take hotplug control from firmware. + */ +int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags) +{ + acpi_status status; + acpi_handle chandle, handle = DEVICE_ACPI_HANDLE(&(dev->dev)); + struct pci_dev *pdev = dev; + struct pci_bus *parent; + struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL }; + + flags &= (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | + OSC_SHPC_NATIVE_HP_CONTROL | + OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); + if (!flags) { + err("Invalid flags %u specified!\n", flags); + return -EINVAL; + } + + /* + * Per PCI firmware specification, we should run the ACPI _OSC + * method to get control of hotplug hardware before using it. If + * an _OSC is missing, we look for an OSHP to do the same thing. + * To handle different BIOS behavior, we look for _OSC and OSHP + * within the scope of the hotplug controller and its parents, + * upto the host bridge under which this controller exists. + */ + while (!handle) { + /* + * This hotplug controller was not listed in the ACPI name + * space at all. Try to get acpi handle of parent pci bus. + */ + if (!pdev || !pdev->bus->parent) + break; + parent = pdev->bus->parent; + dbg("Could not find %s in acpi namespace, trying parent\n", + pci_name(pdev)); + if (!parent->self) + /* Parent must be a host bridge */ + handle = acpi_get_pci_rootbridge_handle( + pci_domain_nr(parent), + parent->number); + else + handle = DEVICE_ACPI_HANDLE(&(parent->self->dev)); + pdev = parent->self; + } + + while (handle) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &string); + dbg("Trying to get hotplug control for %s \n", + (char *)string.pointer); + status = pci_osc_control_set(handle, flags); + if (status == AE_NOT_FOUND) + status = acpi_run_oshp(handle); + if (ACPI_SUCCESS(status)) { + dbg("Gained control for hotplug HW for pci %s (%s)\n", + pci_name(dev), (char *)string.pointer); + kfree(string.pointer); + return 0; + } + if (acpi_root_bridge(handle)) + break; + chandle = handle; + status = acpi_get_parent(chandle, &handle); + if (ACPI_FAILURE(status)) + break; + } + + dbg("Cannot get control of hotplug hardware for pci %s\n", + pci_name(dev)); + + kfree(string.pointer); + return -ENODEV; +} +EXPORT_SYMBOL(acpi_get_hp_hw_control_from_firmware); /* acpi_root_bridge - check to see if this acpi object is a root bridge * diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 084b73e..8492fab 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -202,9 +202,13 @@ struct hpc_ops { #include #include -extern int pciehp_acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev); -#define pciehp_get_hp_hw_control_from_firmware(dev) \ - pciehp_acpi_get_hp_hw_control_from_firmware(dev) +static inline int pciehp_get_hp_hw_control_from_firmware(struct pci_dev *dev) +{ + u32 flags = (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | + OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); + return acpi_get_hp_hw_control_from_firmware(dev, flags); +} + static inline int pciehp_get_hp_params_from_firmware(struct pci_dev *dev, struct hotplug_params *hpp) { diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 7e3a3d1..a48021d 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -1009,75 +1009,6 @@ static struct hpc_ops pciehp_hpc_ops = { .check_lnk_status = hpc_check_lnk_status, }; -#ifdef CONFIG_ACPI -int pciehp_acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev) -{ - acpi_status status; - acpi_handle chandle, handle = DEVICE_ACPI_HANDLE(&(dev->dev)); - struct pci_dev *pdev = dev; - struct pci_bus *parent; - struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL }; - - /* - * Per PCI firmware specification, we should run the ACPI _OSC - * method to get control of hotplug hardware before using it. - * If an _OSC is missing, we look for an OSHP to do the same thing. - * To handle different BIOS behavior, we look for _OSC and OSHP - * within the scope of the hotplug controller and its parents, upto - * the host bridge under which this controller exists. - */ - while (!handle) { - /* - * This hotplug controller was not listed in the ACPI name - * space at all. Try to get acpi handle of parent pci bus. - */ - if (!pdev || !pdev->bus->parent) - break; - parent = pdev->bus->parent; - dbg("Could not find %s in acpi namespace, trying parent\n", - pci_name(pdev)); - if (!parent->self) - /* Parent must be a host bridge */ - handle = acpi_get_pci_rootbridge_handle( - pci_domain_nr(parent), - parent->number); - else - handle = DEVICE_ACPI_HANDLE( - &(parent->self->dev)); - pdev = parent->self; - } - - while (handle) { - acpi_get_name(handle, ACPI_FULL_PATHNAME, &string); - dbg("Trying to get hotplug control for %s \n", - (char *)string.pointer); - status = pci_osc_control_set(handle, - OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL | - OSC_PCI_EXPRESS_NATIVE_HP_CONTROL); - if (status == AE_NOT_FOUND) - status = acpi_run_oshp(handle); - if (ACPI_SUCCESS(status)) { - dbg("Gained control for hotplug HW for pci %s (%s)\n", - pci_name(dev), (char *)string.pointer); - kfree(string.pointer); - return 0; - } - if (acpi_root_bridge(handle)) - break; - chandle = handle; - status = acpi_get_parent(chandle, &handle); - if (ACPI_FAILURE(status)) - break; - } - - dbg("Cannot get control of hotplug hardware for pci %s\n", - pci_name(dev)); - - kfree(string.pointer); - return -1; -} -#endif - static int pcie_init_hardware_part1(struct controller *ctrl, struct pcie_device *dev) { diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h index f66e8d6..8a026f7 100644 --- a/drivers/pci/hotplug/shpchp.h +++ b/drivers/pci/hotplug/shpchp.h @@ -170,6 +170,7 @@ extern void shpchp_queue_pushbutton_work(struct work_struct *work); extern int shpc_init( struct controller *ctrl, struct pci_dev *pdev); #ifdef CONFIG_ACPI +#include static inline int get_hp_params_from_firmware(struct pci_dev *dev, struct hotplug_params *hpp) { @@ -177,14 +178,15 @@ static inline int get_hp_params_from_firmware(struct pci_dev *dev, return -ENODEV; return 0; } -#define get_hp_hw_control_from_firmware(pdev) \ - do { \ - if (DEVICE_ACPI_HANDLE(&(pdev->dev))) \ - acpi_run_oshp(DEVICE_ACPI_HANDLE(&(pdev->dev)));\ - } while (0) + +static inline int get_hp_hw_control_from_firmware(struct pci_dev *dev) +{ + u32 flags = OSC_SHPC_NATIVE_HP_CONTROL; + return acpi_get_hp_hw_control_from_firmware(dev, flags); +} #else #define get_hp_params_from_firmware(dev, hpp) (-ENODEV) -#define get_hp_hw_control_from_firmware(dev) do { } while (0) +#define get_hp_hw_control_from_firmware(dev) (0) #endif struct ctrl_reg { diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c index 0066b0b..df41ecc 100644 --- a/drivers/pci/hotplug/shpchp_core.c +++ b/drivers/pci/hotplug/shpchp_core.c @@ -330,13 +330,14 @@ static int get_cur_bus_speed (struct hotplug_slot *hotplug_slot, enum pci_bus_sp static int is_shpc_capable(struct pci_dev *dev) { - if ((dev->vendor == PCI_VENDOR_ID_AMD) || (dev->device == - PCI_DEVICE_ID_AMD_GOLAM_7450)) - return 1; - if (pci_find_capability(dev, PCI_CAP_ID_SHPC)) - return 1; - - return 0; + if ((dev->vendor == PCI_VENDOR_ID_AMD) || (dev->device == + PCI_DEVICE_ID_AMD_GOLAM_7450)) + return 1; + if (!pci_find_capability(dev, PCI_CAP_ID_SHPC)) + return 0; + if (get_hp_hw_control_from_firmware(dev)) + return 0; + return 1; } static int shpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent) diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c index 7d770b2..7a0bff3 100644 --- a/drivers/pci/hotplug/shpchp_hpc.c +++ b/drivers/pci/hotplug/shpchp_hpc.c @@ -1084,7 +1084,6 @@ int shpc_init(struct controller *ctrl, struct pci_dev *pdev) dbg("%s: HPC at b:d:f:irq=0x%x:%x:%x:%x\n", __func__, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), pdev->irq); - get_hp_hw_control_from_firmware(pdev); /* * If this is the first controller to be initialized, diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index 8f67e8f..dbdcd1a 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -227,9 +227,9 @@ struct hotplug_params { #include #include #include -extern acpi_status acpi_run_oshp(acpi_handle handle); extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus, struct hotplug_params *hpp); +int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags); int acpi_root_bridge(acpi_handle handle); #endif #endif -- cgit v0.10.2 From d8b23e8ffb567758fc6074e97210ddb42114827c Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Mon, 2 Jun 2008 09:07:46 -0700 Subject: pciehp: fixes typo in dbg_ctrl() in pciehp_hpc.c Fixup a typo in dbg_ctrl(); it was fetching SLOTSTATUS twice. Signed-off-by: Kenji Kaneshige Signed-off-by: Kristen Carlson Accardi Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index a48021d..eeb517b 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -1086,7 +1086,7 @@ static inline void dbg_ctrl(struct controller *ctrl) dbg(" Comamnd Completed : %3s\n", NO_CMD_CMPL(ctrl)? "no" : "yes"); pciehp_readw(ctrl, SLOTSTATUS, ®16); dbg("Slot Status : 0x%04x\n", reg16); - pciehp_readw(ctrl, SLOTSTATUS, ®16); + pciehp_readw(ctrl, SLOTCTRL, ®16); dbg("Slot Control : 0x%04x\n", reg16); } -- cgit v0.10.2 From 6a3f084971bad985722afe25b16a5c0a990cea75 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Mon, 2 Jun 2008 09:22:34 -0700 Subject: pciehp: removes redundant NULL write to slot status register Cleanup to remove a redundant NULL write to SLOTSTATUS. Signed-off-by: Kenji Kaneshige Signed-off-by: Kristen Carlson Accardi Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index eeb517b..0cfaedf 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -777,7 +777,7 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) intr_loc |= detected; if (!intr_loc) return IRQ_NONE; - if (pciehp_writew(ctrl, SLOTSTATUS, detected)) { + if (detected && pciehp_writew(ctrl, SLOTSTATUS, detected)) { err("%s: Cannot write to SLOTSTATUS\n", __func__); return IRQ_NONE; } -- cgit v0.10.2 From ece6763419f44ed72f4fc78752e5f5364df1794b Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 29 May 2008 15:04:38 +0200 Subject: PCI: eliminate double kfree in intel-iommu initialization The destination of goto error also does a kfree(g_iommus), so it is not correct to do one here. This was found using Coccinelle (http://www.emn.fr/x-info/coccinelle/). Signed-off-by: Julia Lawall Signed-off-by: Jesse Barnes diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 66c0fd2..4f05d91 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1725,7 +1725,6 @@ int __init init_dmars(void) deferred_flush = kzalloc(g_num_of_iommus * sizeof(struct deferred_flush_tables), GFP_KERNEL); if (!deferred_flush) { - kfree(g_iommus); ret = -ENOMEM; goto error; } -- cgit v0.10.2 From 10260d9ab702454460242ef4d3ecfc49fcb96a5b Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 4 Jun 2008 13:53:31 +0200 Subject: PCI: Unhide the SMBus on the Compaq Evo D510 One more machine with a hidden Intel SMBus. Unhiding it reveals a SMSC EMC6D100 hardware monitoring chip. I have checked that this machine has no ACPI magic touching the SMBus nor the hardware monitoring chip, so this should be safe. Signed-off-by: Jean Delvare Signed-off-by: Jesse Barnes diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 44aabb1..93faf84 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1054,6 +1054,12 @@ static void __init asus_hides_smbus_hostbridge(struct pci_dev *dev) * its on-board VGA controller */ asus_hides_smbus = 1; } + else if (dev->device == PCI_DEVICE_ID_INTEL_82845G_IG) + switch(dev->subsystem_device) { + case 0x00b8: /* Compaq Evo D510 CMT */ + case 0x00b9: /* Compaq Evo D510 SFF */ + asus_hides_smbus = 1; + } } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82845_HB, asus_hides_smbus_hostbridge); @@ -1068,6 +1074,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82855GM_HB, as DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82915GM_HB, asus_hides_smbus_hostbridge); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82810_IG3, asus_hides_smbus_hostbridge); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82845G_IG, asus_hides_smbus_hostbridge); static void asus_hides_smbus_lpc(struct pci_dev *dev) { -- cgit v0.10.2 From 5d9526d07a8dc87460c13c277b3edcc26b0e662f Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Wed, 4 Jun 2008 11:39:07 -0600 Subject: PCIe: fix 'symbol not declared' sparse warnings While refreshing my physical PCI slot series against upstream, I noticed a few simple sparse/compile warnings that were easy to fix. Fix the following sparse warnings in PCIe: drivers/pci/pcie/aer/aerdrv.c:86:6: warning: symbol 'pci_no_aer' was not declared. Should it be static? drivers/pci/pcie/portdrv_bus.c:21:17: warning: symbol 'pcie_port_bus_type' was not declared. Should it be static? Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 07c3bdb..b7a3aa6 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -26,6 +26,7 @@ #include #include "aerdrv.h" +#include "../../pci.h" /* * Version Information diff --git a/drivers/pci/pcie/portdrv_bus.c b/drivers/pci/pcie/portdrv_bus.c index 3f09768..359fe55 100644 --- a/drivers/pci/pcie/portdrv_bus.c +++ b/drivers/pci/pcie/portdrv_bus.c @@ -13,6 +13,7 @@ #include #include +#include "portdrv.h" static int pcie_port_bus_match(struct device *dev, struct device_driver *drv); static int pcie_port_bus_suspend(struct device *dev, pm_message_t state); -- cgit v0.10.2 From 27e468597315e5ce078fdd39856b7954b639183a Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Sun, 8 Jun 2008 13:47:02 +0200 Subject: PCI: unhide the SMBus on the Compaq Deskpro EN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch unhides the SMBus on Compaq Deskpro EN SFF P667 with the Intel 815E chipset. Unhiding it reveals a THMC51 hardware monitoring chip. Jean Delvare has checked that this machine has no ACPI magic touching the SMBus nor the hardware monitoring chip, so this should be safe. The patch was tested on Fedora Core 9 with 2.6.25.4 kernel. Signed-off-by: Krzysztof Helt Tested-by: Rafał Haładuda CC: Jean Delvare Signed-off-by: Jesse Barnes diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 93faf84..92b52eb 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1060,6 +1060,14 @@ static void __init asus_hides_smbus_hostbridge(struct pci_dev *dev) case 0x00b9: /* Compaq Evo D510 SFF */ asus_hides_smbus = 1; } + else if (dev->device == PCI_DEVICE_ID_INTEL_82815_CGC) + switch (dev->subsystem_device) { + case 0x001A: /* Compaq Deskpro EN SSF P667 815E */ + /* Motherboard doesn't have host bridge + * subvendor/subdevice IDs, therefore checking + * its on-board VGA controller */ + asus_hides_smbus = 1; + } } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82845_HB, asus_hides_smbus_hostbridge); @@ -1075,6 +1083,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82915GM_HB, as DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82810_IG3, asus_hides_smbus_hostbridge); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82845G_IG, asus_hides_smbus_hostbridge); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82815_CGC, asus_hides_smbus_hostbridge); static void asus_hides_smbus_lpc(struct pci_dev *dev) { -- cgit v0.10.2 From 64dab20450184e6586c12704da11ce48a0918050 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 10 Jun 2008 14:20:03 -0700 Subject: PCI: update location of PCI hotplug mailing lists Just concentrate PCI traffic at linux-pci@vger rather than splitting it between vger and a sf.net moderated (!) mailing list. Acked-by: Matthew Wilcox Acked-by: Scott Murray Acked-by: Kristen Carlson Accardi Signed-off-by: Jesse Barnes diff --git a/MAINTAINERS b/MAINTAINERS index cb71eb4..89db949 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -248,7 +248,7 @@ S: Supported ACPI PCI HOTPLUG DRIVER P: Kristen Carlson Accardi M: kristen.c.accardi@intel.com -L: pcihpd-discuss@lists.sourceforge.net +L: linux-pci@vger.kernel.org S: Supported ACPI THERMAL DRIVER @@ -1134,21 +1134,21 @@ COMPACTPCI HOTPLUG CORE P: Scott Murray M: scottm@somanetworks.com M: scott@spiteful.org -L: pcihpd-discuss@lists.sourceforge.net +L: linux-pci@vger.kernel.org S: Supported COMPACTPCI HOTPLUG ZIATECH ZT5550 DRIVER P: Scott Murray M: scottm@somanetworks.com M: scott@spiteful.org -L: pcihpd-discuss@lists.sourceforge.net +L: linux-pci@vger.kernel.org S: Supported COMPACTPCI HOTPLUG GENERIC DRIVER P: Scott Murray M: scottm@somanetworks.com M: scott@spiteful.org -L: pcihpd-discuss@lists.sourceforge.net +L: linux-pci@vger.kernel.org S: Supported COMPUTONE INTELLIPORT MULTIPORT CARD @@ -3177,7 +3177,7 @@ S: Supported PCIE HOTPLUG DRIVER P: Kristen Carlson Accardi M: kristen.c.accardi@intel.com -L: pcihpd-discuss@lists.sourceforge.net +L: linux-pci@vger.kernel.org S: Supported PCMCIA SUBSYSTEM @@ -3815,7 +3815,7 @@ S: Maintained SHPC HOTPLUG DRIVER P: Kristen Carlson Accardi M: kristen.c.accardi@intel.com -L: pcihpd-discuss@lists.sourceforge.net +L: linux-pci@vger.kernel.org S: Supported SECURE DIGITAL HOST CONTROLLER INTERFACE DRIVER -- cgit v0.10.2 From fe99740cac117f208707488c03f3789cf4904957 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Tue, 10 Jun 2008 15:27:37 -0600 Subject: PCI: construct one fakephp slot per PCI slot Register one slot per slot, rather than one slot per function. Change the name of the slot to fake%d instead of the pci address. Signed-off-by: Alex Chiang Signed-off-by: Matthew Wilcox Cc: Greg KH Cc: Kristen Carlson Accardi Cc: Len Brown Cc: Kenji Kaneshige Signed-off-by: Andrew Morton Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c index 7e9a827..b57223f 100644 --- a/drivers/pci/hotplug/fakephp.c +++ b/drivers/pci/hotplug/fakephp.c @@ -66,6 +66,7 @@ struct dummy_slot { struct pci_dev *dev; struct work_struct remove_work; unsigned long removed; + char name[8]; }; static int debug; @@ -100,6 +101,7 @@ static int add_slot(struct pci_dev *dev) struct dummy_slot *dslot; struct hotplug_slot *slot; int retval = -ENOMEM; + static int count = 1; slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL); if (!slot) @@ -113,13 +115,13 @@ static int add_slot(struct pci_dev *dev) slot->info->max_bus_speed = PCI_SPEED_UNKNOWN; slot->info->cur_bus_speed = PCI_SPEED_UNKNOWN; - slot->name = &dev->dev.bus_id[0]; - dbg("slot->name = %s\n", slot->name); - dslot = kzalloc(sizeof(struct dummy_slot), GFP_KERNEL); if (!dslot) goto error_info; + slot->name = dslot->name; + snprintf(slot->name, sizeof(dslot->name), "fake%d", count++); + dbg("slot->name = %s\n", slot->name); slot->ops = &dummy_hotplug_slot_ops; slot->release = &dummy_release; slot->private = dslot; @@ -148,17 +150,17 @@ error: static int __init pci_scan_buses(void) { struct pci_dev *dev = NULL; - int retval = 0; + int lastslot = 0; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - retval = add_slot(dev); - if (retval) { - pci_dev_put(dev); - break; - } + if (PCI_FUNC(dev->devfn) > 0 && + lastslot == PCI_SLOT(dev->devfn)) + continue; + lastslot = PCI_SLOT(dev->devfn); + add_slot(dev); } - return retval; + return 0; } static void remove_slot(struct dummy_slot *dslot) @@ -296,23 +298,9 @@ static int enable_slot(struct hotplug_slot *hotplug_slot) return 0; } -/* find the hotplug_slot for the pci_dev */ -static struct hotplug_slot *get_slot_from_dev(struct pci_dev *dev) -{ - struct dummy_slot *dslot; - - list_for_each_entry(dslot, &slot_list, node) { - if (dslot->dev == dev) - return dslot->slot; - } - return NULL; -} - - static int disable_slot(struct hotplug_slot *slot) { struct dummy_slot *dslot; - struct hotplug_slot *hslot; struct pci_dev *dev; int func; @@ -322,41 +310,27 @@ static int disable_slot(struct hotplug_slot *slot) dbg("%s - physical_slot = %s\n", __func__, slot->name); - /* don't disable bridged devices just yet, we can't handle them easily... */ - if (dslot->dev->subordinate) { - err("Can't remove PCI devices with other PCI devices behind it yet.\n"); - return -ENODEV; - } - if (test_and_set_bit(0, &dslot->removed)) { - dbg("Slot already scheduled for removal\n"); - return -ENODEV; - } - /* search for subfunctions and disable them first */ - if (!(dslot->dev->devfn & 7)) { - for (func = 1; func < 8; func++) { - dev = pci_get_slot(dslot->dev->bus, - dslot->dev->devfn + func); - if (dev) { - hslot = get_slot_from_dev(dev); - if (hslot) - disable_slot(hslot); - else { - err("Hotplug slot not found for subfunction of PCI device\n"); - return -ENODEV; - } - pci_dev_put(dev); - } else - dbg("No device in slot found\n"); + for (func = 7; func >= 0; func--) { + dev = pci_get_slot(dslot->dev->bus, dslot->dev->devfn + func); + if (!dev) + continue; + + if (test_and_set_bit(0, &dslot->removed)) { + dbg("Slot already scheduled for removal\n"); + return -ENODEV; } - } - /* remove the device from the pci core */ - pci_remove_bus_device(dslot->dev); + /* queue work item to blow away this sysfs entry and other + * parts. + */ + INIT_WORK(&dslot->remove_work, remove_slot_worker); + queue_work(dummyphp_wq, &dslot->remove_work); - /* queue work item to blow away this sysfs entry and other parts. */ - INIT_WORK(&dslot->remove_work, remove_slot_worker); - queue_work(dummyphp_wq, &dslot->remove_work); + /* blow away this sysfs entry and other parts. */ + remove_slot(dslot); + pci_dev_put(dev); + } return 0; } -- cgit v0.10.2 From f46753c5e354b857b20ab8e0fe7b2579831dc369 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Tue, 10 Jun 2008 15:28:50 -0600 Subject: PCI: introduce pci_slot Currently, /sys/bus/pci/slots/ only exposes hotplug attributes when a hotplug driver is loaded, but PCI slots have attributes such as address, speed, width, etc. that are not related to hotplug at all. Introduce pci_slot as the primary data structure and kobject model. Hotplug attributes described in hotplug_slot become a secondary structure associated with the pci_slot. This patch only creates the infrastructure that allows the separation of PCI slot attributes and hotplug attributes. In this patch, the PCI hotplug core remains the only user of this infrastructure, and thus, /sys/bus/pci/slots/ will still only become populated when a hotplug driver is loaded. A later patch in this series will add a second user of this new infrastructure and demonstrate splitting the task of exposing pci_slot attributes from hotplug_slot attributes. - Make pci_slot the primary sysfs entity. hotplug_slot becomes a subsidiary structure. o pci_create_slot() creates and registers a slot with the PCI core o pci_slot_add_hotplug() gives it hotplug capability - Change the prototype of pci_hp_register() to take the bus and slot number (on parent bus) as parameters. - Remove all the ->get_address methods since this functionality is now handled by pci_slot directly. [achiang@hp.com: rpaphp-correctly-pci_hp_register-for-empty-pci-slots] Tested-by: Badari Pulavarty Acked-by: Benjamin Herrenschmidt [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: make headers_check happy] [akpm@linux-foundation.org: nuther build fix] [akpm@linux-foundation.org: fix typo in #include] Signed-off-by: Alex Chiang Signed-off-by: Matthew Wilcox Cc: Greg KH Cc: Kristen Carlson Accardi Cc: Len Brown Acked-by: Kenji Kaneshige Signed-off-by: Andrew Morton Signed-off-by: Jesse Barnes diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 4d1ce2e..7d63f8c 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -2,7 +2,7 @@ # Makefile for the PCI bus specific drivers. # -obj-y += access.o bus.o probe.o remove.o pci.o quirks.o \ +obj-y += access.o bus.o probe.o remove.o pci.o quirks.o slot.o \ pci-driver.o search.o pci-sysfs.o rom.o setup-res.o obj-$(CONFIG_PROC_FS) += proc.o diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h index 7a29164..eecf7cb 100644 --- a/drivers/pci/hotplug/acpiphp.h +++ b/drivers/pci/hotplug/acpiphp.h @@ -215,7 +215,6 @@ extern u8 acpiphp_get_power_status (struct acpiphp_slot *slot); extern u8 acpiphp_get_attention_status (struct acpiphp_slot *slot); extern u8 acpiphp_get_latch_status (struct acpiphp_slot *slot); extern u8 acpiphp_get_adapter_status (struct acpiphp_slot *slot); -extern u32 acpiphp_get_address (struct acpiphp_slot *slot); /* variables */ extern int acpiphp_debug; diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c index 7af68ba..0e496e8 100644 --- a/drivers/pci/hotplug/acpiphp_core.c +++ b/drivers/pci/hotplug/acpiphp_core.c @@ -70,7 +70,6 @@ static int disable_slot (struct hotplug_slot *slot); static int set_attention_status (struct hotplug_slot *slot, u8 value); static int get_power_status (struct hotplug_slot *slot, u8 *value); static int get_attention_status (struct hotplug_slot *slot, u8 *value); -static int get_address (struct hotplug_slot *slot, u32 *value); static int get_latch_status (struct hotplug_slot *slot, u8 *value); static int get_adapter_status (struct hotplug_slot *slot, u8 *value); @@ -83,7 +82,6 @@ static struct hotplug_slot_ops acpi_hotplug_slot_ops = { .get_attention_status = get_attention_status, .get_latch_status = get_latch_status, .get_adapter_status = get_adapter_status, - .get_address = get_address, }; @@ -274,23 +272,6 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value) return 0; } - -/** - * get_address - get pci address of a slot - * @hotplug_slot: slot to get status - * @value: pointer to struct pci_busdev (seg, bus, dev) - */ -static int get_address(struct hotplug_slot *hotplug_slot, u32 *value) -{ - struct slot *slot = hotplug_slot->private; - - dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name); - - *value = acpiphp_get_address(slot->acpi_slot); - - return 0; -} - static int __init init_acpi(void) { int retval; @@ -357,7 +338,11 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot) acpiphp_slot->slot = slot; snprintf(slot->name, sizeof(slot->name), "%u", slot->acpi_slot->sun); - retval = pci_hp_register(slot->hotplug_slot); + retval = pci_hp_register(slot->hotplug_slot, + acpiphp_slot->bridge->pci_bus, + acpiphp_slot->device); + if (retval == -EBUSY) + goto error_hpslot; if (retval) { err("pci_hp_register failed with error %d\n", retval); goto error_hpslot; diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 648596d..9342c84 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -258,7 +258,12 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) bridge->pci_bus->number, slot->device); retval = acpiphp_register_hotplug_slot(slot); if (retval) { - warn("acpiphp_register_hotplug_slot failed(err code = 0x%x)\n", retval); + if (retval == -EBUSY) + warn("Slot %d already registered by another " + "hotplug driver\n", slot->sun); + else + warn("acpiphp_register_hotplug_slot failed " + "(err code = 0x%x)\n", retval); goto err_exit; } } @@ -1867,19 +1872,3 @@ u8 acpiphp_get_adapter_status(struct acpiphp_slot *slot) return (sta == 0) ? 0 : 1; } - - -/* - * pci address (seg/bus/dev) - */ -u32 acpiphp_get_address(struct acpiphp_slot *slot) -{ - u32 address; - struct pci_bus *pci_bus = slot->bridge->pci_bus; - - address = (pci_domain_nr(pci_bus) << 16) | - (pci_bus->number << 8) | - slot->device; - - return address; -} diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c index ede9051..2b7c45e 100644 --- a/drivers/pci/hotplug/acpiphp_ibm.c +++ b/drivers/pci/hotplug/acpiphp_ibm.c @@ -33,8 +33,10 @@ #include #include #include +#include #include "acpiphp.h" +#include "../pci.h" #define DRIVER_VERSION "1.0.1" #define DRIVER_AUTHOR "Irene Zubarev , Vernon Mauery " @@ -430,7 +432,7 @@ static int __init ibm_acpiphp_init(void) int retval = 0; acpi_status status; struct acpi_device *device; - struct kobject *sysdir = &pci_hotplug_slots_kset->kobj; + struct kobject *sysdir = &pci_slots_kset->kobj; dbg("%s\n", __func__); @@ -477,7 +479,7 @@ init_return: static void __exit ibm_acpiphp_exit(void) { acpi_status status; - struct kobject *sysdir = &pci_hotplug_slots_kset->kobj; + struct kobject *sysdir = &pci_slots_kset->kobj; dbg("%s\n", __func__); diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c index d8a6b80..9359479 100644 --- a/drivers/pci/hotplug/cpci_hotplug_core.c +++ b/drivers/pci/hotplug/cpci_hotplug_core.c @@ -285,7 +285,7 @@ cpci_hp_register_bus(struct pci_bus *bus, u8 first, u8 last) info->attention_status = cpci_get_attention_status(slot); dbg("registering slot %s", slot->hotplug_slot->name); - status = pci_hp_register(slot->hotplug_slot); + status = pci_hp_register(slot->hotplug_slot, bus, i); if (status) { err("pci_hp_register failed with error %d", status); goto error_name; diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c index 36b115b..54defec 100644 --- a/drivers/pci/hotplug/cpqphp_core.c +++ b/drivers/pci/hotplug/cpqphp_core.c @@ -434,7 +434,9 @@ static int ctrl_slot_setup(struct controller *ctrl, slot->bus, slot->device, slot->number, ctrl->slot_device_offset, slot_number); - result = pci_hp_register(hotplug_slot); + result = pci_hp_register(hotplug_slot, + ctrl->pci_dev->subordinate, + slot->device); if (result) { err("pci_hp_register failed with error %d\n", result); goto error_name; diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c index b57223f..40337a0 100644 --- a/drivers/pci/hotplug/fakephp.c +++ b/drivers/pci/hotplug/fakephp.c @@ -126,7 +126,7 @@ static int add_slot(struct pci_dev *dev) slot->release = &dummy_release; slot->private = dslot; - retval = pci_hp_register(slot); + retval = pci_hp_register(slot, dev->bus, PCI_SLOT(dev->devfn)); if (retval) { err("pci_hp_register failed with error %d\n", retval); goto error_dslot; diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c index dca7efc..8467d02 100644 --- a/drivers/pci/hotplug/ibmphp_ebda.c +++ b/drivers/pci/hotplug/ibmphp_ebda.c @@ -1001,7 +1001,8 @@ static int __init ebda_rsrc_controller (void) tmp_slot = list_entry (list, struct slot, ibm_slot_list); snprintf (tmp_slot->hotplug_slot->name, 30, "%s", create_file_name (tmp_slot)); - pci_hp_register (tmp_slot->hotplug_slot); + pci_hp_register(tmp_slot->hotplug_slot, + pci_find_bus(0, tmp_slot->bus), tmp_slot->device); } print_ebda_hpc (); diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index a11021e..4df31f3 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -40,6 +40,7 @@ #include #include #include +#include "../pci.h" #define MY_NAME "pci_hotplug" @@ -60,41 +61,7 @@ static int debug; ////////////////////////////////////////////////////////////////// static LIST_HEAD(pci_hotplug_slot_list); - -struct kset *pci_hotplug_slots_kset; - -static ssize_t hotplug_slot_attr_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct hotplug_slot *slot = to_hotplug_slot(kobj); - struct hotplug_slot_attribute *attribute = to_hotplug_attr(attr); - return attribute->show ? attribute->show(slot, buf) : -EIO; -} - -static ssize_t hotplug_slot_attr_store(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t len) -{ - struct hotplug_slot *slot = to_hotplug_slot(kobj); - struct hotplug_slot_attribute *attribute = to_hotplug_attr(attr); - return attribute->store ? attribute->store(slot, buf, len) : -EIO; -} - -static struct sysfs_ops hotplug_slot_sysfs_ops = { - .show = hotplug_slot_attr_show, - .store = hotplug_slot_attr_store, -}; - -static void hotplug_slot_release(struct kobject *kobj) -{ - struct hotplug_slot *slot = to_hotplug_slot(kobj); - if (slot->release) - slot->release(slot); -} - -static struct kobj_type hotplug_slot_ktype = { - .sysfs_ops = &hotplug_slot_sysfs_ops, - .release = &hotplug_slot_release, -}; +static DEFINE_SPINLOCK(pci_hotplug_slot_list_lock); /* these strings match up with the values in pci_bus_speed */ static char *pci_bus_speed_strings[] = { @@ -149,16 +116,15 @@ GET_STATUS(power_status, u8) GET_STATUS(attention_status, u8) GET_STATUS(latch_status, u8) GET_STATUS(adapter_status, u8) -GET_STATUS(address, u32) GET_STATUS(max_bus_speed, enum pci_bus_speed) GET_STATUS(cur_bus_speed, enum pci_bus_speed) -static ssize_t power_read_file (struct hotplug_slot *slot, char *buf) +static ssize_t power_read_file(struct pci_slot *slot, char *buf) { int retval; u8 value; - retval = get_power_status (slot, &value); + retval = get_power_status(slot->hotplug, &value); if (retval) goto exit; retval = sprintf (buf, "%d\n", value); @@ -166,9 +132,10 @@ exit: return retval; } -static ssize_t power_write_file (struct hotplug_slot *slot, const char *buf, +static ssize_t power_write_file(struct pci_slot *pci_slot, const char *buf, size_t count) { + struct hotplug_slot *slot = pci_slot->hotplug; unsigned long lpower; u8 power; int retval = 0; @@ -204,29 +171,30 @@ exit: return count; } -static struct hotplug_slot_attribute hotplug_slot_attr_power = { +static struct pci_slot_attribute hotplug_slot_attr_power = { .attr = {.name = "power", .mode = S_IFREG | S_IRUGO | S_IWUSR}, .show = power_read_file, .store = power_write_file }; -static ssize_t attention_read_file (struct hotplug_slot *slot, char *buf) +static ssize_t attention_read_file(struct pci_slot *slot, char *buf) { int retval; u8 value; - retval = get_attention_status (slot, &value); + retval = get_attention_status(slot->hotplug, &value); if (retval) goto exit; - retval = sprintf (buf, "%d\n", value); + retval = sprintf(buf, "%d\n", value); exit: return retval; } -static ssize_t attention_write_file (struct hotplug_slot *slot, const char *buf, +static ssize_t attention_write_file(struct pci_slot *slot, const char *buf, size_t count) { + struct hotplug_slot_ops *ops = slot->hotplug->ops; unsigned long lattention; u8 attention; int retval = 0; @@ -235,13 +203,13 @@ static ssize_t attention_write_file (struct hotplug_slot *slot, const char *buf, attention = (u8)(lattention & 0xff); dbg (" - attention = %d\n", attention); - if (!try_module_get(slot->ops->owner)) { + if (!try_module_get(ops->owner)) { retval = -ENODEV; goto exit; } - if (slot->ops->set_attention_status) - retval = slot->ops->set_attention_status(slot, attention); - module_put(slot->ops->owner); + if (ops->set_attention_status) + retval = ops->set_attention_status(slot->hotplug, attention); + module_put(ops->owner); exit: if (retval) @@ -249,18 +217,18 @@ exit: return count; } -static struct hotplug_slot_attribute hotplug_slot_attr_attention = { +static struct pci_slot_attribute hotplug_slot_attr_attention = { .attr = {.name = "attention", .mode = S_IFREG | S_IRUGO | S_IWUSR}, .show = attention_read_file, .store = attention_write_file }; -static ssize_t latch_read_file (struct hotplug_slot *slot, char *buf) +static ssize_t latch_read_file(struct pci_slot *slot, char *buf) { int retval; u8 value; - retval = get_latch_status (slot, &value); + retval = get_latch_status(slot->hotplug, &value); if (retval) goto exit; retval = sprintf (buf, "%d\n", value); @@ -269,17 +237,17 @@ exit: return retval; } -static struct hotplug_slot_attribute hotplug_slot_attr_latch = { +static struct pci_slot_attribute hotplug_slot_attr_latch = { .attr = {.name = "latch", .mode = S_IFREG | S_IRUGO}, .show = latch_read_file, }; -static ssize_t presence_read_file (struct hotplug_slot *slot, char *buf) +static ssize_t presence_read_file(struct pci_slot *slot, char *buf) { int retval; u8 value; - retval = get_adapter_status (slot, &value); + retval = get_adapter_status(slot->hotplug, &value); if (retval) goto exit; retval = sprintf (buf, "%d\n", value); @@ -288,42 +256,20 @@ exit: return retval; } -static struct hotplug_slot_attribute hotplug_slot_attr_presence = { +static struct pci_slot_attribute hotplug_slot_attr_presence = { .attr = {.name = "adapter", .mode = S_IFREG | S_IRUGO}, .show = presence_read_file, }; -static ssize_t address_read_file (struct hotplug_slot *slot, char *buf) -{ - int retval; - u32 address; - - retval = get_address (slot, &address); - if (retval) - goto exit; - retval = sprintf (buf, "%04x:%02x:%02x\n", - (address >> 16) & 0xffff, - (address >> 8) & 0xff, - address & 0xff); - -exit: - return retval; -} - -static struct hotplug_slot_attribute hotplug_slot_attr_address = { - .attr = {.name = "address", .mode = S_IFREG | S_IRUGO}, - .show = address_read_file, -}; - static char *unknown_speed = "Unknown bus speed"; -static ssize_t max_bus_speed_read_file (struct hotplug_slot *slot, char *buf) +static ssize_t max_bus_speed_read_file(struct pci_slot *slot, char *buf) { char *speed_string; int retval; enum pci_bus_speed value; - retval = get_max_bus_speed (slot, &value); + retval = get_max_bus_speed(slot->hotplug, &value); if (retval) goto exit; @@ -338,18 +284,18 @@ exit: return retval; } -static struct hotplug_slot_attribute hotplug_slot_attr_max_bus_speed = { +static struct pci_slot_attribute hotplug_slot_attr_max_bus_speed = { .attr = {.name = "max_bus_speed", .mode = S_IFREG | S_IRUGO}, .show = max_bus_speed_read_file, }; -static ssize_t cur_bus_speed_read_file (struct hotplug_slot *slot, char *buf) +static ssize_t cur_bus_speed_read_file(struct pci_slot *slot, char *buf) { char *speed_string; int retval; enum pci_bus_speed value; - retval = get_cur_bus_speed (slot, &value); + retval = get_cur_bus_speed(slot->hotplug, &value); if (retval) goto exit; @@ -364,14 +310,15 @@ exit: return retval; } -static struct hotplug_slot_attribute hotplug_slot_attr_cur_bus_speed = { +static struct pci_slot_attribute hotplug_slot_attr_cur_bus_speed = { .attr = {.name = "cur_bus_speed", .mode = S_IFREG | S_IRUGO}, .show = cur_bus_speed_read_file, }; -static ssize_t test_write_file (struct hotplug_slot *slot, const char *buf, +static ssize_t test_write_file(struct pci_slot *pci_slot, const char *buf, size_t count) { + struct hotplug_slot *slot = pci_slot->hotplug; unsigned long ltest; u32 test; int retval = 0; @@ -394,13 +341,14 @@ exit: return count; } -static struct hotplug_slot_attribute hotplug_slot_attr_test = { +static struct pci_slot_attribute hotplug_slot_attr_test = { .attr = {.name = "test", .mode = S_IFREG | S_IRUGO | S_IWUSR}, .store = test_write_file }; -static int has_power_file (struct hotplug_slot *slot) +static int has_power_file(struct pci_slot *pci_slot) { + struct hotplug_slot *slot = pci_slot->hotplug; if ((!slot) || (!slot->ops)) return -ENODEV; if ((slot->ops->enable_slot) || @@ -410,8 +358,9 @@ static int has_power_file (struct hotplug_slot *slot) return -ENOENT; } -static int has_attention_file (struct hotplug_slot *slot) +static int has_attention_file(struct pci_slot *pci_slot) { + struct hotplug_slot *slot = pci_slot->hotplug; if ((!slot) || (!slot->ops)) return -ENODEV; if ((slot->ops->set_attention_status) || @@ -420,8 +369,9 @@ static int has_attention_file (struct hotplug_slot *slot) return -ENOENT; } -static int has_latch_file (struct hotplug_slot *slot) +static int has_latch_file(struct pci_slot *pci_slot) { + struct hotplug_slot *slot = pci_slot->hotplug; if ((!slot) || (!slot->ops)) return -ENODEV; if (slot->ops->get_latch_status) @@ -429,8 +379,9 @@ static int has_latch_file (struct hotplug_slot *slot) return -ENOENT; } -static int has_adapter_file (struct hotplug_slot *slot) +static int has_adapter_file(struct pci_slot *pci_slot) { + struct hotplug_slot *slot = pci_slot->hotplug; if ((!slot) || (!slot->ops)) return -ENODEV; if (slot->ops->get_adapter_status) @@ -438,17 +389,9 @@ static int has_adapter_file (struct hotplug_slot *slot) return -ENOENT; } -static int has_address_file (struct hotplug_slot *slot) -{ - if ((!slot) || (!slot->ops)) - return -ENODEV; - if (slot->ops->get_address) - return 0; - return -ENOENT; -} - -static int has_max_bus_speed_file (struct hotplug_slot *slot) +static int has_max_bus_speed_file(struct pci_slot *pci_slot) { + struct hotplug_slot *slot = pci_slot->hotplug; if ((!slot) || (!slot->ops)) return -ENODEV; if (slot->ops->get_max_bus_speed) @@ -456,8 +399,9 @@ static int has_max_bus_speed_file (struct hotplug_slot *slot) return -ENOENT; } -static int has_cur_bus_speed_file (struct hotplug_slot *slot) +static int has_cur_bus_speed_file(struct pci_slot *pci_slot) { + struct hotplug_slot *slot = pci_slot->hotplug; if ((!slot) || (!slot->ops)) return -ENODEV; if (slot->ops->get_cur_bus_speed) @@ -465,8 +409,9 @@ static int has_cur_bus_speed_file (struct hotplug_slot *slot) return -ENOENT; } -static int has_test_file (struct hotplug_slot *slot) +static int has_test_file(struct pci_slot *pci_slot) { + struct hotplug_slot *slot = pci_slot->hotplug; if ((!slot) || (!slot->ops)) return -ENODEV; if (slot->ops->hardware_test) @@ -474,7 +419,7 @@ static int has_test_file (struct hotplug_slot *slot) return -ENOENT; } -static int fs_add_slot (struct hotplug_slot *slot) +static int fs_add_slot(struct pci_slot *slot) { int retval = 0; @@ -505,13 +450,6 @@ static int fs_add_slot (struct hotplug_slot *slot) goto exit_adapter; } - if (has_address_file(slot) == 0) { - retval = sysfs_create_file(&slot->kobj, - &hotplug_slot_attr_address.attr); - if (retval) - goto exit_address; - } - if (has_max_bus_speed_file(slot) == 0) { retval = sysfs_create_file(&slot->kobj, &hotplug_slot_attr_max_bus_speed.attr); @@ -544,10 +482,6 @@ exit_cur_speed: sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_max_bus_speed.attr); exit_max_speed: - if (has_address_file(slot) == 0) - sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_address.attr); - -exit_address: if (has_adapter_file(slot) == 0) sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_presence.attr); @@ -567,7 +501,7 @@ exit: return retval; } -static void fs_remove_slot (struct hotplug_slot *slot) +static void fs_remove_slot(struct pci_slot *slot) { if (has_power_file(slot) == 0) sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_power.attr); @@ -581,9 +515,6 @@ static void fs_remove_slot (struct hotplug_slot *slot) if (has_adapter_file(slot) == 0) sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_presence.attr); - if (has_address_file(slot) == 0) - sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_address.attr); - if (has_max_bus_speed_file(slot) == 0) sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_max_bus_speed.attr); @@ -599,12 +530,16 @@ static struct hotplug_slot *get_slot_from_name (const char *name) struct hotplug_slot *slot; struct list_head *tmp; + spin_lock(&pci_hotplug_slot_list_lock); list_for_each (tmp, &pci_hotplug_slot_list) { slot = list_entry (tmp, struct hotplug_slot, slot_list); if (strcmp(slot->name, name) == 0) - return slot; + goto out; } - return NULL; + slot = NULL; +out: + spin_unlock(&pci_hotplug_slot_list_lock); + return slot; } /** @@ -616,9 +551,10 @@ static struct hotplug_slot *get_slot_from_name (const char *name) * * Returns 0 if successful, anything else for an error. */ -int pci_hp_register (struct hotplug_slot *slot) +int pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, int slot_nr) { int result; + struct pci_slot *pci_slot; struct hotplug_slot *tmp; if (slot == NULL) @@ -636,19 +572,28 @@ int pci_hp_register (struct hotplug_slot *slot) if (tmp) return -EEXIST; - slot->kobj.kset = pci_hotplug_slots_kset; - result = kobject_init_and_add(&slot->kobj, &hotplug_slot_ktype, NULL, - "%s", slot->name); - if (result) { - err("Unable to register kobject '%s'", slot->name); - return -EINVAL; + pci_slot = pci_create_slot(bus, slot_nr, slot->name); + if (IS_ERR(pci_slot)) + return PTR_ERR(pci_slot); + + if (pci_slot->hotplug) { + dbg("%s: already claimed\n", __func__); + pci_destroy_slot(pci_slot); + return -EBUSY; } - list_add (&slot->slot_list, &pci_hotplug_slot_list); + slot->pci_slot = pci_slot; + pci_slot->hotplug = slot; + + spin_lock(&pci_hotplug_slot_list_lock); + list_add(&slot->slot_list, &pci_hotplug_slot_list); + spin_unlock(&pci_hotplug_slot_list_lock); + + result = fs_add_slot(pci_slot); + kobject_uevent(&pci_slot->kobj, KOBJ_ADD); + dbg("Added slot %s to the list\n", slot->name); + - result = fs_add_slot (slot); - kobject_uevent(&slot->kobj, KOBJ_ADD); - dbg ("Added slot %s to the list\n", slot->name); return result; } @@ -661,22 +606,30 @@ int pci_hp_register (struct hotplug_slot *slot) * * Returns 0 if successful, anything else for an error. */ -int pci_hp_deregister (struct hotplug_slot *slot) +int pci_hp_deregister(struct hotplug_slot *hotplug) { struct hotplug_slot *temp; + struct pci_slot *slot; - if (slot == NULL) + if (!hotplug) return -ENODEV; - temp = get_slot_from_name (slot->name); - if (temp != slot) { + temp = get_slot_from_name(hotplug->name); + if (temp != hotplug) return -ENODEV; - } - list_del (&slot->slot_list); - fs_remove_slot (slot); - dbg ("Removed slot %s from the list\n", slot->name); - kobject_put(&slot->kobj); + spin_lock(&pci_hotplug_slot_list_lock); + list_del(&hotplug->slot_list); + spin_unlock(&pci_hotplug_slot_list_lock); + + slot = hotplug->pci_slot; + fs_remove_slot(slot); + dbg("Removed slot %s from the list\n", hotplug->name); + + hotplug->release(hotplug); + slot->hotplug = NULL; + pci_destroy_slot(slot); + return 0; } @@ -690,13 +643,15 @@ int pci_hp_deregister (struct hotplug_slot *slot) * * Returns 0 if successful, anything else for an error. */ -int __must_check pci_hp_change_slot_info(struct hotplug_slot *slot, +int __must_check pci_hp_change_slot_info(struct hotplug_slot *hotplug, struct hotplug_slot_info *info) { - if ((slot == NULL) || (info == NULL)) + struct pci_slot *slot; + if (!hotplug || !info) return -ENODEV; + slot = hotplug->pci_slot; - memcpy (slot->info, info, sizeof (struct hotplug_slot_info)); + memcpy(hotplug->info, info, sizeof(struct hotplug_slot_info)); return 0; } @@ -704,36 +659,22 @@ int __must_check pci_hp_change_slot_info(struct hotplug_slot *slot, static int __init pci_hotplug_init (void) { int result; - struct kset *pci_bus_kset; - pci_bus_kset = bus_get_kset(&pci_bus_type); - - pci_hotplug_slots_kset = kset_create_and_add("slots", NULL, - &pci_bus_kset->kobj); - if (!pci_hotplug_slots_kset) { - result = -ENOMEM; - err("Register subsys error\n"); - goto exit; - } result = cpci_hotplug_init(debug); if (result) { err ("cpci_hotplug_init with error %d\n", result); - goto err_subsys; + goto err_cpci; } info (DRIVER_DESC " version: " DRIVER_VERSION "\n"); - goto exit; -err_subsys: - kset_unregister(pci_hotplug_slots_kset); -exit: +err_cpci: return result; } static void __exit pci_hotplug_exit (void) { cpci_hotplug_exit(); - kset_unregister(pci_hotplug_slots_kset); } module_init(pci_hotplug_init); @@ -745,7 +686,6 @@ MODULE_LICENSE("GPL"); module_param(debug, bool, 0644); MODULE_PARM_DESC(debug, "Debugging mode enabled or not"); -EXPORT_SYMBOL_GPL(pci_hotplug_slots_kset); EXPORT_SYMBOL_GPL(pci_hp_register); EXPORT_SYMBOL_GPL(pci_hp_deregister); EXPORT_SYMBOL_GPL(pci_hp_change_slot_info); diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 49414e9..7b21c86 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -72,7 +72,6 @@ static int get_power_status (struct hotplug_slot *slot, u8 *value); static int get_attention_status (struct hotplug_slot *slot, u8 *value); static int get_latch_status (struct hotplug_slot *slot, u8 *value); static int get_adapter_status (struct hotplug_slot *slot, u8 *value); -static int get_address (struct hotplug_slot *slot, u32 *value); static int get_max_bus_speed (struct hotplug_slot *slot, enum pci_bus_speed *value); static int get_cur_bus_speed (struct hotplug_slot *slot, enum pci_bus_speed *value); @@ -85,7 +84,6 @@ static struct hotplug_slot_ops pciehp_hotplug_slot_ops = { .get_attention_status = get_attention_status, .get_latch_status = get_latch_status, .get_adapter_status = get_adapter_status, - .get_address = get_address, .get_max_bus_speed = get_max_bus_speed, .get_cur_bus_speed = get_cur_bus_speed, }; @@ -252,7 +250,9 @@ static int init_slots(struct controller *ctrl) dbg("Registering bus=%x dev=%x hp_slot=%x sun=%x " "slot_device_offset=%x\n", slot->bus, slot->device, slot->hp_slot, slot->number, ctrl->slot_device_offset); - retval = pci_hp_register(hotplug_slot); + retval = pci_hp_register(hotplug_slot, + ctrl->pci_dev->subordinate, + slot->device); if (retval) { err("pci_hp_register failed with error %d\n", retval); if (retval == -EEXIST) @@ -263,7 +263,7 @@ static int init_slots(struct controller *ctrl) } /* create additional sysfs entries */ if (EMI(ctrl)) { - retval = sysfs_create_file(&hotplug_slot->kobj, + retval = sysfs_create_file(&hotplug_slot->pci_slot->kobj, &hotplug_slot_attr_lock.attr); if (retval) { pci_hp_deregister(hotplug_slot); @@ -296,7 +296,7 @@ static void cleanup_slots(struct controller *ctrl) slot = list_entry(tmp, struct slot, slot_list); list_del(&slot->slot_list); if (EMI(ctrl)) - sysfs_remove_file(&slot->hotplug_slot->kobj, + sysfs_remove_file(&slot->hotplug_slot->pci_slot->kobj, &hotplug_slot_attr_lock.attr); cancel_delayed_work(&slot->work); flush_scheduled_work(); @@ -398,19 +398,8 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value) return 0; } -static int get_address(struct hotplug_slot *hotplug_slot, u32 *value) -{ - struct slot *slot = hotplug_slot->private; - struct pci_bus *bus = slot->ctrl->pci_dev->subordinate; - - dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name); - - *value = (pci_domain_nr(bus) << 16) | (slot->bus << 8) | slot->device; - - return 0; -} - -static int get_max_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_speed *value) +static int get_max_bus_speed(struct hotplug_slot *hotplug_slot, + enum pci_bus_speed *value) { struct slot *slot = hotplug_slot->private; int retval; @@ -474,7 +463,12 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_ /* Setup the slot information structures */ rc = init_slots(ctrl); if (rc) { - err("%s: slot initialization failed\n", PCIE_MODULE_NAME); + if (rc == -EBUSY) + warn("%s: slot already registered by another " + "hotplug driver\n", PCIE_MODULE_NAME); + else + err("%s: slot initialization failed\n", + PCIE_MODULE_NAME); goto err_out_release_ctlr; } diff --git a/drivers/pci/hotplug/rpadlpar_sysfs.c b/drivers/pci/hotplug/rpadlpar_sysfs.c index 779c5db..a796301 100644 --- a/drivers/pci/hotplug/rpadlpar_sysfs.c +++ b/drivers/pci/hotplug/rpadlpar_sysfs.c @@ -14,8 +14,10 @@ */ #include #include +#include #include #include "rpadlpar.h" +#include "../pci.h" #define DLPAR_KOBJ_NAME "control" @@ -27,7 +29,6 @@ #define MAX_DRC_NAME_LEN 64 - static ssize_t add_slot_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t nbytes) { @@ -112,7 +113,7 @@ int dlpar_sysfs_init(void) int error; dlpar_kobj = kobject_create_and_add(DLPAR_KOBJ_NAME, - &pci_hotplug_slots_kset->kobj); + &pci_slots_kset->kobj); if (!dlpar_kobj) return -EINVAL; diff --git a/drivers/pci/hotplug/rpaphp_slot.c b/drivers/pci/hotplug/rpaphp_slot.c index 56197b6..9b714ea 100644 --- a/drivers/pci/hotplug/rpaphp_slot.c +++ b/drivers/pci/hotplug/rpaphp_slot.c @@ -33,33 +33,6 @@ #include #include "rpaphp.h" -static ssize_t address_read_file (struct hotplug_slot *php_slot, char *buf) -{ - int retval; - struct slot *slot = (struct slot *)php_slot->private; - struct pci_bus *bus; - - if (!slot) - return -ENOENT; - - bus = slot->bus; - if (!bus) - return -ENOENT; - - if (bus->self) - retval = sprintf(buf, pci_name(bus->self)); - else - retval = sprintf(buf, "%04x:%02x:00.0", - pci_domain_nr(bus), bus->number); - - return retval; -} - -static struct hotplug_slot_attribute php_attr_address = { - .attr = {.name = "address", .mode = S_IFREG | S_IRUGO}, - .show = address_read_file, -}; - /* free up the memory used by a slot */ static void rpaphp_release_slot(struct hotplug_slot *hotplug_slot) { @@ -135,9 +108,6 @@ int rpaphp_deregister_slot(struct slot *slot) list_del(&slot->rpaphp_slot_list); - /* remove "address" file */ - sysfs_remove_file(&php_slot->kobj, &php_attr_address.attr); - retval = pci_hp_deregister(php_slot); if (retval) err("Problem unregistering a slot %s\n", slot->name); @@ -151,6 +121,7 @@ int rpaphp_register_slot(struct slot *slot) { struct hotplug_slot *php_slot = slot->hotplug_slot; int retval; + int slotno; dbg("%s registering slot:path[%s] index[%x], name[%s] pdomain[%x] type[%d]\n", __func__, slot->dn->full_name, slot->index, slot->name, @@ -162,19 +133,16 @@ int rpaphp_register_slot(struct slot *slot) return -EAGAIN; } - retval = pci_hp_register(php_slot); + if (slot->dn->child) + slotno = PCI_SLOT(PCI_DN(slot->dn->child)->devfn); + else + slotno = -1; + retval = pci_hp_register(php_slot, slot->bus, slotno); if (retval) { err("pci_hp_register failed with error %d\n", retval); return retval; } - /* create "address" file */ - retval = sysfs_create_file(&php_slot->kobj, &php_attr_address.attr); - if (retval) { - err("sysfs_create_file failed with error %d\n", retval); - goto sysfs_fail; - } - /* add slot to our internal list */ list_add(&slot->rpaphp_slot_list, &rpaphp_slot_head); info("Slot [%s] registered\n", slot->name); diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c index 2fe37cd..2036a43 100644 --- a/drivers/pci/hotplug/sgi_hotplug.c +++ b/drivers/pci/hotplug/sgi_hotplug.c @@ -197,13 +197,15 @@ static int sn_hp_slot_private_alloc(struct hotplug_slot *bss_hotplug_slot, static struct hotplug_slot * sn_hp_destroy(void) { struct slot *slot; + struct pci_slot *pci_slot; struct hotplug_slot *bss_hotplug_slot = NULL; list_for_each_entry(slot, &sn_hp_list, hp_list) { bss_hotplug_slot = slot->hotplug_slot; + pci_slot = bss_hotplug_slot->pci_slot; list_del(&((struct slot *)bss_hotplug_slot->private)-> hp_list); - sysfs_remove_file(&bss_hotplug_slot->kobj, + sysfs_remove_file(&pci_slot->kobj, &sn_slot_path_attr.attr); break; } @@ -614,6 +616,7 @@ static void sn_release_slot(struct hotplug_slot *bss_hotplug_slot) static int sn_hotplug_slot_register(struct pci_bus *pci_bus) { int device; + struct pci_slot *pci_slot; struct hotplug_slot *bss_hotplug_slot; int rc = 0; @@ -650,11 +653,12 @@ static int sn_hotplug_slot_register(struct pci_bus *pci_bus) bss_hotplug_slot->ops = &sn_hotplug_slot_ops; bss_hotplug_slot->release = &sn_release_slot; - rc = pci_hp_register(bss_hotplug_slot); + rc = pci_hp_register(bss_hotplug_slot, pci_bus, device); if (rc) goto register_err; - rc = sysfs_create_file(&bss_hotplug_slot->kobj, + pci_slot = bss_hotplug_slot->pci_slot; + rc = sysfs_create_file(&pci_slot->kobj, &sn_slot_path_attr.attr); if (rc) goto register_err; diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c index df41ecc..a8cbd03 100644 --- a/drivers/pci/hotplug/shpchp_core.c +++ b/drivers/pci/hotplug/shpchp_core.c @@ -68,7 +68,6 @@ static int get_power_status (struct hotplug_slot *slot, u8 *value); static int get_attention_status (struct hotplug_slot *slot, u8 *value); static int get_latch_status (struct hotplug_slot *slot, u8 *value); static int get_adapter_status (struct hotplug_slot *slot, u8 *value); -static int get_address (struct hotplug_slot *slot, u32 *value); static int get_max_bus_speed (struct hotplug_slot *slot, enum pci_bus_speed *value); static int get_cur_bus_speed (struct hotplug_slot *slot, enum pci_bus_speed *value); @@ -81,7 +80,6 @@ static struct hotplug_slot_ops shpchp_hotplug_slot_ops = { .get_attention_status = get_attention_status, .get_latch_status = get_latch_status, .get_adapter_status = get_adapter_status, - .get_address = get_address, .get_max_bus_speed = get_max_bus_speed, .get_cur_bus_speed = get_cur_bus_speed, }; @@ -159,7 +157,8 @@ static int init_slots(struct controller *ctrl) dbg("Registering bus=%x dev=%x hp_slot=%x sun=%x " "slot_device_offset=%x\n", slot->bus, slot->device, slot->hp_slot, slot->number, ctrl->slot_device_offset); - retval = pci_hp_register(slot->hotplug_slot); + retval = pci_hp_register(slot->hotplug_slot, + ctrl->pci_dev->subordinate, slot->device); if (retval) { err("pci_hp_register failed with error %d\n", retval); if (retval == -EEXIST) @@ -288,19 +287,8 @@ static int get_adapter_status (struct hotplug_slot *hotplug_slot, u8 *value) return 0; } -static int get_address (struct hotplug_slot *hotplug_slot, u32 *value) -{ - struct slot *slot = get_slot(hotplug_slot); - struct pci_bus *bus = slot->ctrl->pci_dev->subordinate; - - dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name); - - *value = (pci_domain_nr(bus) << 16) | (slot->bus << 8) | slot->device; - - return 0; -} - -static int get_max_bus_speed (struct hotplug_slot *hotplug_slot, enum pci_bus_speed *value) +static int get_max_bus_speed(struct hotplug_slot *hotplug_slot, + enum pci_bus_speed *value) { struct slot *slot = get_slot(hotplug_slot); int retval; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 0a497c1b..e1d7bbf 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -106,3 +106,16 @@ pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) } struct pci_dev *pci_find_upstream_pcie_bridge(struct pci_dev *pdev); + +/* PCI slot sysfs helper code */ +#define to_pci_slot(s) container_of(s, struct pci_slot, kobj) + +extern struct kset *pci_slots_kset; + +struct pci_slot_attribute { + struct attribute attr; + ssize_t (*show)(struct pci_slot *, char *); + ssize_t (*store)(struct pci_slot *, const char *, size_t); +}; +#define to_pci_slot_attr(s) container_of(s, struct pci_slot_attribute, attr) + diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index aebab71..4562827 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -414,6 +414,7 @@ static struct pci_bus * pci_alloc_bus(void) INIT_LIST_HEAD(&b->node); INIT_LIST_HEAD(&b->children); INIT_LIST_HEAD(&b->devices); + INIT_LIST_HEAD(&b->slots); } return b; } diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c new file mode 100644 index 0000000..7e5b85c --- /dev/null +++ b/drivers/pci/slot.c @@ -0,0 +1,233 @@ +/* + * drivers/pci/slot.c + * Copyright (C) 2006 Matthew Wilcox + * Copyright (C) 2006-2008 Hewlett-Packard Development Company, L.P. + * Alex Chiang + */ + +#include +#include +#include +#include "pci.h" + +struct kset *pci_slots_kset; +EXPORT_SYMBOL_GPL(pci_slots_kset); + +static ssize_t pci_slot_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct pci_slot *slot = to_pci_slot(kobj); + struct pci_slot_attribute *attribute = to_pci_slot_attr(attr); + return attribute->show ? attribute->show(slot, buf) : -EIO; +} + +static ssize_t pci_slot_attr_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t len) +{ + struct pci_slot *slot = to_pci_slot(kobj); + struct pci_slot_attribute *attribute = to_pci_slot_attr(attr); + return attribute->store ? attribute->store(slot, buf, len) : -EIO; +} + +static struct sysfs_ops pci_slot_sysfs_ops = { + .show = pci_slot_attr_show, + .store = pci_slot_attr_store, +}; + +static ssize_t address_read_file(struct pci_slot *slot, char *buf) +{ + if (slot->number == 0xff) + return sprintf(buf, "%04x:%02x\n", + pci_domain_nr(slot->bus), + slot->bus->number); + else + return sprintf(buf, "%04x:%02x:%02x\n", + pci_domain_nr(slot->bus), + slot->bus->number, + slot->number); +} + +static void pci_slot_release(struct kobject *kobj) +{ + struct pci_slot *slot = to_pci_slot(kobj); + + pr_debug("%s: releasing pci_slot on %x:%d\n", __func__, + slot->bus->number, slot->number); + + list_del(&slot->list); + + kfree(slot); +} + +static struct pci_slot_attribute pci_slot_attr_address = + __ATTR(address, (S_IFREG | S_IRUGO), address_read_file, NULL); + +static struct attribute *pci_slot_default_attrs[] = { + &pci_slot_attr_address.attr, + NULL, +}; + +static struct kobj_type pci_slot_ktype = { + .sysfs_ops = &pci_slot_sysfs_ops, + .release = &pci_slot_release, + .default_attrs = pci_slot_default_attrs, +}; + +/** + * pci_create_slot - create or increment refcount for physical PCI slot + * @parent: struct pci_bus of parent bridge + * @slot_nr: PCI_SLOT(pci_dev->devfn) or -1 for placeholder + * @name: user visible string presented in /sys/bus/pci/slots/ + * + * PCI slots have first class attributes such as address, speed, width, + * and a &struct pci_slot is used to manage them. This interface will + * either return a new &struct pci_slot to the caller, or if the pci_slot + * already exists, its refcount will be incremented. + * + * Slots are uniquely identified by a @pci_bus, @slot_nr, @name tuple. + * + * Placeholder slots: + * In most cases, @pci_bus, @slot_nr will be sufficient to uniquely identify + * a slot. There is one notable exception - pSeries (rpaphp), where the + * @slot_nr cannot be determined until a device is actually inserted into + * the slot. In this scenario, the caller may pass -1 for @slot_nr. + * + * The following semantics are imposed when the caller passes @slot_nr == + * -1. First, the check for existing %struct pci_slot is skipped, as the + * caller may know about several unpopulated slots on a given %struct + * pci_bus, and each slot would have a @slot_nr of -1. Uniqueness for + * these slots is then determined by the @name parameter. We expect + * kobject_init_and_add() to warn us if the caller attempts to create + * multiple slots with the same name. The other change in semantics is + * user-visible, which is the 'address' parameter presented in sysfs will + * consist solely of a dddd:bb tuple, where dddd is the PCI domain of the + * %struct pci_bus and bb is the bus number. In other words, the devfn of + * the 'placeholder' slot will not be displayed. + */ + +struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr, + const char *name) +{ + struct pci_slot *slot; + int err; + + down_write(&pci_bus_sem); + + if (slot_nr == -1) + goto placeholder; + + /* If we've already created this slot, bump refcount and return. */ + list_for_each_entry(slot, &parent->slots, list) { + if (slot->number == slot_nr) { + kobject_get(&slot->kobj); + pr_debug("%s: inc refcount to %d on %04x:%02x:%02x\n", + __func__, + atomic_read(&slot->kobj.kref.refcount), + pci_domain_nr(parent), parent->number, + slot_nr); + goto out; + } + } + +placeholder: + slot = kzalloc(sizeof(*slot), GFP_KERNEL); + if (!slot) { + slot = ERR_PTR(-ENOMEM); + goto out; + } + + slot->bus = parent; + slot->number = slot_nr; + + slot->kobj.kset = pci_slots_kset; + err = kobject_init_and_add(&slot->kobj, &pci_slot_ktype, NULL, + "%s", name); + if (err) { + printk(KERN_ERR "Unable to register kobject %s\n", name); + goto err; + } + + INIT_LIST_HEAD(&slot->list); + list_add(&slot->list, &parent->slots); + + /* Don't care if debug printk has a -1 for slot_nr */ + pr_debug("%s: created pci_slot on %04x:%02x:%02x\n", + __func__, pci_domain_nr(parent), parent->number, slot_nr); + + out: + up_write(&pci_bus_sem); + return slot; + err: + kfree(slot); + slot = ERR_PTR(err); + goto out; +} +EXPORT_SYMBOL_GPL(pci_create_slot); + +/** + * pci_update_slot_number - update %struct pci_slot -> number + * @slot - %struct pci_slot to update + * @slot_nr - new number for slot + * + * The primary purpose of this interface is to allow callers who earlier + * created a placeholder slot in pci_create_slot() by passing a -1 as + * slot_nr, to update their %struct pci_slot with the correct @slot_nr. + */ + +void pci_update_slot_number(struct pci_slot *slot, int slot_nr) +{ + int name_count = 0; + struct pci_slot *tmp; + + down_write(&pci_bus_sem); + + list_for_each_entry(tmp, &slot->bus->slots, list) { + WARN_ON(tmp->number == slot_nr); + if (!strcmp(kobject_name(&tmp->kobj), kobject_name(&slot->kobj))) + name_count++; + } + + if (name_count > 1) + printk(KERN_WARNING "pci_update_slot_number found %d slots with the same name: %s\n", name_count, kobject_name(&slot->kobj)); + + slot->number = slot_nr; + up_write(&pci_bus_sem); +} +EXPORT_SYMBOL_GPL(pci_update_slot_number); + +/** + * pci_destroy_slot - decrement refcount for physical PCI slot + * @slot: struct pci_slot to decrement + * + * %struct pci_slot is refcounted, so destroying them is really easy; we + * just call kobject_put on its kobj and let our release methods do the + * rest. + */ + +void pci_destroy_slot(struct pci_slot *slot) +{ + pr_debug("%s: dec refcount to %d on %04x:%02x:%02x\n", __func__, + atomic_read(&slot->kobj.kref.refcount) - 1, + pci_domain_nr(slot->bus), slot->bus->number, slot->number); + + down_write(&pci_bus_sem); + kobject_put(&slot->kobj); + up_write(&pci_bus_sem); +} +EXPORT_SYMBOL_GPL(pci_destroy_slot); + +static int pci_slot_init(void) +{ + struct kset *pci_bus_kset; + + pci_bus_kset = bus_get_kset(&pci_bus_type); + pci_slots_kset = kset_create_and_add("slots", NULL, + &pci_bus_kset->kobj); + if (!pci_slots_kset) { + printk(KERN_ERR "PCI: Slot initialization failure\n"); + return -ENOMEM; + } + return 0; +} + +subsys_initcall(pci_slot_init); diff --git a/include/linux/pci.h b/include/linux/pci.h index 507ee52..f1f73f7 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -17,8 +17,7 @@ #ifndef LINUX_PCI_H #define LINUX_PCI_H -/* Include the pci register defines */ -#include +#include /* The pci register defines */ /* * The PCI interface treats multi-function devices as independent @@ -49,12 +48,22 @@ #include #include #include +#include #include #include /* Include the ID list */ #include +/* pci_slot represents a physical slot */ +struct pci_slot { + struct pci_bus *bus; /* The bus this slot is on */ + struct list_head list; /* node in list of slots on this bus */ + struct hotplug_slot *hotplug; /* Hotplug info (migrate over time) */ + unsigned char number; /* PCI_SLOT(pci_dev->devfn) */ + struct kobject kobj; +}; + /* File state for mmap()s on /proc/bus/pci/X/Y */ enum pci_mmap_state { pci_mmap_io, @@ -142,6 +151,7 @@ struct pci_dev { void *sysdata; /* hook for sys-specific extension */ struct proc_dir_entry *procent; /* device entry in /proc/bus/pci */ + struct pci_slot *slot; /* Physical slot this device is in */ unsigned int devfn; /* encoded device & function index */ unsigned short vendor; @@ -266,6 +276,7 @@ struct pci_bus { struct list_head children; /* list of child buses */ struct list_head devices; /* list of devices on this bus */ struct pci_dev *self; /* bridge device as seen by parent */ + struct list_head slots; /* list of slots on this bus */ struct resource *resource[PCI_BUS_NUM_RESOURCES]; /* address space routed to this bus */ @@ -488,6 +499,10 @@ struct pci_bus *pci_create_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata); struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr); +struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr, + const char *name); +void pci_destroy_slot(struct pci_slot *slot); +void pci_update_slot_number(struct pci_slot *slot, int slot_nr); int pci_scan_slot(struct pci_bus *bus, int devfn); struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn); void pci_device_add(struct pci_dev *dev, struct pci_bus *bus); diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index dbdcd1a..a08cd06 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -95,9 +95,6 @@ struct hotplug_slot_attribute { * @get_adapter_status: Called to get see if an adapter is present in the slot or not. * If this field is NULL, the value passed in the struct hotplug_slot_info * will be used when this value is requested by a user. - * @get_address: Called to get pci address of a slot. - * If this field is NULL, the value passed in the struct hotplug_slot_info - * will be used when this value is requested by a user. * @get_max_bus_speed: Called to get the max bus speed for a slot. * If this field is NULL, the value passed in the struct hotplug_slot_info * will be used when this value is requested by a user. @@ -120,7 +117,6 @@ struct hotplug_slot_ops { int (*get_attention_status) (struct hotplug_slot *slot, u8 *value); int (*get_latch_status) (struct hotplug_slot *slot, u8 *value); int (*get_adapter_status) (struct hotplug_slot *slot, u8 *value); - int (*get_address) (struct hotplug_slot *slot, u32 *value); int (*get_max_bus_speed) (struct hotplug_slot *slot, enum pci_bus_speed *value); int (*get_cur_bus_speed) (struct hotplug_slot *slot, enum pci_bus_speed *value); }; @@ -140,7 +136,6 @@ struct hotplug_slot_info { u8 attention_status; u8 latch_status; u8 adapter_status; - u32 address; enum pci_bus_speed max_bus_speed; enum pci_bus_speed cur_bus_speed; }; @@ -166,15 +161,14 @@ struct hotplug_slot { /* Variables below this are for use only by the hotplug pci core. */ struct list_head slot_list; - struct kobject kobj; + struct pci_slot *pci_slot; }; #define to_hotplug_slot(n) container_of(n, struct hotplug_slot, kobj) -extern int pci_hp_register (struct hotplug_slot *slot); -extern int pci_hp_deregister (struct hotplug_slot *slot); +extern int pci_hp_register(struct hotplug_slot *, struct pci_bus *, int nr); +extern int pci_hp_deregister(struct hotplug_slot *slot); extern int __must_check pci_hp_change_slot_info (struct hotplug_slot *slot, struct hotplug_slot_info *info); -extern struct kset *pci_hotplug_slots_kset; /* PCI Setting Record (Type 0) */ struct hpp_type0 { -- cgit v0.10.2 From 8344b568f5bdc7ee1bba909de3294c6348c36056 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Tue, 10 Jun 2008 15:30:42 -0600 Subject: PCI: ACPI PCI slot detection driver Detect all physical PCI slots as described by ACPI, and create entries in /sys/bus/pci/slots/. Not all physical slots are hotpluggable, and the acpiphp module does not detect them. Now we know the physical PCI geography of our system, without caring about hotplug. [kaneshige.kenji@jp.fujitsu.com: export-kobject_rename-for-pci_hotplug_core] Signed-off-by: Kenji Kaneshige Acked-by: Greg KH [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix build with CONFIG_DMI=n] Signed-off-by: Alex Chiang Cc: Greg KH Cc: Kristen Carlson Accardi Cc: Len Brown Acked-by: Len Brown Acked-by: Kenji Kaneshige Signed-off-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Jesse Barnes diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index c52fca8..250d41a 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -338,6 +338,15 @@ config ACPI_EC the battery and thermal drivers. If you are compiling for a mobile system, say Y. +config ACPI_PCI_SLOT + tristate "PCI slot detection driver" + default n + help + This driver will attempt to discover all PCI slots in your system, + and creates entries in /sys/bus/pci/slots/. This feature can + help you correlate PCI bus addresses with the physical geography + of your slots. If you are unsure, say N. + config ACPI_POWER bool default y diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 40b0fca..579c29c 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_ACPI_DOCK) += dock.o obj-$(CONFIG_ACPI_BAY) += bay.o obj-$(CONFIG_ACPI_VIDEO) += video.o obj-y += pci_root.o pci_link.o pci_irq.o pci_bind.o +obj-$(CONFIG_ACPI_PCI_SLOT) += pci_slot.o obj-$(CONFIG_ACPI_POWER) += power.o obj-$(CONFIG_ACPI_PROCESSOR) += processor.o obj-$(CONFIG_ACPI_CONTAINER) += container.o diff --git a/drivers/acpi/pci_slot.c b/drivers/acpi/pci_slot.c new file mode 100644 index 0000000..b9ab030 --- /dev/null +++ b/drivers/acpi/pci_slot.c @@ -0,0 +1,368 @@ +/* + * pci_slot.c - ACPI PCI Slot Driver + * + * The code here is heavily leveraged from the acpiphp module. + * Thanks to Matthew Wilcox for much guidance. + * Thanks to Kenji Kaneshige for code + * review and fixes. + * + * Copyright (C) 2007 Alex Chiang + * Copyright (C) 2007 Hewlett-Packard Development Company, L.P. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static int debug; +static int check_sta_before_sun; + +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "Alex Chiang " +#define DRIVER_DESC "ACPI PCI Slot Detection Driver" +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL"); +MODULE_PARM_DESC(debug, "Debugging mode enabled or not"); +module_param(debug, bool, 0644); + +#define _COMPONENT ACPI_PCI_COMPONENT +ACPI_MODULE_NAME("pci_slot"); + +#define MY_NAME "pci_slot" +#define err(format, arg...) printk(KERN_ERR "%s: " format , MY_NAME , ## arg) +#define info(format, arg...) printk(KERN_INFO "%s: " format , MY_NAME , ## arg) +#define dbg(format, arg...) \ + do { \ + if (debug) \ + printk(KERN_DEBUG "%s: " format, \ + MY_NAME , ## arg); \ + } while (0) + +#define SLOT_NAME_SIZE 20 /* Inspired by #define in acpiphp.h */ + +struct acpi_pci_slot { + acpi_handle root_handle; /* handle of the root bridge */ + struct pci_slot *pci_slot; /* corresponding pci_slot */ + struct list_head list; /* node in the list of slots */ +}; + +static int acpi_pci_slot_add(acpi_handle handle); +static void acpi_pci_slot_remove(acpi_handle handle); + +static LIST_HEAD(slot_list); +static DEFINE_MUTEX(slot_list_lock); +static struct acpi_pci_driver acpi_pci_slot_driver = { + .add = acpi_pci_slot_add, + .remove = acpi_pci_slot_remove, +}; + +static int +check_slot(acpi_handle handle, int *device, unsigned long *sun) +{ + int retval = 0; + unsigned long adr, sta; + acpi_status status; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + + acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer); + dbg("Checking slot on path: %s\n", (char *)buffer.pointer); + + if (check_sta_before_sun) { + /* If SxFy doesn't have _STA, we just assume it's there */ + status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); + if (ACPI_SUCCESS(status) && !(sta & ACPI_STA_DEVICE_PRESENT)) { + retval = -1; + goto out; + } + } + + status = acpi_evaluate_integer(handle, "_ADR", NULL, &adr); + if (ACPI_FAILURE(status)) { + dbg("_ADR returned %d on %s\n", status, (char *)buffer.pointer); + retval = -1; + goto out; + } + + *device = (adr >> 16) & 0xffff; + + /* No _SUN == not a slot == bail */ + status = acpi_evaluate_integer(handle, "_SUN", NULL, sun); + if (ACPI_FAILURE(status)) { + dbg("_SUN returned %d on %s\n", status, (char *)buffer.pointer); + retval = -1; + goto out; + } + +out: + kfree(buffer.pointer); + return retval; +} + +struct callback_args { + acpi_walk_callback user_function; /* only for walk_p2p_bridge */ + struct pci_bus *pci_bus; + acpi_handle root_handle; +}; + +/* + * register_slot + * + * Called once for each SxFy object in the namespace. Don't worry about + * calling pci_create_slot multiple times for the same pci_bus:device, + * since each subsequent call simply bumps the refcount on the pci_slot. + * + * The number of calls to pci_destroy_slot from unregister_slot is + * symmetrical. + */ +static acpi_status +register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) +{ + int device; + unsigned long sun; + char name[SLOT_NAME_SIZE]; + struct acpi_pci_slot *slot; + struct pci_slot *pci_slot; + struct callback_args *parent_context = context; + struct pci_bus *pci_bus = parent_context->pci_bus; + + if (check_slot(handle, &device, &sun)) + return AE_OK; + + slot = kmalloc(sizeof(*slot), GFP_KERNEL); + if (!slot) { + err("%s: cannot allocate memory\n", __func__); + return AE_OK; + } + + snprintf(name, sizeof(name), "%u", (u32)sun); + pci_slot = pci_create_slot(pci_bus, device, name); + if (IS_ERR(pci_slot)) { + err("pci_create_slot returned %ld\n", PTR_ERR(pci_slot)); + kfree(slot); + } + + slot->root_handle = parent_context->root_handle; + slot->pci_slot = pci_slot; + INIT_LIST_HEAD(&slot->list); + mutex_lock(&slot_list_lock); + list_add(&slot->list, &slot_list); + mutex_unlock(&slot_list_lock); + + dbg("pci_slot: %p, pci_bus: %x, device: %d, name: %s\n", + pci_slot, pci_bus->number, device, name); + + return AE_OK; +} + +/* + * walk_p2p_bridge - discover and walk p2p bridges + * @handle: points to an acpi_pci_root + * @context: p2p_bridge_context pointer + * + * Note that when we call ourselves recursively, we pass a different + * value of pci_bus in the child_context. + */ +static acpi_status +walk_p2p_bridge(acpi_handle handle, u32 lvl, void *context, void **rv) +{ + int device, function; + unsigned long adr; + acpi_status status; + acpi_handle dummy_handle; + acpi_walk_callback user_function; + + struct pci_dev *dev; + struct pci_bus *pci_bus; + struct callback_args child_context; + struct callback_args *parent_context = context; + + pci_bus = parent_context->pci_bus; + user_function = parent_context->user_function; + + status = acpi_get_handle(handle, "_ADR", &dummy_handle); + if (ACPI_FAILURE(status)) + return AE_OK; + + status = acpi_evaluate_integer(handle, "_ADR", NULL, &adr); + if (ACPI_FAILURE(status)) + return AE_OK; + + device = (adr >> 16) & 0xffff; + function = adr & 0xffff; + + dev = pci_get_slot(pci_bus, PCI_DEVFN(device, function)); + if (!dev || !dev->subordinate) + goto out; + + child_context.pci_bus = dev->subordinate; + child_context.user_function = user_function; + child_context.root_handle = parent_context->root_handle; + + dbg("p2p bridge walk, pci_bus = %x\n", dev->subordinate->number); + status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, + user_function, &child_context, NULL); + if (ACPI_FAILURE(status)) + goto out; + + status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, + walk_p2p_bridge, &child_context, NULL); +out: + pci_dev_put(dev); + return AE_OK; +} + +/* + * walk_root_bridge - generic root bridge walker + * @handle: points to an acpi_pci_root + * @user_function: user callback for slot objects + * + * Call user_function for all objects underneath this root bridge. + * Walk p2p bridges underneath us and call user_function on those too. + */ +static int +walk_root_bridge(acpi_handle handle, acpi_walk_callback user_function) +{ + int seg, bus; + unsigned long tmp; + acpi_status status; + acpi_handle dummy_handle; + struct pci_bus *pci_bus; + struct callback_args context; + + /* If the bridge doesn't have _STA, we assume it is always there */ + status = acpi_get_handle(handle, "_STA", &dummy_handle); + if (ACPI_SUCCESS(status)) { + status = acpi_evaluate_integer(handle, "_STA", NULL, &tmp); + if (ACPI_FAILURE(status)) { + info("%s: _STA evaluation failure\n", __func__); + return 0; + } + if ((tmp & ACPI_STA_DEVICE_FUNCTIONING) == 0) + /* don't register this object */ + return 0; + } + + status = acpi_evaluate_integer(handle, "_SEG", NULL, &tmp); + seg = ACPI_SUCCESS(status) ? tmp : 0; + + status = acpi_evaluate_integer(handle, "_BBN", NULL, &tmp); + bus = ACPI_SUCCESS(status) ? tmp : 0; + + pci_bus = pci_find_bus(seg, bus); + if (!pci_bus) + return 0; + + context.pci_bus = pci_bus; + context.user_function = user_function; + context.root_handle = handle; + + dbg("root bridge walk, pci_bus = %x\n", pci_bus->number); + status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, + user_function, &context, NULL); + if (ACPI_FAILURE(status)) + return status; + + status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, + walk_p2p_bridge, &context, NULL); + if (ACPI_FAILURE(status)) + err("%s: walk_p2p_bridge failure - %d\n", __func__, status); + + return status; +} + +/* + * acpi_pci_slot_add + * @handle: points to an acpi_pci_root + */ +static int +acpi_pci_slot_add(acpi_handle handle) +{ + acpi_status status; + + status = walk_root_bridge(handle, register_slot); + if (ACPI_FAILURE(status)) + err("%s: register_slot failure - %d\n", __func__, status); + + return status; +} + +/* + * acpi_pci_slot_remove + * @handle: points to an acpi_pci_root + */ +static void +acpi_pci_slot_remove(acpi_handle handle) +{ + struct acpi_pci_slot *slot, *tmp; + + mutex_lock(&slot_list_lock); + list_for_each_entry_safe(slot, tmp, &slot_list, list) { + if (slot->root_handle == handle) { + list_del(&slot->list); + pci_destroy_slot(slot->pci_slot); + kfree(slot); + } + } + mutex_unlock(&slot_list_lock); +} + +static int do_sta_before_sun(const struct dmi_system_id *d) +{ + info("%s detected: will evaluate _STA before calling _SUN\n", d->ident); + check_sta_before_sun = 1; + return 0; +} + +static struct dmi_system_id acpi_pci_slot_dmi_table[] __initdata = { + /* + * Fujitsu Primequest machines will return 1023 to indicate an + * error if the _SUN method is evaluated on SxFy objects that + * are not present (as indicated by _STA), so for those machines, + * we want to check _STA before evaluating _SUN. + */ + { + .callback = do_sta_before_sun, + .ident = "Fujitsu PRIMEQUEST", + .matches = { + DMI_MATCH(DMI_BIOS_VENDOR, "FUJITSU LIMITED"), + DMI_MATCH(DMI_BIOS_VERSION, "PRIMEQUEST"), + }, + }, + {} +}; + +static int __init +acpi_pci_slot_init(void) +{ + dmi_check_system(acpi_pci_slot_dmi_table); + acpi_pci_register_driver(&acpi_pci_slot_driver); + return 0; +} + +static void __exit +acpi_pci_slot_exit(void) +{ + acpi_pci_unregister_driver(&acpi_pci_slot_driver); +} + +module_init(acpi_pci_slot_init); +module_exit(acpi_pci_slot_exit); diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index 4df31f3..b3b2d8c 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -572,6 +572,11 @@ int pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, int slot_nr) if (tmp) return -EEXIST; + /* + * No problems if we call this interface from both ACPI_PCI_SLOT + * driver and call it here again. If we've already created the + * pci_slot, the interface will simply bump the refcount. + */ pci_slot = pci_create_slot(bus, slot_nr, slot->name); if (IS_ERR(pci_slot)) return PTR_ERR(pci_slot); @@ -585,6 +590,17 @@ int pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, int slot_nr) slot->pci_slot = pci_slot; pci_slot->hotplug = slot; + /* + * Allow pcihp drivers to override the ACPI_PCI_SLOT name. + */ + if (strcmp(kobject_name(&pci_slot->kobj), slot->name)) { + result = kobject_rename(&pci_slot->kobj, slot->name); + if (result) { + pci_destroy_slot(pci_slot); + return result; + } + } + spin_lock(&pci_hotplug_slot_list_lock); list_add(&slot->slot_list, &pci_hotplug_slot_list); spin_unlock(&pci_hotplug_slot_list_lock); diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c index 2036a43..410fe03 100644 --- a/drivers/pci/hotplug/sgi_hotplug.c +++ b/drivers/pci/hotplug/sgi_hotplug.c @@ -668,7 +668,7 @@ static int sn_hotplug_slot_register(struct pci_bus *pci_bus) register_err: dev_dbg(&pci_bus->self->dev, "bus failed to register with err = %d\n", - rc); + rc); alloc_err: if (rc == -ENOMEM) diff --git a/lib/kobject.c b/lib/kobject.c index 718e510..dcade05 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -439,6 +439,7 @@ out: return error; } +EXPORT_SYMBOL_GPL(kobject_rename); /** * kobject_move - move object to another parent -- cgit v0.10.2 From 06166780eb53685e72b589814d535d1f9948e761 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Thu, 5 Jun 2008 01:15:40 +0200 Subject: ACPI PM: acpi_pm_device_sleep_state() cleanup Get rid of a superfluous acpi_pm_device_sleep_state() parameter. The only legitimate value of that parameter must be derived from the first parameter, which is what all the callers already do. (However, this does not address the fact that ACPI still doesn't set up those flags.) Signed-off-by: David Brownell Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki Signed-off-by: Len Brown diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c index c3b0cd8..fbd40e9 100644 --- a/drivers/acpi/sleep/main.c +++ b/drivers/acpi/sleep/main.c @@ -369,8 +369,8 @@ int acpi_suspend(u32 acpi_state) /** * acpi_pm_device_sleep_state - return preferred power state of ACPI device * in the system sleep state given by %acpi_target_sleep_state - * @dev: device to examine - * @wake: if set, the device should be able to wake up the system + * @dev: device to examine; its driver model wakeup flags control + * whether it should be able to wake up the system * @d_min_p: used to store the upper limit of allowed states range * Return value: preferred power state of the device on success, -ENODEV on * failure (ie. if there's no 'struct acpi_device' for @dev) @@ -388,7 +388,7 @@ int acpi_suspend(u32 acpi_state) * via @wake. */ -int acpi_pm_device_sleep_state(struct device *dev, int wake, int *d_min_p) +int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p) { acpi_handle handle = DEVICE_ACPI_HANDLE(dev); struct acpi_device *adev; @@ -427,7 +427,7 @@ int acpi_pm_device_sleep_state(struct device *dev, int wake, int *d_min_p) * can wake the system. _S0W may be valid, too. */ if (acpi_target_sleep_state == ACPI_STATE_S0 || - (wake && adev->wakeup.state.enabled && + (device_may_wakeup(dev) && adev->wakeup.state.enabled && adev->wakeup.sleep_state <= acpi_target_sleep_state)) { acpi_status status; diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 9d6fc8e..caabf05 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -298,8 +298,7 @@ static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev, { int acpi_state; - acpi_state = acpi_pm_device_sleep_state(&pdev->dev, - device_may_wakeup(&pdev->dev), NULL); + acpi_state = acpi_pm_device_sleep_state(&pdev->dev, NULL); if (acpi_state < 0) return PCI_POWER_ERROR; diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index 5090277..c1b9ea3 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -117,9 +117,7 @@ static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state) { int power_state; - power_state = acpi_pm_device_sleep_state(&dev->dev, - device_may_wakeup(&dev->dev), - NULL); + power_state = acpi_pm_device_sleep_state(&dev->dev, NULL); if (power_state < 0) power_state = (state.event == PM_EVENT_ON) ? ACPI_STATE_D0 : ACPI_STATE_D3; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 2f1c68c..db90a74 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -376,9 +376,9 @@ acpi_handle acpi_get_pci_rootbridge_handle(unsigned int, unsigned int); #define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)((dev)->archdata.acpi_handle)) #ifdef CONFIG_PM_SLEEP -int acpi_pm_device_sleep_state(struct device *, int, int *); +int acpi_pm_device_sleep_state(struct device *, int *); #else /* !CONFIG_PM_SLEEP */ -static inline int acpi_pm_device_sleep_state(struct device *d, int w, int *p) +static inline int acpi_pm_device_sleep_state(struct device *d, int *p) { if (p) *p = ACPI_STATE_D0; -- cgit v0.10.2 From 8d2bdf49481b27096e242119e73abe9348c1019b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Jun 2008 01:16:37 +0200 Subject: PCI ACPI: Drop the second argument of platform_pci_choose_state Since the second argument of acpi_pci_choose_state() and platform_pci_choose_state() is never used, remove it. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Len Brown diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index caabf05..dab9d47 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -293,8 +293,7 @@ EXPORT_SYMBOL(pci_osc_control_set); * choose highest power _SxD or any lower power */ -static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev, - pm_message_t state) +static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev) { int acpi_state; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e4548ab..75c6023 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -508,7 +508,7 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) return 0; } -pci_power_t (*platform_pci_choose_state)(struct pci_dev *dev, pm_message_t state); +pci_power_t (*platform_pci_choose_state)(struct pci_dev *dev); /** * pci_choose_state - Choose the power state of a PCI device @@ -528,7 +528,7 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state) return PCI_D0; if (platform_pci_choose_state) { - ret = platform_pci_choose_state(dev, state); + ret = platform_pci_choose_state(dev); if (ret != PCI_POWER_ERROR) return ret; } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 0a497c1b..ff30b3c 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -6,8 +6,7 @@ extern void pci_remove_sysfs_dev_files(struct pci_dev *pdev); extern void pci_cleanup_rom(struct pci_dev *dev); /* Firmware callbacks */ -extern pci_power_t (*platform_pci_choose_state)(struct pci_dev *dev, - pm_message_t state); +extern pci_power_t (*platform_pci_choose_state)(struct pci_dev *dev); extern int (*platform_pci_set_power_state)(struct pci_dev *dev, pci_power_t state); -- cgit v0.10.2 From 0e6859d49ff194e01afc229c996e3aefca1a0539 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Jun 2008 01:17:28 +0200 Subject: ACPI PM: Remove obsolete Toshiba workaround Remove the obsolete workaround for a Toshiba Satellite 4030cdt S1 problem from drivers/acpi/sleep/main.c . Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Len Brown diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c index fbd40e9..0f2caea 100644 --- a/drivers/acpi/sleep/main.c +++ b/drivers/acpi/sleep/main.c @@ -62,8 +62,6 @@ static u32 acpi_suspend_states[] = { [PM_SUSPEND_MAX] = ACPI_STATE_S5 }; -static int init_8259A_after_S1; - /** * acpi_suspend_begin - Set the target system sleep state to the state * associated with given @pm_state, if supported. @@ -186,13 +184,6 @@ static void acpi_suspend_finish(void) acpi_set_firmware_waking_vector((acpi_physical_address) 0); acpi_target_sleep_state = ACPI_STATE_S0; - -#ifdef CONFIG_X86 - if (init_8259A_after_S1) { - printk("Broken toshiba laptop -> kicking interrupts\n"); - init_8259A(0); - } -#endif } /** @@ -232,26 +223,6 @@ static struct platform_suspend_ops acpi_suspend_ops = { .finish = acpi_suspend_finish, .end = acpi_suspend_end, }; - -/* - * Toshiba fails to preserve interrupts over S1, reinitialization - * of 8259 is needed after S1 resume. - */ -static int __init init_ints_after_s1(const struct dmi_system_id *d) -{ - printk(KERN_WARNING "%s with broken S1 detected.\n", d->ident); - init_8259A_after_S1 = 1; - return 0; -} - -static struct dmi_system_id __initdata acpisleep_dmi_table[] = { - { - .callback = init_ints_after_s1, - .ident = "Toshiba Satellite 4030cdt", - .matches = {DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),}, - }, - {}, -}; #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATION @@ -473,8 +444,6 @@ int __init acpi_sleep_init(void) u8 type_a, type_b; #ifdef CONFIG_SUSPEND int i = 0; - - dmi_check_system(acpisleep_dmi_table); #endif if (acpi_disabled) -- cgit v0.10.2 From d8f3de0d2412bb91639cfefc5b3c79dbf3812212 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 12 Jun 2008 23:24:06 +0200 Subject: Suspend-related patches for 2.6.27 ACPI PM: Add possibility to change suspend sequence There are some systems out there that don't work correctly with our current suspend/hibernation code ordering. Provide a workaround for these systems allowing them to pass 'acpi_sleep=old_ordering' in the kernel command line so that it will use the pre-ACPI 2.0 ("old") suspend code ordering. Unfortunately, this requires us to add a platform hook to the resuming of devices for recovering the platform in case one of the device drivers' .suspend() routines returns error code. Namely, ACPI 1.0 specifies that _PTS should be called before suspending devices, but _WAK still should be called before resuming them in order to undo the changes made by _PTS. However, if there is an error during suspending devices, they are automatically resumed without returning control to the PM core, so the _WAK has to be called from within device_resume() in that cases. The patch also reorders and refactors the ACPI suspend/hibernation code to avoid duplication as far as reasonably possible. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9cf7b34..18d793e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -147,10 +147,14 @@ and is between 256 and 4096 characters. It is defined in the file default: 0 acpi_sleep= [HW,ACPI] Sleep options - Format: { s3_bios, s3_mode, s3_beep } + Format: { s3_bios, s3_mode, s3_beep, old_ordering } See Documentation/power/video.txt for s3_bios and s3_mode. s3_beep is for debugging; it makes the PC's speaker beep as soon as the kernel's real-mode entry point is called. + old_ordering causes the ACPI 1.0 ordering of the _PTS + control method, wrt putting devices into low power + states, to be enforced (the ACPI 2.0 ordering of _PTS is + used by default). acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode Format: { level | edge | high | low } diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index afc25ee..882e970 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -124,6 +124,8 @@ static int __init acpi_sleep_setup(char *str) acpi_realmode_flags |= 2; if (strncmp(str, "s3_beep", 7) == 0) acpi_realmode_flags |= 4; + if (strncmp(str, "old_ordering", 12) == 0) + acpi_old_suspend_ordering(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c index 0f2caea..4addf8a 100644 --- a/drivers/acpi/sleep/main.c +++ b/drivers/acpi/sleep/main.c @@ -24,10 +24,6 @@ u8 sleep_states[ACPI_S_STATE_COUNT]; -#ifdef CONFIG_PM_SLEEP -static u32 acpi_target_sleep_state = ACPI_STATE_S0; -#endif - static int acpi_sleep_prepare(u32 acpi_state) { #ifdef CONFIG_ACPI_SLEEP @@ -50,9 +46,96 @@ static int acpi_sleep_prepare(u32 acpi_state) return 0; } -#ifdef CONFIG_SUSPEND -static struct platform_suspend_ops acpi_suspend_ops; +#ifdef CONFIG_PM_SLEEP +static u32 acpi_target_sleep_state = ACPI_STATE_S0; + +/* + * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the + * user to request that behavior by using the 'acpi_old_suspend_ordering' + * kernel command line option that causes the following variable to be set. + */ +static bool old_suspend_ordering; + +void __init acpi_old_suspend_ordering(void) +{ + old_suspend_ordering = true; +} + +/** + * acpi_pm_disable_gpes - Disable the GPEs. + */ +static int acpi_pm_disable_gpes(void) +{ + acpi_hw_disable_all_gpes(); + return 0; +} + +/** + * __acpi_pm_prepare - Prepare the platform to enter the target state. + * + * If necessary, set the firmware waking vector and do arch-specific + * nastiness to get the wakeup code to the waking vector. + */ +static int __acpi_pm_prepare(void) +{ + int error = acpi_sleep_prepare(acpi_target_sleep_state); + + if (error) + acpi_target_sleep_state = ACPI_STATE_S0; + return error; +} + +/** + * acpi_pm_prepare - Prepare the platform to enter the target sleep + * state and disable the GPEs. + */ +static int acpi_pm_prepare(void) +{ + int error = __acpi_pm_prepare(); + + if (!error) + acpi_hw_disable_all_gpes(); + return error; +} + +/** + * acpi_pm_finish - Instruct the platform to leave a sleep state. + * + * This is called after we wake back up (or if entering the sleep state + * failed). + */ +static void acpi_pm_finish(void) +{ + u32 acpi_state = acpi_target_sleep_state; + + if (acpi_state == ACPI_STATE_S0) + return; + printk(KERN_INFO PREFIX "Waking up from system sleep state S%d\n", + acpi_state); + acpi_disable_wakeup_device(acpi_state); + acpi_leave_sleep_state(acpi_state); + + /* reset firmware waking vector */ + acpi_set_firmware_waking_vector((acpi_physical_address) 0); + + acpi_target_sleep_state = ACPI_STATE_S0; +} + +/** + * acpi_pm_end - Finish up suspend sequence. + */ +static void acpi_pm_end(void) +{ + /* + * This is necessary in case acpi_pm_finish() is not called during a + * failing transition to a sleep state. + */ + acpi_target_sleep_state = ACPI_STATE_S0; +} +#endif /* CONFIG_PM_SLEEP */ + +#ifdef CONFIG_SUSPEND extern void do_suspend_lowlevel(void); static u32 acpi_suspend_states[] = { @@ -66,7 +149,6 @@ static u32 acpi_suspend_states[] = { * acpi_suspend_begin - Set the target system sleep state to the state * associated with given @pm_state, if supported. */ - static int acpi_suspend_begin(suspend_state_t pm_state) { u32 acpi_state = acpi_suspend_states[pm_state]; @@ -83,25 +165,6 @@ static int acpi_suspend_begin(suspend_state_t pm_state) } /** - * acpi_suspend_prepare - Do preliminary suspend work. - * - * If necessary, set the firmware waking vector and do arch-specific - * nastiness to get the wakeup code to the waking vector. - */ - -static int acpi_suspend_prepare(void) -{ - int error = acpi_sleep_prepare(acpi_target_sleep_state); - - if (error) { - acpi_target_sleep_state = ACPI_STATE_S0; - return error; - } - - return ACPI_SUCCESS(acpi_hw_disable_all_gpes()) ? 0 : -EFAULT; -} - -/** * acpi_suspend_enter - Actually enter a sleep state. * @pm_state: ignored * @@ -109,7 +172,6 @@ static int acpi_suspend_prepare(void) * assembly, which in turn call acpi_enter_sleep_state(). * It's unfortunate, but it works. Please fix if you're feeling frisky. */ - static int acpi_suspend_enter(suspend_state_t pm_state) { acpi_status status = AE_OK; @@ -166,39 +228,6 @@ static int acpi_suspend_enter(suspend_state_t pm_state) return ACPI_SUCCESS(status) ? 0 : -EFAULT; } -/** - * acpi_suspend_finish - Instruct the platform to leave a sleep state. - * - * This is called after we wake back up (or if entering the sleep state - * failed). - */ - -static void acpi_suspend_finish(void) -{ - u32 acpi_state = acpi_target_sleep_state; - - acpi_disable_wakeup_device(acpi_state); - acpi_leave_sleep_state(acpi_state); - - /* reset firmware waking vector */ - acpi_set_firmware_waking_vector((acpi_physical_address) 0); - - acpi_target_sleep_state = ACPI_STATE_S0; -} - -/** - * acpi_suspend_end - Finish up suspend sequence. - */ - -static void acpi_suspend_end(void) -{ - /* - * This is necessary in case acpi_suspend_finish() is not called during a - * failing transition to a sleep state. - */ - acpi_target_sleep_state = ACPI_STATE_S0; -} - static int acpi_suspend_state_valid(suspend_state_t pm_state) { u32 acpi_state; @@ -218,10 +247,39 @@ static int acpi_suspend_state_valid(suspend_state_t pm_state) static struct platform_suspend_ops acpi_suspend_ops = { .valid = acpi_suspend_state_valid, .begin = acpi_suspend_begin, - .prepare = acpi_suspend_prepare, + .prepare = acpi_pm_prepare, + .enter = acpi_suspend_enter, + .finish = acpi_pm_finish, + .end = acpi_pm_end, +}; + +/** + * acpi_suspend_begin_old - Set the target system sleep state to the + * state associated with given @pm_state, if supported, and + * execute the _PTS control method. This function is used if the + * pre-ACPI 2.0 suspend ordering has been requested. + */ +static int acpi_suspend_begin_old(suspend_state_t pm_state) +{ + int error = acpi_suspend_begin(pm_state); + + if (!error) + error = __acpi_pm_prepare(); + return error; +} + +/* + * The following callbacks are used if the pre-ACPI 2.0 suspend ordering has + * been requested. + */ +static struct platform_suspend_ops acpi_suspend_ops_old = { + .valid = acpi_suspend_state_valid, + .begin = acpi_suspend_begin_old, + .prepare = acpi_pm_disable_gpes, .enter = acpi_suspend_enter, - .finish = acpi_suspend_finish, - .end = acpi_suspend_end, + .finish = acpi_pm_finish, + .end = acpi_pm_end, + .recover = acpi_pm_finish, }; #endif /* CONFIG_SUSPEND */ @@ -229,22 +287,9 @@ static struct platform_suspend_ops acpi_suspend_ops = { static int acpi_hibernation_begin(void) { acpi_target_sleep_state = ACPI_STATE_S4; - return 0; } -static int acpi_hibernation_prepare(void) -{ - int error = acpi_sleep_prepare(ACPI_STATE_S4); - - if (error) { - acpi_target_sleep_state = ACPI_STATE_S0; - return error; - } - - return ACPI_SUCCESS(acpi_hw_disable_all_gpes()) ? 0 : -EFAULT; -} - static int acpi_hibernation_enter(void) { acpi_status status = AE_OK; @@ -274,52 +319,55 @@ static void acpi_hibernation_leave(void) acpi_leave_sleep_state_prep(ACPI_STATE_S4); } -static void acpi_hibernation_finish(void) +static void acpi_pm_enable_gpes(void) { - acpi_disable_wakeup_device(ACPI_STATE_S4); - acpi_leave_sleep_state(ACPI_STATE_S4); - - /* reset firmware waking vector */ - acpi_set_firmware_waking_vector((acpi_physical_address) 0); - - acpi_target_sleep_state = ACPI_STATE_S0; + acpi_hw_enable_all_runtime_gpes(); } -static void acpi_hibernation_end(void) -{ - /* - * This is necessary in case acpi_hibernation_finish() is not called - * during a failing transition to the sleep state. - */ - acpi_target_sleep_state = ACPI_STATE_S0; -} +static struct platform_hibernation_ops acpi_hibernation_ops = { + .begin = acpi_hibernation_begin, + .end = acpi_pm_end, + .pre_snapshot = acpi_pm_prepare, + .finish = acpi_pm_finish, + .prepare = acpi_pm_prepare, + .enter = acpi_hibernation_enter, + .leave = acpi_hibernation_leave, + .pre_restore = acpi_pm_disable_gpes, + .restore_cleanup = acpi_pm_enable_gpes, +}; -static int acpi_hibernation_pre_restore(void) +/** + * acpi_hibernation_begin_old - Set the target system sleep state to + * ACPI_STATE_S4 and execute the _PTS control method. This + * function is used if the pre-ACPI 2.0 suspend ordering has been + * requested. + */ +static int acpi_hibernation_begin_old(void) { - acpi_status status; - - status = acpi_hw_disable_all_gpes(); - - return ACPI_SUCCESS(status) ? 0 : -EFAULT; -} + int error = acpi_sleep_prepare(ACPI_STATE_S4); -static void acpi_hibernation_restore_cleanup(void) -{ - acpi_hw_enable_all_runtime_gpes(); + if (!error) + acpi_target_sleep_state = ACPI_STATE_S4; + return error; } -static struct platform_hibernation_ops acpi_hibernation_ops = { - .begin = acpi_hibernation_begin, - .end = acpi_hibernation_end, - .pre_snapshot = acpi_hibernation_prepare, - .finish = acpi_hibernation_finish, - .prepare = acpi_hibernation_prepare, +/* + * The following callbacks are used if the pre-ACPI 2.0 suspend ordering has + * been requested. + */ +static struct platform_hibernation_ops acpi_hibernation_ops_old = { + .begin = acpi_hibernation_begin_old, + .end = acpi_pm_end, + .pre_snapshot = acpi_pm_disable_gpes, + .finish = acpi_pm_finish, + .prepare = acpi_pm_disable_gpes, .enter = acpi_hibernation_enter, .leave = acpi_hibernation_leave, - .pre_restore = acpi_hibernation_pre_restore, - .restore_cleanup = acpi_hibernation_restore_cleanup, + .pre_restore = acpi_pm_disable_gpes, + .restore_cleanup = acpi_pm_enable_gpes, + .recover = acpi_pm_finish, }; -#endif /* CONFIG_HIBERNATION */ +#endif /* CONFIG_HIBERNATION */ int acpi_suspend(u32 acpi_state) { @@ -461,13 +509,15 @@ int __init acpi_sleep_init(void) } } - suspend_set_ops(&acpi_suspend_ops); + suspend_set_ops(old_suspend_ordering ? + &acpi_suspend_ops_old : &acpi_suspend_ops); #endif #ifdef CONFIG_HIBERNATION status = acpi_get_sleep_type_data(ACPI_STATE_S4, &type_a, &type_b); if (ACPI_SUCCESS(status)) { - hibernation_set_ops(&acpi_hibernation_ops); + hibernation_set_ops(old_suspend_ordering ? + &acpi_hibernation_ops_old : &acpi_hibernation_ops); sleep_states[ACPI_STATE_S4] = 1; printk(" S4"); } diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index d571204..3250c52 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -781,8 +781,6 @@ int device_suspend(pm_message_t state) error = dpm_prepare(state); if (!error) error = dpm_suspend(state); - if (error) - device_resume(resume_event(state)); return error; } EXPORT_SYMBOL_GPL(device_suspend); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 41f7ce7..33adcf9 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -234,6 +234,9 @@ int acpi_check_region(resource_size_t start, resource_size_t n, int acpi_check_mem_region(resource_size_t start, resource_size_t n, const char *name); +#ifdef CONFIG_PM_SLEEP +void __init acpi_old_suspend_ordering(void); +#endif /* CONFIG_PM_SLEEP */ #else /* CONFIG_ACPI */ static inline int early_acpi_boot_init(void) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index a697742..e8e6915 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -86,6 +86,11 @@ typedef int __bitwise suspend_state_t; * that implement @begin(), but platforms implementing @begin() should * also provide a @end() which cleans up transitions aborted before * @enter(). + * + * @recover: Recover the platform from a suspend failure. + * Called by the PM core if the suspending of devices fails. + * This callback is optional and should only be implemented by platforms + * which require special recovery actions in that situation. */ struct platform_suspend_ops { int (*valid)(suspend_state_t state); @@ -94,6 +99,7 @@ struct platform_suspend_ops { int (*enter)(suspend_state_t state); void (*finish)(void); void (*end)(void); + void (*recover)(void); }; #ifdef CONFIG_SUSPEND @@ -149,7 +155,7 @@ extern void mark_free_pages(struct zone *zone); * The methods in this structure allow a platform to carry out special * operations required by it during a hibernation transition. * - * All the methods below must be implemented. + * All the methods below, except for @recover(), must be implemented. * * @begin: Tell the platform driver that we're starting hibernation. * Called right after shrinking memory and before freezing devices. @@ -189,6 +195,11 @@ extern void mark_free_pages(struct zone *zone); * @restore_cleanup: Clean up after a failing image restoration. * Called right after the nonboot CPUs have been enabled and before * thawing devices (runs with IRQs on). + * + * @recover: Recover the platform from a failure to suspend devices. + * Called by the PM core if the suspending of devices during hibernation + * fails. This callback is optional and should only be implemented by + * platforms which require special recovery actions in that situation. */ struct platform_hibernation_ops { int (*begin)(void); @@ -200,6 +211,7 @@ struct platform_hibernation_ops { void (*leave)(void); int (*pre_restore)(void); void (*restore_cleanup)(void); + void (*recover)(void); }; #ifdef CONFIG_HIBERNATION diff --git a/kernel/power/disk.c b/kernel/power/disk.c index d416be0..f011e08 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -180,6 +180,17 @@ static void platform_restore_cleanup(int platform_mode) } /** + * platform_recover - recover the platform from a failure to suspend + * devices. + */ + +static void platform_recover(int platform_mode) +{ + if (platform_mode && hibernation_ops && hibernation_ops->recover) + hibernation_ops->recover(); +} + +/** * create_image - freeze devices that need to be frozen with interrupts * off, create the hibernation image and thaw those devices. Control * reappears in this routine after a restore. @@ -258,10 +269,10 @@ int hibernation_snapshot(int platform_mode) suspend_console(); error = device_suspend(PMSG_FREEZE); if (error) - goto Resume_console; + goto Recover_platform; if (hibernation_test(TEST_DEVICES)) - goto Resume_devices; + goto Recover_platform; error = platform_pre_snapshot(platform_mode); if (error || hibernation_test(TEST_PLATFORM)) @@ -285,11 +296,14 @@ int hibernation_snapshot(int platform_mode) Resume_devices: device_resume(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); - Resume_console: resume_console(); Close: platform_end(platform_mode); return error; + + Recover_platform: + platform_recover(platform_mode); + goto Resume_devices; } /** @@ -398,8 +412,11 @@ int hibernation_platform_enter(void) suspend_console(); error = device_suspend(PMSG_HIBERNATE); - if (error) - goto Resume_console; + if (error) { + if (hibernation_ops->recover) + hibernation_ops->recover(); + goto Resume_devices; + } error = hibernation_ops->prepare(); if (error) @@ -428,7 +445,6 @@ int hibernation_platform_enter(void) hibernation_ops->finish(); Resume_devices: device_resume(PMSG_RESTORE); - Resume_console: resume_console(); Close: hibernation_ops->end(); diff --git a/kernel/power/main.c b/kernel/power/main.c index d023b6b..3398f46 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -269,11 +269,11 @@ int suspend_devices_and_enter(suspend_state_t state) error = device_suspend(PMSG_SUSPEND); if (error) { printk(KERN_ERR "PM: Some devices failed to suspend\n"); - goto Resume_console; + goto Recover_platform; } if (suspend_test(TEST_DEVICES)) - goto Resume_devices; + goto Recover_platform; if (suspend_ops->prepare) { error = suspend_ops->prepare(); @@ -294,12 +294,16 @@ int suspend_devices_and_enter(suspend_state_t state) suspend_ops->finish(); Resume_devices: device_resume(PMSG_RESUME); - Resume_console: resume_console(); Close: if (suspend_ops->end) suspend_ops->end(); return error; + + Recover_platform: + if (suspend_ops->recover) + suspend_ops->recover(); + goto Resume_devices; } /** -- cgit v0.10.2 From 15650a2f644a2f15738cf22807c090d89328f500 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 16 Jun 2008 15:29:45 -0700 Subject: x86/PCI: fixup early quirk probing On x86, we do early PCI probing to apply some quirks for chipset bugs. However, in a recent cleanup (7bcbc78dea92fdf0947fa48e248da3c993a5690f) a thinko was introduced that causes us to probe all subfunctions of even single function devices (a function was factored out of an inner loop and a "break" became a "return"). Fix that up by making check_dev_quirk() return a value so we can keep the factored code intact. Acked-by: Ingo Molnar Signed-off-by: Jesse Barnes diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 9f51e1e..8566fea 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -133,7 +133,18 @@ static struct chipset early_qrk[] __initdata = { {} }; -static void __init check_dev_quirk(int num, int slot, int func) +/** + * check_dev_quirk - apply early quirks to a given PCI device + * @num: bus number + * @slot: slot number + * @func: PCI function + * + * Check the vendor & device ID against the early quirks table. + * + * If the device is single function, let early_quirks() know so we don't + * poke at this device again. + */ +static int __init check_dev_quirk(int num, int slot, int func) { u16 class; u16 vendor; @@ -144,7 +155,7 @@ static void __init check_dev_quirk(int num, int slot, int func) class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE); if (class == 0xffff) - return; + return -1; /* no class, treat as single function */ vendor = read_pci_config_16(num, slot, func, PCI_VENDOR_ID); @@ -167,7 +178,9 @@ static void __init check_dev_quirk(int num, int slot, int func) type = read_pci_config_byte(num, slot, func, PCI_HEADER_TYPE); if (!(type & 0x80)) - return; + return -1; + + return 0; } void __init early_quirks(void) @@ -180,6 +193,9 @@ void __init early_quirks(void) /* Poor man's PCI discovery */ for (num = 0; num < 32; num++) for (slot = 0; slot < 32; slot++) - for (func = 0; func < 8; func++) - check_dev_quirk(num, slot, func); + for (func = 0; func < 8; func++) { + /* Only probe function 0 on single fn devices */ + if (check_dev_quirk(num, slot, func)) + break; + } } -- cgit v0.10.2 From 65b943f630bc177b743ca05b4cb6defe8fcffa6e Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 25 Jun 2008 15:27:34 -0700 Subject: PCI: fixup kdoc blocks for hotplug functions A few warnings snuck in as parameters were added or renamed. Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index b3b2d8c..b514162 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -544,7 +544,9 @@ out: /** * pci_hp_register - register a hotplug_slot with the PCI hotplug subsystem + * @bus: bus this slot is on * @slot: pointer to the &struct hotplug_slot to register + * @slot_nr: slot number * * Registers a hotplug slot with the pci hotplug subsystem, which will allow * userspace interaction to the slot. @@ -615,7 +617,7 @@ int pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, int slot_nr) /** * pci_hp_deregister - deregister a hotplug_slot with the PCI hotplug subsystem - * @slot: pointer to the &struct hotplug_slot to deregister + * @hotplug: pointer to the &struct hotplug_slot to deregister * * The @slot must have been registered with the pci hotplug subsystem * previously with a call to pci_hp_register(). @@ -651,7 +653,7 @@ int pci_hp_deregister(struct hotplug_slot *hotplug) /** * pci_hp_change_slot_info - changes the slot's information structure in the core - * @slot: pointer to the slot whose info has changed + * @hotplug: pointer to the slot whose info has changed * @info: pointer to the info copy into the slot's info structure * * @slot must have been registered with the pci -- cgit v0.10.2 From b30dd56d1c3786fb0c4e442a58d9a2ea78eeabb9 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Fri, 20 Jun 2008 12:06:24 +0900 Subject: pciehp: fix typo in hpc_release_ctlr Fix the typo in hpc_release_ctlr(). Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 0cfaedf..c030c94 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -610,7 +610,7 @@ static void hpc_release_ctlr(struct controller *ctrl) { /* Mask Hot-plug Interrupt Enable */ if (pcie_write_cmd(ctrl, 0, HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE)) - err("%s: Cannot mask hotplut interrupt enable\n", __func__); + err("%s: Cannot mask hotplug interrupt enable\n", __func__); /* Free interrupt handler or interrupt polling timer */ pciehp_free_irq(ctrl); -- cgit v0.10.2 From 820943b6fc4781621dee52ba026106758a727dd3 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Fri, 20 Jun 2008 12:04:33 +0900 Subject: pciehp: cleanup pcie_poll_cmd Cleanup pcie_poll_cmd(): check the slot status once before entering our completion test loop and convert the loop to a simpler while() block. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index c030c94..36ea949 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -252,20 +252,23 @@ static inline int pcie_poll_cmd(struct controller *ctrl) u16 slot_status; int timeout = 1000; - if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status)) - if (slot_status & CMD_COMPLETED) - goto completed; - for (timeout = 1000; timeout > 0; timeout -= 100) { + if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status)) { + if (slot_status & CMD_COMPLETED) { + pciehp_writew(ctrl, SLOTSTATUS, CMD_COMPLETED); + return 1; + } + } + while (timeout > 1000) { msleep(100); - if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status)) - if (slot_status & CMD_COMPLETED) - goto completed; + timeout -= 100; + if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status)) { + if (slot_status & CMD_COMPLETED) { + pciehp_writew(ctrl, SLOTSTATUS, CMD_COMPLETED); + return 1; + } + } } return 0; /* timeout */ - -completed: - pciehp_writew(ctrl, SLOTSTATUS, CMD_COMPLETED); - return timeout; } static inline void pcie_wait_cmd(struct controller *ctrl, int poll) -- cgit v0.10.2 From 66618bad123494beb30c0d590460e972e5b0977e Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Fri, 20 Jun 2008 12:05:12 +0900 Subject: pciehp: change command polling frequency Change command polling frequency to 100Hz from 10Hz in order to reduce the delay in the common case of a command completing quickly. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 36ea949..5ef4bae 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -259,8 +259,8 @@ static inline int pcie_poll_cmd(struct controller *ctrl) } } while (timeout > 1000) { - msleep(100); - timeout -= 100; + msleep(10); + timeout -= 10; if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status)) { if (slot_status & CMD_COMPLETED) { pciehp_writew(ctrl, SLOTSTATUS, CMD_COMPLETED); -- cgit v0.10.2 From 563f119080b505076429b47722fbf6374b546fa7 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Fri, 20 Jun 2008 12:05:52 +0900 Subject: pciehp: remove inline from command related functions The pcie_poll_cmd() and pcie_wait_cmd() are too large to be inlined. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 5ef4bae..59c2809 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -247,7 +247,7 @@ static inline void pciehp_free_irq(struct controller *ctrl) free_irq(ctrl->pci_dev->irq, ctrl); } -static inline int pcie_poll_cmd(struct controller *ctrl) +static int pcie_poll_cmd(struct controller *ctrl) { u16 slot_status; int timeout = 1000; @@ -271,7 +271,7 @@ static inline int pcie_poll_cmd(struct controller *ctrl) return 0; /* timeout */ } -static inline void pcie_wait_cmd(struct controller *ctrl, int poll) +static void pcie_wait_cmd(struct controller *ctrl, int poll) { unsigned int msecs = pciehp_poll_mode ? 2500 : 1000; unsigned long timeout = msecs_to_jiffies(msecs); -- cgit v0.10.2 From b86ec7ed2877f560ff069e8ed1b433a9005619c6 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Fri, 20 Jun 2008 11:54:06 +0900 Subject: Remove unnecessary 'tmp' variable from pci_hp_register(). Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index b514162..5f85b1b 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -557,7 +557,6 @@ int pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, int slot_nr) { int result; struct pci_slot *pci_slot; - struct hotplug_slot *tmp; if (slot == NULL) return -ENODEV; @@ -570,8 +569,7 @@ int pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, int slot_nr) } /* Check if we have already registered a slot with the same name. */ - tmp = get_slot_from_name(slot->name); - if (tmp) + if (get_slot_from_name(slot->name)) return -EEXIST; /* -- cgit v0.10.2 From 80ccba1186d48fa728dc4b1456cc07ffb07da501 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Jun 2008 10:52:11 -0600 Subject: PCI: use dev_printk when possible Convert printks to use dev_printk(). I converted pr_debug() to dev_dbg(). Both use KERN_DEBUG and are enabled only when DEBUG is defined. I converted printk(KERN_DEBUG) to dev_printk(KERN_DEBUG), not to dev_dbg(), because dev_dbg() is only enabled when DEBUG is defined. I converted DBG(KERN_INFO) (only in setup-bus.c) to dev_info(). The DBG() name makes it sound like debug, but it's been enabled forever, so dev_info() preserves the previous behavior. I tried to make the resource assignment formats more consistent, e.g., "BAR %d: got res [%#llx-%#llx] bus [%#llx-%#llx] flags %#lx\n" instead of sometimes using "start-end" and sometimes using "size@start". I'm not attached to one or the other; I'd just like them consistent. Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index ccb1974..15af618 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -565,9 +565,8 @@ int pci_enable_msi(struct pci_dev* dev) /* Check whether driver already requested for MSI-X irqs */ if (dev->msix_enabled) { - printk(KERN_INFO "PCI: %s: Can't enable MSI. " - "Device already has MSI-X enabled\n", - pci_name(dev)); + dev_info(&dev->dev, "can't enable MSI " + "(MSI-X already enabled)\n"); return -EINVAL; } status = msi_capability_init(dev); @@ -690,9 +689,8 @@ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) /* Check whether driver already requested for MSI irq */ if (dev->msi_enabled) { - printk(KERN_INFO "PCI: %s: Can't enable MSI-X. " - "Device already has an MSI irq assigned\n", - pci_name(dev)); + dev_info(&dev->dev, "can't enable MSI-X " + "(MSI IRQ already assigned)\n"); return -EINVAL; } status = msix_capability_init(dev, entries, nvec); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7869f8f..8b755a7 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -422,8 +422,8 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) * to sleep if we're already in a low power state */ if (state != PCI_D0 && dev->current_state > state) { - printk(KERN_ERR "%s(): %s: state=%d, current state=%d\n", - __func__, pci_name(dev), state, dev->current_state); + dev_err(&dev->dev, "invalid power transition " + "(from state %d to %d)\n", dev->current_state, state); return -EINVAL; } else if (dev->current_state == state) return 0; /* we're already there */ @@ -431,9 +431,8 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) pci_read_config_word(dev,pm + PCI_PM_PMC,&pmc); if ((pmc & PCI_PM_CAP_VER_MASK) > 3) { - printk(KERN_DEBUG - "PCI: %s has unsupported PM cap regs version (%u)\n", - pci_name(dev), pmc & PCI_PM_CAP_VER_MASK); + dev_printk(KERN_DEBUG, &dev->dev, "unsupported PM cap regs " + "version (%u)\n", pmc & PCI_PM_CAP_VER_MASK); return -EIO; } @@ -541,7 +540,8 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state) case PM_EVENT_HIBERNATE: return PCI_D3hot; default: - printk("Unrecognized suspend event %d\n", state.event); + dev_info(&dev->dev, "unrecognized suspend event %d\n", + state.event); BUG(); } return PCI_D0; @@ -566,7 +566,7 @@ static int pci_save_pcie_state(struct pci_dev *dev) else found = 1; if (!save_state) { - dev_err(&dev->dev, "Out of memory in pci_save_pcie_state\n"); + dev_err(&dev->dev, "out of memory in pci_save_pcie_state\n"); return -ENOMEM; } cap = (u16 *)&save_state->data[0]; @@ -617,7 +617,7 @@ static int pci_save_pcix_state(struct pci_dev *dev) else found = 1; if (!save_state) { - dev_err(&dev->dev, "Out of memory in pci_save_pcie_state\n"); + dev_err(&dev->dev, "out of memory in pci_save_pcie_state\n"); return -ENOMEM; } cap = (u16 *)&save_state->data[0]; @@ -683,10 +683,9 @@ pci_restore_state(struct pci_dev *dev) for (i = 15; i >= 0; i--) { pci_read_config_dword(dev, i * 4, &val); if (val != dev->saved_config_space[i]) { - printk(KERN_DEBUG "PM: Writing back config space on " - "device %s at offset %x (was %x, writing %x)\n", - pci_name(dev), i, - val, (int)dev->saved_config_space[i]); + dev_printk(KERN_DEBUG, &dev->dev, "restoring config " + "space at offset %#x (was %#x, writing %#x)\n", + i, val, (int)dev->saved_config_space[i]); pci_write_config_dword(dev,i * 4, dev->saved_config_space[i]); } @@ -1114,13 +1113,11 @@ int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) return 0; err_out: - printk (KERN_WARNING "PCI: Unable to reserve %s region #%d:%llx@%llx " - "for device %s\n", - pci_resource_flags(pdev, bar) & IORESOURCE_IO ? "I/O" : "mem", + dev_warn(&pdev->dev, "BAR %d: can't reserve %s region [%#llx-%#llx]\n", bar + 1, /* PCI BAR # */ - (unsigned long long)pci_resource_len(pdev, bar), + pci_resource_flags(pdev, bar) & IORESOURCE_IO ? "I/O" : "mem", (unsigned long long)pci_resource_start(pdev, bar), - pci_name(pdev)); + (unsigned long long)pci_resource_end(pdev, bar)); return -EBUSY; } @@ -1212,7 +1209,7 @@ pci_set_master(struct pci_dev *dev) pci_read_config_word(dev, PCI_COMMAND, &cmd); if (! (cmd & PCI_COMMAND_MASTER)) { - pr_debug("PCI: Enabling bus mastering for device %s\n", pci_name(dev)); + dev_dbg(&dev->dev, "enabling bus mastering\n"); cmd |= PCI_COMMAND_MASTER; pci_write_config_word(dev, PCI_COMMAND, cmd); } @@ -1277,8 +1274,8 @@ pci_set_cacheline_size(struct pci_dev *dev) if (cacheline_size == pci_cache_line_size) return 0; - printk(KERN_DEBUG "PCI: cache line size of %d is not supported " - "by device %s\n", pci_cache_line_size << 2, pci_name(dev)); + dev_printk(KERN_DEBUG, &dev->dev, "cache line size of %d is not " + "supported\n", pci_cache_line_size << 2); return -EINVAL; } @@ -1303,8 +1300,7 @@ pci_set_mwi(struct pci_dev *dev) pci_read_config_word(dev, PCI_COMMAND, &cmd); if (! (cmd & PCI_COMMAND_INVALIDATE)) { - pr_debug("PCI: Enabling Mem-Wr-Inval for device %s\n", - pci_name(dev)); + dev_dbg(&dev->dev, "enabling Mem-Wr-Inval\n"); cmd |= PCI_COMMAND_INVALIDATE; pci_write_config_word(dev, PCI_COMMAND, cmd); } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4562827..f829119 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -277,8 +277,8 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) res->end = res->start + sz64; #else if (sz64 > 0x100000000ULL) { - printk(KERN_ERR "PCI: Unable to handle 64-bit " - "BAR for device %s\n", pci_name(dev)); + dev_err(&dev->dev, "BAR %d: can't handle 64-bit" + " BAR\n", pos); res->start = 0; res->flags = 0; } else if (lhi) { @@ -329,7 +329,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) return; if (dev->transparent) { - printk(KERN_INFO "PCI: Transparent bridge - %s\n", pci_name(dev)); + dev_info(&dev->dev, "transparent bridge\n"); for(i = 3; i < PCI_BUS_NUM_RESOURCES; i++) child->resource[i] = child->parent->resource[i - 3]; } @@ -392,7 +392,8 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) limit |= ((long) mem_limit_hi) << 32; #else if (mem_base_hi || mem_limit_hi) { - printk(KERN_ERR "PCI: Unable to handle 64-bit address space for bridge %s\n", pci_name(dev)); + dev_err(&dev->dev, "can't handle 64-bit " + "address space for bridge\n"); return; } #endif @@ -512,8 +513,8 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, pci_read_config_dword(dev, PCI_PRIMARY_BUS, &buses); - pr_debug("PCI: Scanning behind PCI bridge %s, config %06x, pass %d\n", - pci_name(dev), buses & 0xffffff, pass); + dev_dbg(&dev->dev, "scanning behind bridge, config %06x, pass %d\n", + buses & 0xffffff, pass); /* Disable MasterAbortMode during probing to avoid reporting of bus errors (in some architectures) */ @@ -536,8 +537,8 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, * ignore it. This can happen with the i450NX chipset. */ if (pci_find_bus(pci_domain_nr(bus), busnr)) { - printk(KERN_INFO "PCI: Bus %04x:%02x already known\n", - pci_domain_nr(bus), busnr); + dev_info(&dev->dev, "bus %04x:%02x already known\n", + pci_domain_nr(bus), busnr); goto out; } @@ -721,7 +722,7 @@ static int pci_setup_device(struct pci_dev * dev) dev->class = class; class >>= 8; - pr_debug("PCI: Found %s [%04x/%04x] %06x %02x\n", pci_name(dev), + dev_dbg(&dev->dev, "found [%04x/%04x] class %06x header type %02x\n", dev->vendor, dev->device, class, dev->hdr_type); /* "Unknown power state" */ @@ -789,13 +790,13 @@ static int pci_setup_device(struct pci_dev * dev) break; default: /* unknown header */ - printk(KERN_ERR "PCI: device %s has unknown header type %02x, ignoring.\n", - pci_name(dev), dev->hdr_type); + dev_err(&dev->dev, "unknown header type %02x, " + "ignoring device\n", dev->hdr_type); return -1; bad: - printk(KERN_ERR "PCI: %s: class %x doesn't match header type %02x. Ignoring class.\n", - pci_name(dev), class, dev->hdr_type); + dev_err(&dev->dev, "ignoring class %02x (doesn't match header " + "type %02x)\n", class, dev->hdr_type); dev->class = PCI_CLASS_NOT_DEFINED; } @@ -971,7 +972,7 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) return NULL; /* Card hasn't responded in 60 seconds? Must be stuck. */ if (delay > 60 * 1000) { - printk(KERN_WARNING "Device %04x:%02x:%02x.%d not " + printk(KERN_WARNING "pci %04x:%02x:%02x.%d: not " "responding\n", pci_domain_nr(bus), bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)); diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 8ddb918..827c0a5 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -27,13 +27,6 @@ #include -#define DEBUG_CONFIG 1 -#if DEBUG_CONFIG -#define DBG(x...) printk(x) -#else -#define DBG(x...) -#endif - static void pbus_assign_resources_sorted(struct pci_bus *bus) { struct pci_dev *dev; @@ -81,8 +74,8 @@ void pci_setup_cardbus(struct pci_bus *bus) struct pci_dev *bridge = bus->self; struct pci_bus_region region; - printk("PCI: Bus %d, cardbus bridge: %s\n", - bus->number, pci_name(bridge)); + dev_info(&bridge->dev, "CardBus bridge, secondary bus %04x:%02x\n", + pci_domain_nr(bus), bus->number); pcibios_resource_to_bus(bridge, ®ion, bus->resource[0]); if (bus->resource[0]->flags & IORESOURCE_IO) { @@ -90,7 +83,7 @@ void pci_setup_cardbus(struct pci_bus *bus) * The IO resource is allocated a range twice as large as it * would normally need. This allows us to set both IO regs. */ - printk(KERN_INFO " IO window: 0x%08lx-0x%08lx\n", + dev_info(&bridge->dev, " IO window: %#08lx-%#08lx\n", (unsigned long)region.start, (unsigned long)region.end); pci_write_config_dword(bridge, PCI_CB_IO_BASE_0, @@ -101,7 +94,7 @@ void pci_setup_cardbus(struct pci_bus *bus) pcibios_resource_to_bus(bridge, ®ion, bus->resource[1]); if (bus->resource[1]->flags & IORESOURCE_IO) { - printk(KERN_INFO " IO window: 0x%08lx-0x%08lx\n", + dev_info(&bridge->dev, " IO window: %#08lx-%#08lx\n", (unsigned long)region.start, (unsigned long)region.end); pci_write_config_dword(bridge, PCI_CB_IO_BASE_1, @@ -112,7 +105,7 @@ void pci_setup_cardbus(struct pci_bus *bus) pcibios_resource_to_bus(bridge, ®ion, bus->resource[2]); if (bus->resource[2]->flags & IORESOURCE_MEM) { - printk(KERN_INFO " PREFETCH window: 0x%08lx-0x%08lx\n", + dev_info(&bridge->dev, " PREFETCH window: %#08lx-%#08lx\n", (unsigned long)region.start, (unsigned long)region.end); pci_write_config_dword(bridge, PCI_CB_MEMORY_BASE_0, @@ -123,7 +116,7 @@ void pci_setup_cardbus(struct pci_bus *bus) pcibios_resource_to_bus(bridge, ®ion, bus->resource[3]); if (bus->resource[3]->flags & IORESOURCE_MEM) { - printk(KERN_INFO " MEM window: 0x%08lx-0x%08lx\n", + dev_info(&bridge->dev, " MEM window: %#08lx-%#08lx\n", (unsigned long)region.start, (unsigned long)region.end); pci_write_config_dword(bridge, PCI_CB_MEMORY_BASE_1, @@ -151,7 +144,8 @@ static void pci_setup_bridge(struct pci_bus *bus) struct pci_bus_region region; u32 l, bu, lu, io_upper16; - DBG(KERN_INFO "PCI: Bridge: %s\n", pci_name(bridge)); + dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n", + pci_domain_nr(bus), bus->number); /* Set up the top and bottom of the PCI I/O segment for this bus. */ pcibios_resource_to_bus(bridge, ®ion, bus->resource[0]); @@ -162,7 +156,7 @@ static void pci_setup_bridge(struct pci_bus *bus) l |= region.end & 0xf000; /* Set up upper 16 bits of I/O base/limit. */ io_upper16 = (region.end & 0xffff0000) | (region.start >> 16); - DBG(KERN_INFO " IO window: %04lx-%04lx\n", + dev_info(&bridge->dev, " IO window: %#04lx-%#04lx\n", (unsigned long)region.start, (unsigned long)region.end); } @@ -170,7 +164,7 @@ static void pci_setup_bridge(struct pci_bus *bus) /* Clear upper 16 bits of I/O base/limit. */ io_upper16 = 0; l = 0x00f0; - DBG(KERN_INFO " IO window: disabled.\n"); + dev_info(&bridge->dev, " IO window: disabled\n"); } /* Temporarily disable the I/O range before updating PCI_IO_BASE. */ pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, 0x0000ffff); @@ -185,13 +179,13 @@ static void pci_setup_bridge(struct pci_bus *bus) if (bus->resource[1]->flags & IORESOURCE_MEM) { l = (region.start >> 16) & 0xfff0; l |= region.end & 0xfff00000; - DBG(KERN_INFO " MEM window: 0x%08lx-0x%08lx\n", + dev_info(&bridge->dev, " MEM window: %#08lx-%#08lx\n", (unsigned long)region.start, (unsigned long)region.end); } else { l = 0x0000fff0; - DBG(KERN_INFO " MEM window: disabled.\n"); + dev_info(&bridge->dev, " MEM window: disabled\n"); } pci_write_config_dword(bridge, PCI_MEMORY_BASE, l); @@ -208,13 +202,13 @@ static void pci_setup_bridge(struct pci_bus *bus) l |= region.end & 0xfff00000; bu = upper_32_bits(region.start); lu = upper_32_bits(region.end); - DBG(KERN_INFO " PREFETCH window: 0x%016llx-0x%016llx\n", + dev_info(&bridge->dev, " PREFETCH window: %#016llx-%#016llx\n", (unsigned long long)region.start, (unsigned long long)region.end); } else { l = 0x0000fff0; - DBG(KERN_INFO " PREFETCH window: disabled.\n"); + dev_info(&bridge->dev, " PREFETCH window: disabled\n"); } pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, l); @@ -361,9 +355,8 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long align = (i < PCI_BRIDGE_RESOURCES) ? r_size : r->start; order = __ffs(align) - 20; if (order > 11) { - printk(KERN_WARNING "PCI: region %s/%d " - "too large: 0x%016llx-0x%016llx\n", - pci_name(dev), i, + dev_warn(&dev->dev, "BAR %d too large: " + "%#016llx-%#016llx\n", i, (unsigned long long)r->start, (unsigned long long)r->end); r->flags = 0; @@ -529,8 +522,8 @@ void __ref pci_bus_assign_resources(struct pci_bus *bus) break; default: - printk(KERN_INFO "PCI: not setting up bridge %s " - "for bus %d\n", pci_name(dev), b->number); + dev_info(&dev->dev, "not setting up bridge for bus " + "%04x:%02x\n", pci_domain_nr(b), b->number); break; } } diff --git a/drivers/pci/setup-irq.c b/drivers/pci/setup-irq.c index 05ca2ed..aa795fd 100644 --- a/drivers/pci/setup-irq.c +++ b/drivers/pci/setup-irq.c @@ -47,8 +47,7 @@ pdev_fixup_irq(struct pci_dev *dev, } dev->irq = irq; - pr_debug("PCI: fixup irq: (%s) got %d\n", - kobject_name(&dev->dev.kobj), dev->irq); + dev_dbg(&dev->dev, "fixup irq: got %d\n", dev->irq); /* Always tell the device, so the driver knows what is the real IRQ to use; the device does not use it. */ diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 7d35cdf..1a5fc83 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -26,8 +26,7 @@ #include "pci.h" -void -pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) +void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) { struct pci_bus_region region; u32 new, check, mask; @@ -43,20 +42,20 @@ pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) /* * Ignore non-moveable resources. This might be legacy resources for * which no functional BAR register exists or another important - * system resource we should better not move around in system address - * space. + * system resource we shouldn't move around. */ if (res->flags & IORESOURCE_PCI_FIXED) return; pcibios_resource_to_bus(dev, ®ion, res); - pr_debug(" got res [%llx:%llx] bus [%llx:%llx] flags %lx for " - "BAR %d of %s\n", (unsigned long long)res->start, + dev_dbg(&dev->dev, "BAR %d: got res [%#llx-%#llx] bus [%#llx-%#llx] " + "flags %#lx\n", resno, + (unsigned long long)res->start, (unsigned long long)res->end, (unsigned long long)region.start, (unsigned long long)region.end, - (unsigned long)res->flags, resno, pci_name(dev)); + (unsigned long)res->flags); new = region.start | (res->flags & PCI_REGION_FLAG_MASK); if (res->flags & IORESOURCE_IO) @@ -81,9 +80,8 @@ pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) pci_read_config_dword(dev, reg, &check); if ((new ^ check) & mask) { - printk(KERN_ERR "PCI: Error while updating region " - "%s/%d (%08x != %08x)\n", pci_name(dev), resno, - new, check); + dev_err(&dev->dev, "BAR %d: error updating (%#08x != %#08x)\n", + resno, new, check); } if ((new & (PCI_BASE_ADDRESS_SPACE|PCI_BASE_ADDRESS_MEM_TYPE_MASK)) == @@ -92,15 +90,14 @@ pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) pci_write_config_dword(dev, reg + 4, new); pci_read_config_dword(dev, reg + 4, &check); if (check != new) { - printk(KERN_ERR "PCI: Error updating region " - "%s/%d (high %08x != %08x)\n", - pci_name(dev), resno, new, check); + dev_err(&dev->dev, "BAR %d: error updating " + "(high %#08x != %#08x)\n", resno, new, check); } } res->flags &= ~IORESOURCE_UNSET; - pr_debug("PCI: moved device %s resource %d (%lx) to %x\n", - pci_name(dev), resno, res->flags, - new & ~PCI_REGION_FLAG_MASK); + dev_dbg(&dev->dev, "BAR %d: moved to bus [%#llx-%#llx] flags %#lx\n", + resno, (unsigned long long)region.start, + (unsigned long long)region.end, res->flags); } int pci_claim_resource(struct pci_dev *dev, int resource) @@ -117,10 +114,11 @@ int pci_claim_resource(struct pci_dev *dev, int resource) err = insert_resource(root, res); if (err) { - printk(KERN_ERR "PCI: %s region %d of %s %s [%llx:%llx]\n", - root ? "Address space collision on" : - "No parent found for", - resource, dtype, pci_name(dev), + dev_err(&dev->dev, "BAR %d: %s of %s [%#llx-%#llx]\n", + resource, + root ? "address space collision on" : + "no parent found for", + dtype, (unsigned long long)res->start, (unsigned long long)res->end); } @@ -140,11 +138,10 @@ int pci_assign_resource(struct pci_dev *dev, int resno) align = resource_alignment(res); if (!align) { - printk(KERN_ERR "PCI: Cannot allocate resource (bogus " - "alignment) %d [%llx:%llx] (flags %lx) of %s\n", + dev_err(&dev->dev, "BAR %d: can't allocate resource (bogus " + "alignment) [%#llx-%#llx] flags %#lx\n", resno, (unsigned long long)res->start, - (unsigned long long)res->end, res->flags, - pci_name(dev)); + (unsigned long long)res->end, res->flags); return -EINVAL; } @@ -165,11 +162,11 @@ int pci_assign_resource(struct pci_dev *dev, int resno) } if (ret) { - printk(KERN_ERR "PCI: Failed to allocate %s resource " - "#%d:%llx@%llx for %s\n", + dev_err(&dev->dev, "BAR %d: can't allocate %s resource " + "[%#llx-%#llx]\n", resno, res->flags & IORESOURCE_IO ? "I/O" : "mem", - resno, (unsigned long long)size, - (unsigned long long)res->start, pci_name(dev)); + (unsigned long long)res->start, + (unsigned long long)res->end); } else { res->flags &= ~IORESOURCE_STARTALIGN; if (resno < PCI_BRIDGE_RESOURCES) @@ -205,11 +202,11 @@ int pci_assign_resource_fixed(struct pci_dev *dev, int resno) } if (ret) { - printk(KERN_ERR "PCI: Failed to allocate %s resource " - "#%d:%llx@%llx for %s\n", + dev_err(&dev->dev, "BAR %d: can't allocate %s resource " + "[%#llx-%#llx\n]", resno, res->flags & IORESOURCE_IO ? "I/O" : "mem", - resno, (unsigned long long)(res->end - res->start + 1), - (unsigned long long)res->start, pci_name(dev)); + (unsigned long long)res->start, + (unsigned long long)res->end); } else if (resno < PCI_BRIDGE_RESOURCES) { pci_update_resource(dev, res, resno); } @@ -239,11 +236,10 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) r_align = resource_alignment(r); if (!r_align) { - printk(KERN_WARNING "PCI: bogus alignment of resource " - "%d [%llx:%llx] (flags %lx) of %s\n", + dev_warn(&dev->dev, "BAR %d: bogus alignment " + "[%#llx-%#llx] flags %#lx\n", i, (unsigned long long)r->start, - (unsigned long long)r->end, r->flags, - pci_name(dev)); + (unsigned long long)r->end, r->flags); continue; } for (list = head; ; list = list->next) { @@ -291,7 +287,7 @@ int pci_enable_resources(struct pci_dev *dev, int mask) if (!r->parent) { dev_err(&dev->dev, "device not available because of " - "BAR %d [%llx:%llx] collisions\n", i, + "BAR %d [%#llx-%#llx] collisions\n", i, (unsigned long long) r->start, (unsigned long long) r->end); return -EINVAL; -- cgit v0.10.2 From 531f254e5cdadb894f04ed27107cdb34c15817ea Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Jun 2008 10:52:12 -0600 Subject: PCIE: aer: use dev_printk when possible Convert printks to use dev_printk(). Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index b7a3aa6..77036f4 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -220,8 +220,7 @@ static int __devinit aer_probe (struct pcie_device *dev, /* Alloc rpc data structure */ if (!(rpc = aer_alloc_rpc(dev))) { - printk(KERN_DEBUG "%s: Alloc rpc fails on PCIE device[%s]\n", - __func__, device->bus_id); + dev_printk(KERN_DEBUG, device, "alloc rpc failed\n"); aer_remove(dev); return -ENOMEM; } @@ -229,8 +228,7 @@ static int __devinit aer_probe (struct pcie_device *dev, /* Request IRQ ISR */ if ((status = request_irq(dev->irq, aer_irq, IRQF_SHARED, "aerdrv", dev))) { - printk(KERN_DEBUG "%s: Request ISR fails on PCIE device[%s]\n", - __func__, device->bus_id); + dev_printk(KERN_DEBUG, device, "request IRQ failed\n"); aer_remove(dev); return status; } @@ -274,7 +272,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) * to issue Configuration Requests to those devices. */ msleep(200); - printk(KERN_DEBUG "Complete link reset at Root[%s]\n", dev->dev.bus_id); + dev_printk(KERN_DEBUG, &dev->dev, "Root Port link has been reset\n"); /* Enable Root Port's interrupt in response to error messages */ pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status); diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c index d39a78d..30f581b 100644 --- a/drivers/pci/pcie/aer/aerdrv_acpi.c +++ b/drivers/pci/pcie/aer/aerdrv_acpi.c @@ -50,10 +50,10 @@ int aer_osc_setup(struct pcie_device *pciedev) } if (ACPI_FAILURE(status)) { - printk(KERN_DEBUG "AER service couldn't init device %s - %s\n", - pciedev->device.bus_id, - (status == AE_SUPPORT || status == AE_NOT_FOUND) ? - "no _OSC support" : "Run ACPI _OSC fails"); + dev_printk(KERN_DEBUG, &pciedev->device, "AER service couldn't " + "init device: %s\n", + (status == AE_SUPPORT || status == AE_NOT_FOUND) ? + "no _OSC support" : "_OSC failed"); return -1; } diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index aaa8239..ee5e7b5 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -221,9 +221,9 @@ static void report_error_detected(struct pci_dev *dev, void *data) * of a driver for this device is unaware of * its hw state. */ - printk(KERN_DEBUG "Device ID[%s] has %s\n", - dev->dev.bus_id, (dev->driver) ? - "no AER-aware driver" : "no driver"); + dev_printk(KERN_DEBUG, &dev->dev, "device has %s\n", + dev->driver ? + "no AER-aware driver" : "no driver"); } return; } @@ -304,7 +304,7 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, { struct aer_broadcast_data result_data; - printk(KERN_DEBUG "Broadcast %s message\n", error_mesg); + dev_printk(KERN_DEBUG, &dev->dev, "broadcast %s message\n", error_mesg); result_data.state = state; if (cb == report_error_detected) result_data.result = PCI_ERS_RESULT_CAN_RECOVER; @@ -404,18 +404,16 @@ static pci_ers_result_t reset_link(struct pcie_device *aerdev, data.aer_driver = to_service_driver(aerdev->device.driver); } else { - printk(KERN_DEBUG "No link-reset support to Device ID" - "[%s]\n", - dev->dev.bus_id); + dev_printk(KERN_DEBUG, &dev->dev, "no link-reset " + "support\n"); return PCI_ERS_RESULT_DISCONNECT; } } status = data.aer_driver->reset_link(udev); if (status != PCI_ERS_RESULT_RECOVERED) { - printk(KERN_DEBUG "Link reset at upstream Device ID" - "[%s] failed\n", - udev->dev.bus_id); + dev_printk(KERN_DEBUG, &dev->dev, "link reset at upstream " + "device %s failed\n", pci_name(udev)); return PCI_ERS_RESULT_DISCONNECT; } @@ -511,10 +509,12 @@ static void handle_error_source(struct pcie_device * aerdev, } else { status = do_recovery(aerdev, dev, info.severity); if (status == PCI_ERS_RESULT_RECOVERED) { - printk(KERN_DEBUG "AER driver successfully recovered\n"); + dev_printk(KERN_DEBUG, &dev->dev, "AER driver " + "successfully recovered\n"); } else { /* TODO: Should kernel panic here? */ - printk(KERN_DEBUG "AER driver didn't recover\n"); + dev_printk(KERN_DEBUG, &dev->dev, "AER driver didn't " + "recover\n"); } } } -- cgit v0.10.2 From 34438ba602f9b8904aafe7559046ea68e99e88a0 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Jun 2008 10:52:13 -0600 Subject: PCIE: port driver: use dev_printk when possible Convert printks to use dev_printk(). Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index fb0abfa..890f0d2 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -23,20 +23,20 @@ static int pcie_port_probe_service(struct device *dev) { struct pcie_device *pciedev; struct pcie_port_service_driver *driver; - int status = -ENODEV; + int status; if (!dev || !dev->driver) - return status; + return -ENODEV; driver = to_service_driver(dev->driver); if (!driver || !driver->probe) - return status; + return -ENODEV; pciedev = to_pcie_device(dev); status = driver->probe(pciedev, driver->id_table); if (!status) { - printk(KERN_DEBUG "Load service driver %s on pcie device %s\n", - driver->name, dev->bus_id); + dev_printk(KERN_DEBUG, dev, "service driver %s loaded\n", + driver->name); get_device(dev); } return status; @@ -53,8 +53,8 @@ static int pcie_port_remove_service(struct device *dev) pciedev = to_pcie_device(dev); driver = to_service_driver(dev->driver); if (driver && driver->remove) { - printk(KERN_DEBUG "Unload service driver %s on pcie device %s\n", - driver->name, dev->bus_id); + dev_printk(KERN_DEBUG, dev, "unloading service driver %s\n", + driver->name); driver->remove(pciedev); put_device(dev); } @@ -103,7 +103,7 @@ static int pcie_port_resume_service(struct device *dev) */ static void release_pcie_device(struct device *dev) { - printk(KERN_DEBUG "Free Port Service[%s]\n", dev->bus_id); + dev_printk(KERN_DEBUG, dev, "free port service\n"); kfree(to_pcie_device(dev)); } @@ -150,7 +150,7 @@ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) if (pos) { struct msix_entry msix_entries[PCIE_PORT_DEVICE_MAXSERVICES] = {{0, 0}, {0, 1}, {0, 2}, {0, 3}}; - printk("%s Found MSIX capability\n", __func__); + dev_info(&dev->dev, "found MSI-X capability\n"); status = pci_enable_msix(dev, msix_entries, nvec); if (!status) { int j = 0; @@ -165,7 +165,7 @@ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) if (status) { pos = pci_find_capability(dev, PCI_CAP_ID_MSI); if (pos) { - printk("%s Found MSI capability\n", __func__); + dev_info(&dev->dev, "found MSI capability\n"); status = pci_enable_msi(dev); if (!status) { interrupt_mode = PCIE_PORT_MSI_MODE; @@ -252,7 +252,7 @@ static struct pcie_device* alloc_pcie_device(struct pci_dev *parent, return NULL; pcie_device_init(parent, device, port_type, service_type, irq,irq_mode); - printk(KERN_DEBUG "Allocate Port Service[%s]\n", device->device.bus_id); + dev_printk(KERN_DEBUG, &device->device, "allocate port service\n"); return device; } diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 51d1632..367c9c2 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -91,9 +91,8 @@ static int __devinit pcie_portdrv_probe (struct pci_dev *dev, pci_set_master(dev); if (!dev->irq && dev->pin) { - printk(KERN_WARNING - "%s->Dev[%04x:%04x] has invalid IRQ. Check vendor BIOS\n", - __func__, dev->vendor, dev->device); + dev_warn(&dev->dev, "device [%04x/%04x] has invalid IRQ; " + "check vendor BIOS\n", dev->vendor, dev->device); } if (pcie_port_device_register(dev)) { pci_disable_device(dev); -- cgit v0.10.2 From e4ec7a00ed30429030112e5591cf3138645727c2 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 25 Jun 2008 16:12:25 -0700 Subject: PCI: correct resource number in debug output If pci_request_region fails, make the warning include the resource number, not the resource number + 1. diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 8b755a7..8e8ecc1 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1114,10 +1114,10 @@ int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) err_out: dev_warn(&pdev->dev, "BAR %d: can't reserve %s region [%#llx-%#llx]\n", - bar + 1, /* PCI BAR # */ - pci_resource_flags(pdev, bar) & IORESOURCE_IO ? "I/O" : "mem", - (unsigned long long)pci_resource_start(pdev, bar), - (unsigned long long)pci_resource_end(pdev, bar)); + bar, + pci_resource_flags(pdev, bar) & IORESOURCE_IO ? "I/O" : "mem", + (unsigned long long)pci_resource_start(pdev, bar), + (unsigned long long)pci_resource_end(pdev, bar)); return -EBUSY; } -- cgit v0.10.2 From c4635eb06af700820d658a163f06aff12e17cfb2 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Fri, 20 Jun 2008 12:07:08 +0900 Subject: pciehp: fix interrupt initialization Current pciehp driver's intialization sequence is as follows: (1) initialize controller data structure (2) install interrupt handler (3) enable software notification (4) initialize controller specific slot data structure (5) initialize generic slot data structure and register it to pci hotplug core The interrupt handler of pciehp assumes that controller specific slot data structure is already initialized. However, it is installed at (2) before initializing controller specific slot data structure at (4). Because of this, pciehp driver cannot handle the following cases properly. - If devices that shares IRQ with pciehp raise interrupts between (2) and (4). - If hotplug events (e.g. MRL open) happen between (3) and (4). We already have a workaround for this problem ("pciehp: fix NULL dereference in interrupt handler: dbd79aed1aea2bece0bf43cc2ff3b2f9baf48a08). But we still need fundamental fix. This patch fix the problem by changing the initilization sequence as follows: (1) initialize controller data structure (2) initialize controller specific slot data structure (3) install interrupt handler (4) enable software notification (5) initialize generic slot data structure and register it to pci hotplug core Signed-off-by: Kenji Kaneshige Acked-by: Alex Chiang Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 8492fab..d17233a 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -43,6 +43,7 @@ extern int pciehp_poll_mode; extern int pciehp_poll_time; extern int pciehp_debug; extern int pciehp_force; +extern int pciehp_slot_with_bus; extern struct workqueue_struct *pciehp_wq; #define dbg(format, arg...) \ @@ -156,10 +157,10 @@ extern u8 pciehp_handle_power_fault(struct slot *p_slot); extern int pciehp_configure_device(struct slot *p_slot); extern int pciehp_unconfigure_device(struct slot *p_slot); extern void pciehp_queue_pushbutton_work(struct work_struct *work); -int pcie_init(struct controller *ctrl, struct pcie_device *dev); +struct controller *pcie_init(struct pcie_device *dev); int pciehp_enable_slot(struct slot *p_slot); int pciehp_disable_slot(struct slot *p_slot); -int pcie_init_hardware_part2(struct controller *ctrl, struct pcie_device *dev); +int pcie_enable_notification(struct controller *ctrl); static inline struct slot *pciehp_find_slot(struct controller *ctrl, u8 device) { diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 7b21c86..d0fb569 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -41,7 +41,7 @@ int pciehp_debug; int pciehp_poll_mode; int pciehp_poll_time; int pciehp_force; -static int pciehp_slot_with_bus; +int pciehp_slot_with_bus; struct workqueue_struct *pciehp_wq; #define DRIVER_VERSION "0.4" @@ -183,23 +183,10 @@ static struct hotplug_slot_attribute hotplug_slot_attr_lock = { */ static void release_slot(struct hotplug_slot *hotplug_slot) { - struct slot *slot = hotplug_slot->private; - dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name); - kfree(slot->hotplug_slot->info); - kfree(slot->hotplug_slot); - kfree(slot); -} - -static void make_slot_name(struct slot *slot) -{ - if (pciehp_slot_with_bus) - snprintf(slot->hotplug_slot->name, SLOT_NAME_SIZE, "%04d_%04d", - slot->bus, slot->number); - else - snprintf(slot->hotplug_slot->name, SLOT_NAME_SIZE, "%d", - slot->number); + kfree(hotplug_slot->info); + kfree(hotplug_slot); } static int init_slots(struct controller *ctrl) @@ -208,44 +195,27 @@ static int init_slots(struct controller *ctrl) struct hotplug_slot *hotplug_slot; struct hotplug_slot_info *info; int retval = -ENOMEM; - int i; - - for (i = 0; i < ctrl->num_slots; i++) { - slot = kzalloc(sizeof(*slot), GFP_KERNEL); - if (!slot) - goto error; + list_for_each_entry(slot, &ctrl->slot_list, slot_list) { hotplug_slot = kzalloc(sizeof(*hotplug_slot), GFP_KERNEL); if (!hotplug_slot) - goto error_slot; - slot->hotplug_slot = hotplug_slot; + goto error; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) goto error_hpslot; - hotplug_slot->info = info; - - hotplug_slot->name = slot->name; - - slot->hp_slot = i; - slot->ctrl = ctrl; - slot->bus = ctrl->pci_dev->subordinate->number; - slot->device = ctrl->slot_device_offset + i; - slot->hpc_ops = ctrl->hpc_ops; - slot->number = ctrl->first_slot; - mutex_init(&slot->lock); - INIT_DELAYED_WORK(&slot->work, pciehp_queue_pushbutton_work); /* register this slot with the hotplug pci core */ + hotplug_slot->info = info; + hotplug_slot->name = slot->name; hotplug_slot->private = slot; hotplug_slot->release = &release_slot; - make_slot_name(slot); hotplug_slot->ops = &pciehp_hotplug_slot_ops; - get_power_status(hotplug_slot, &info->power_status); get_attention_status(hotplug_slot, &info->attention_status); get_latch_status(hotplug_slot, &info->latch_status); get_adapter_status(hotplug_slot, &info->adapter_status); + slot->hotplug_slot = hotplug_slot; dbg("Registering bus=%x dev=%x hp_slot=%x sun=%x " "slot_device_offset=%x\n", slot->bus, slot->device, @@ -271,8 +241,6 @@ static int init_slots(struct controller *ctrl) goto error_info; } } - - list_add(&slot->slot_list, &ctrl->slot_list); } return 0; @@ -280,27 +248,18 @@ error_info: kfree(info); error_hpslot: kfree(hotplug_slot); -error_slot: - kfree(slot); error: return retval; } static void cleanup_slots(struct controller *ctrl) { - struct list_head *tmp; - struct list_head *next; struct slot *slot; - list_for_each_safe(tmp, next, &ctrl->slot_list) { - slot = list_entry(tmp, struct slot, slot_list); - list_del(&slot->slot_list); + list_for_each_entry(slot, &ctrl->slot_list, slot_list) { if (EMI(ctrl)) sysfs_remove_file(&slot->hotplug_slot->pci_slot->kobj, &hotplug_slot_attr_lock.attr); - cancel_delayed_work(&slot->work); - flush_scheduled_work(); - flush_workqueue(pciehp_wq); pci_hp_deregister(slot->hotplug_slot); } } @@ -441,25 +400,13 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_ else if (pciehp_get_hp_hw_control_from_firmware(pdev)) goto err_out_none; - ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); + ctrl = pcie_init(dev); if (!ctrl) { - err("%s : out of memory\n", __func__); - goto err_out_none; - } - INIT_LIST_HEAD(&ctrl->slot_list); - - rc = pcie_init(ctrl, dev); - if (rc) { dbg("%s: controller initialization failed\n", PCIE_MODULE_NAME); - goto err_out_free_ctrl; + goto err_out_none; } - pci_set_drvdata(pdev, ctrl); - dbg("%s: ctrl bus=0x%x, device=%x, function=%x, irq=%x\n", - __func__, pdev->bus->number, PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn), pdev->irq); - /* Setup the slot information structures */ rc = init_slots(ctrl); if (rc) { @@ -492,8 +439,6 @@ err_out_free_ctrl_slot: cleanup_slots(ctrl); err_out_release_ctlr: ctrl->hpc_ops->release_ctlr(ctrl); -err_out_free_ctrl: - kfree(ctrl); err_out_none: return -ENODEV; } @@ -505,7 +450,6 @@ static void pciehp_remove (struct pcie_device *dev) cleanup_slots(ctrl); ctrl->hpc_ops->release_ctlr(ctrl); - kfree(ctrl); } #ifdef CONFIG_PM @@ -525,7 +469,7 @@ static int pciehp_resume (struct pcie_device *dev) u8 status; /* reinitialize the chipset's event detection logic */ - pcie_init_hardware_part2(ctrl, dev); + pcie_enable_notification(ctrl); t_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset); diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 59c2809..1ce5243 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -609,23 +609,6 @@ static void hpc_set_green_led_blink(struct slot *slot) __func__, ctrl->cap_base + SLOTCTRL, slot_cmd); } -static void hpc_release_ctlr(struct controller *ctrl) -{ - /* Mask Hot-plug Interrupt Enable */ - if (pcie_write_cmd(ctrl, 0, HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE)) - err("%s: Cannot mask hotplug interrupt enable\n", __func__); - - /* Free interrupt handler or interrupt polling timer */ - pciehp_free_irq(ctrl); - - /* - * If this is the last controller to be released, destroy the - * pciehp work queue - */ - if (atomic_dec_and_test(&pciehp_num_controllers)) - destroy_workqueue(pciehp_wq); -} - static int hpc_power_on_slot(struct slot * slot) { struct controller *ctrl = slot->ctrl; @@ -798,19 +781,7 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) if (!(intr_loc & ~CMD_COMPLETED)) return IRQ_HANDLED; - /* - * Return without handling events if this handler routine is - * called before controller initialization is done. This may - * happen if hotplug event or another interrupt that shares - * the IRQ with pciehp arrives before slot initialization is - * done after interrupt handler is registered. - * - * FIXME - Need more structural fixes. We need to be ready to - * handle the event before installing interrupt handler. - */ p_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset); - if (!p_slot || !p_slot->hpc_ops) - return IRQ_HANDLED; /* Check MRL Sensor Changed */ if (intr_loc & MRL_SENS_CHANGED) @@ -987,6 +958,7 @@ static int hpc_get_cur_lnk_width(struct slot *slot, return retval; } +static void pcie_release_ctrl(struct controller *ctrl); static struct hpc_ops pciehp_hpc_ops = { .power_on_slot = hpc_power_on_slot, .power_off_slot = hpc_power_off_slot, @@ -1008,28 +980,11 @@ static struct hpc_ops pciehp_hpc_ops = { .green_led_off = hpc_set_green_led_off, .green_led_blink = hpc_set_green_led_blink, - .release_ctlr = hpc_release_ctlr, + .release_ctlr = pcie_release_ctrl, .check_lnk_status = hpc_check_lnk_status, }; -static int pcie_init_hardware_part1(struct controller *ctrl, - struct pcie_device *dev) -{ - /* Clear all remaining event bits in Slot Status register */ - if (pciehp_writew(ctrl, SLOTSTATUS, 0x1f)) { - err("%s: Cannot write to SLOTSTATUS register\n", __func__); - return -1; - } - - /* Mask Hot-plug Interrupt Enable */ - if (pcie_write_cmd(ctrl, 0, HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE)) { - err("%s: Cannot mask hotplug interrupt enable\n", __func__); - return -1; - } - return 0; -} - -int pcie_init_hardware_part2(struct controller *ctrl, struct pcie_device *dev) +int pcie_enable_notification(struct controller *ctrl) { u16 cmd, mask; @@ -1050,10 +1005,76 @@ int pcie_init_hardware_part2(struct controller *ctrl, struct pcie_device *dev) err("%s: Cannot enable software notification\n", __func__); return -1; } + return 0; +} + +static void pcie_disable_notification(struct controller *ctrl) +{ + u16 mask; + mask = PRSN_DETECT_ENABLE | ATTN_BUTTN_ENABLE | MRL_DETECT_ENABLE | + PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE; + if (pcie_write_cmd(ctrl, 0, mask)) + warn("%s: Cannot disable software notification\n", __func__); +} + +static int pcie_init_notification(struct controller *ctrl) +{ + if (pciehp_request_irq(ctrl)) + return -1; + if (pcie_enable_notification(ctrl)) { + pciehp_free_irq(ctrl); + return -1; + } + return 0; +} + +static void pcie_shutdown_notification(struct controller *ctrl) +{ + pcie_disable_notification(ctrl); + pciehp_free_irq(ctrl); +} + +static void make_slot_name(struct slot *slot) +{ + if (pciehp_slot_with_bus) + snprintf(slot->name, SLOT_NAME_SIZE, "%04d_%04d", + slot->bus, slot->number); + else + snprintf(slot->name, SLOT_NAME_SIZE, "%d", slot->number); +} +static int pcie_init_slot(struct controller *ctrl) +{ + struct slot *slot; + + slot = kzalloc(sizeof(*slot), GFP_KERNEL); + if (!slot) + return -ENOMEM; + + slot->hp_slot = 0; + slot->ctrl = ctrl; + slot->bus = ctrl->pci_dev->subordinate->number; + slot->device = ctrl->slot_device_offset + slot->hp_slot; + slot->hpc_ops = ctrl->hpc_ops; + slot->number = ctrl->first_slot; + make_slot_name(slot); + mutex_init(&slot->lock); + INIT_DELAYED_WORK(&slot->work, pciehp_queue_pushbutton_work); + list_add(&slot->slot_list, &ctrl->slot_list); return 0; } +static void pcie_cleanup_slot(struct controller *ctrl) +{ + struct slot *slot; + slot = list_first_entry(&ctrl->slot_list, struct slot, slot_list); + list_del(&slot->slot_list); + cancel_delayed_work(&slot->work); + flush_scheduled_work(); + flush_workqueue(pciehp_wq); + kfree(slot); +} + static inline void dbg_ctrl(struct controller *ctrl) { int i; @@ -1093,11 +1114,19 @@ static inline void dbg_ctrl(struct controller *ctrl) dbg("Slot Control : 0x%04x\n", reg16); } -int pcie_init(struct controller *ctrl, struct pcie_device *dev) +struct controller *pcie_init(struct pcie_device *dev) { + struct controller *ctrl; u32 slot_cap; struct pci_dev *pdev = dev->port; + ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); + if (!ctrl) { + err("%s : out of memory\n", __func__); + goto abort; + } + INIT_LIST_HEAD(&ctrl->slot_list); + ctrl->pci_dev = pdev; ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP); if (!ctrl->cap_base) { @@ -1128,15 +1157,12 @@ int pcie_init(struct controller *ctrl, struct pcie_device *dev) !(POWER_CTRL(ctrl) | ATTN_LED(ctrl) | PWR_LED(ctrl) | EMI(ctrl))) ctrl->no_cmd_complete = 1; - info("HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n", - pdev->vendor, pdev->device, - pdev->subsystem_vendor, pdev->subsystem_device); + /* Clear all remaining event bits in Slot Status register */ + if (pciehp_writew(ctrl, SLOTSTATUS, 0x1f)) + goto abort_ctrl; - if (pcie_init_hardware_part1(ctrl, dev)) - goto abort; - - if (pciehp_request_irq(ctrl)) - goto abort; + /* Disable sotfware notification */ + pcie_disable_notification(ctrl); /* * If this is the first controller to be initialized, @@ -1144,18 +1170,39 @@ int pcie_init(struct controller *ctrl, struct pcie_device *dev) */ if (atomic_add_return(1, &pciehp_num_controllers) == 1) { pciehp_wq = create_singlethread_workqueue("pciehpd"); - if (!pciehp_wq) { - goto abort_free_irq; - } + if (!pciehp_wq) + goto abort_ctrl; } - if (pcie_init_hardware_part2(ctrl, dev)) - goto abort_free_irq; + info("HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n", + pdev->vendor, pdev->device, + pdev->subsystem_vendor, pdev->subsystem_device); + + if (pcie_init_slot(ctrl)) + goto abort_ctrl; - return 0; + if (pcie_init_notification(ctrl)) + goto abort_slot; -abort_free_irq: - pciehp_free_irq(ctrl); + return ctrl; + +abort_slot: + pcie_cleanup_slot(ctrl); +abort_ctrl: + kfree(ctrl); abort: - return -1; + return NULL; +} + +void pcie_release_ctrl(struct controller *ctrl) +{ + pcie_shutdown_notification(ctrl); + pcie_cleanup_slot(ctrl); + /* + * If this is the last controller to be released, destroy the + * pciehp work queue + */ + if (atomic_dec_and_test(&pciehp_num_controllers)) + destroy_workqueue(pciehp_wq); + kfree(ctrl); } -- cgit v0.10.2 From 3aa50c44628629a6d58f93e0a1244e95a874884e Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Thu, 26 Jun 2008 20:07:33 +0900 Subject: pciehp: remove needless command completed interrupt setting Currently, pciehp driver enables command completed interrupt as follows. (1) Don't enable at initialization. (2) Enable command completed interrupt whenever pciehp issues a command, if the command doesn't attempt to disable the interrupt. (3) Disable command completed interrupt at driver unloading. Once we enable command completed interrupt, we don't need to re-enable it for every command. So we can simplify above steps as follows: (1) Enable command completed interrupt at initialization. (2) No special sequence for command completed interrupt. (3) Disable command completed interrupt at driver unloading. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 1ce5243..1323a43 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -337,10 +337,6 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) slot_ctrl &= ~mask; slot_ctrl |= (cmd & mask); - /* Don't enable command completed if caller is changing it. */ - if (!(mask & CMD_CMPL_INTR_ENABLE)) - slot_ctrl |= CMD_CMPL_INTR_ENABLE; - ctrl->cmd_busy = 1; smp_mb(); retval = pciehp_writew(ctrl, SLOTCTRL, slot_ctrl); @@ -996,10 +992,10 @@ int pcie_enable_notification(struct controller *ctrl) if (MRL_SENS(ctrl)) cmd |= MRL_DETECT_ENABLE; if (!pciehp_poll_mode) - cmd |= HP_INTR_ENABLE; + cmd |= HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE; - mask = PRSN_DETECT_ENABLE | ATTN_BUTTN_ENABLE | - PWR_FAULT_DETECT_ENABLE | MRL_DETECT_ENABLE | HP_INTR_ENABLE; + mask = PRSN_DETECT_ENABLE | ATTN_BUTTN_ENABLE | MRL_DETECT_ENABLE | + PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE; if (pcie_write_cmd(ctrl, cmd, mask)) { err("%s: Cannot enable software notification\n", __func__); -- cgit v0.10.2 From b97089400d44b9e90ce5029a2e458cd087473c74 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Thu, 26 Jun 2008 20:06:24 +0900 Subject: pciehp: use get_service_data Current pciehp driver saves its private data pointer into pci_dev structure using pci_set_drvdata()/pci_get_drvdata(). But because pciehp is not a pci device driver but a PCI Express service driver, it should save its private data pointer into pcie_device structure using set_service_data()/get_service_data(). Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index d0fb569..3677495 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -405,7 +405,7 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_ dbg("%s: controller initialization failed\n", PCIE_MODULE_NAME); goto err_out_none; } - pci_set_drvdata(pdev, ctrl); + set_service_data(dev, ctrl); /* Setup the slot information structures */ rc = init_slots(ctrl); @@ -445,8 +445,7 @@ err_out_none: static void pciehp_remove (struct pcie_device *dev) { - struct pci_dev *pdev = dev->port; - struct controller *ctrl = pci_get_drvdata(pdev); + struct controller *ctrl = get_service_data(dev); cleanup_slots(ctrl); ctrl->hpc_ops->release_ctlr(ctrl); @@ -463,8 +462,7 @@ static int pciehp_resume (struct pcie_device *dev) { printk("%s ENTRY\n", __func__); if (pciehp_force) { - struct pci_dev *pdev = dev->port; - struct controller *ctrl = pci_get_drvdata(pdev); + struct controller *ctrl = get_service_data(dev); struct slot *t_slot; u8 status; -- cgit v0.10.2 From 9433f6dd3a4677e9b92c6e1cd7f98b11598b7c2c Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Thu, 26 Jun 2008 10:50:04 +0800 Subject: PCI: Fix comment of pci_dynids struct pci_driver has no field of driver_data. It's in pci_device_id. Signed-off-by: Wang Chen Signed-off-by: Jesse Barnes diff --git a/include/linux/pci.h b/include/linux/pci.h index f1f73f7..76c9a4a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -338,7 +338,7 @@ struct pci_bus_region { struct pci_dynids { spinlock_t lock; /* protects list, index */ struct list_head list; /* for IDs added at runtime */ - unsigned int use_driver_data:1; /* pci_driver->driver_data is used */ + unsigned int use_driver_data:1; /* pci_device_id->driver_data is used */ }; /* ---------------------------------------------------------------- */ -- cgit v0.10.2 From 8b285ce84bbc719e363a796f466404576b840d36 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 27 Jun 2008 13:23:20 +0100 Subject: PCI: fix pci_setup_device()'s sprinting into a const buffer Make pci_setup_device() write the bus ID directly into the allotted storage, rather than using pci_name() as the address as that now returns a const pointer. Signed-off-by: David Howells Signed-off-by: Jesse Barnes diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index f829119..0420fd8 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -713,8 +713,9 @@ static int pci_setup_device(struct pci_dev * dev) { u32 class; - sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus), - dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); + snprintf(dev->dev.bus_id, BUS_ID_SIZE, + "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus), + dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); pci_read_config_dword(dev, PCI_CLASS_REVISION, &class); dev->revision = class & 0xff; -- cgit v0.10.2 From 0aea531326d1a17ccef7d9a538429c5b32cf4f12 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 18 Jun 2008 01:33:32 +0300 Subject: PCI: remove unused arch pcibios_update_resource() functions Russell King did the following back in 2003: <-- snip --> [PCI] pci-9: Kill per-architecture pcibios_update_resource() Kill pcibios_update_resource(), replacing it with pci_update_resource(). pci_update_resource() uses pcibios_resource_to_bus() to convert a resource to a device BAR - the transformation should be exactly the same as the transformation used for the PCI bridges. pci_update_resource "knows" about 64-bit BARs, but doesn't attempt to set the high 32-bits to anything non-zero - currently no architecture attempts to do something different. If anyone cares, please fix; I'm going to reflect current behaviour for the time being. Ivan pointed out the following architectures need to examine their pcibios_update_resource() implementation - they should make sure that this new implementation does the right thing. #warning's have been added where appropriate. ia64 mips mips64 This cset also includes a fix for the problem reported by AKPM where 64-bit arch compilers complain about the resource mask being placed in a u32. <-- snip --> This patch removes the unused pcibios_update_resource() functions the kernel gained since, from FRV, m68k, mips & sh architectures. Signed-off-by: Adrian Bunk Acked-by: David Howells Acked-by: Greg Ungerer Acked-by: Paul Mundt Acked-by: Ralf Baechle Signed-off-by: Jesse Barnes diff --git a/arch/frv/mb93090-mb00/pci-frv.c b/arch/frv/mb93090-mb00/pci-frv.c index 4f165c9..edae117 100644 --- a/arch/frv/mb93090-mb00/pci-frv.c +++ b/arch/frv/mb93090-mb00/pci-frv.c @@ -19,36 +19,6 @@ #include "pci-frv.h" -#if 0 -void -pcibios_update_resource(struct pci_dev *dev, struct resource *root, - struct resource *res, int resource) -{ - u32 new, check; - int reg; - - new = res->start | (res->flags & PCI_REGION_FLAG_MASK); - if (resource < 6) { - reg = PCI_BASE_ADDRESS_0 + 4*resource; - } else if (resource == PCI_ROM_RESOURCE) { - res->flags |= IORESOURCE_ROM_ENABLE; - new |= PCI_ROM_ADDRESS_ENABLE; - reg = dev->rom_base_reg; - } else { - /* Somebody might have asked allocation of a non-standard resource */ - return; - } - - pci_write_config_dword(dev, reg, new); - pci_read_config_dword(dev, reg, &check); - if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) { - printk(KERN_ERR "PCI: Error while updating region " - "%s/%d (%08x != %08x)\n", pci_name(dev), resource, - new, check); - } -} -#endif - /* * We need to avoid collisions with `mirrored' VGA ports * and other strange ISA hardware, so we always want the diff --git a/arch/m68knommu/kernel/comempci.c b/arch/m68knommu/kernel/comempci.c index 6ee00ef..0a68b5a 100644 --- a/arch/m68knommu/kernel/comempci.c +++ b/arch/m68knommu/kernel/comempci.c @@ -375,15 +375,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) /*****************************************************************************/ -void pcibios_update_resource(struct pci_dev *dev, struct resource *root, struct resource *r, int resource) -{ - printk(KERN_WARNING "%s(%d): no support for changing PCI resources...\n", - __FILE__, __LINE__); -} - - -/*****************************************************************************/ - /* * Local routines to interrcept the standard I/O and vector handling * code. Don't include this 'till now - initialization code above needs diff --git a/arch/mips/pmc-sierra/yosemite/ht.c b/arch/mips/pmc-sierra/yosemite/ht.c index 6380662..678388f 100644 --- a/arch/mips/pmc-sierra/yosemite/ht.c +++ b/arch/mips/pmc-sierra/yosemite/ht.c @@ -345,42 +345,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return pcibios_enable_resources(dev); } - - -void pcibios_update_resource(struct pci_dev *dev, struct resource *root, - struct resource *res, int resource) -{ - u32 new, check; - int reg; - - return; - - new = res->start | (res->flags & PCI_REGION_FLAG_MASK); - if (resource < 6) { - reg = PCI_BASE_ADDRESS_0 + 4 * resource; - } else if (resource == PCI_ROM_RESOURCE) { - res->flags |= IORESOURCE_ROM_ENABLE; - reg = dev->rom_base_reg; - } else { - /* - * Somebody might have asked allocation of a non-standard - * resource - */ - return; - } - - pci_write_config_dword(dev, reg, new); - pci_read_config_dword(dev, reg, &check); - if ((new ^ check) & - ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : - PCI_BASE_ADDRESS_MEM_MASK)) { - printk(KERN_ERR "PCI: Error while updating region " - "%s/%d (%08x != %08x)\n", pci_name(dev), resource, - new, check); - } -} - - void pcibios_align_resource(void *data, struct resource *res, resource_size_t size, resource_size_t align) { diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index 08d2e73..f57095a 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -76,38 +76,6 @@ void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus) pci_read_bridge_bases(bus); } -void -pcibios_update_resource(struct pci_dev *dev, struct resource *root, - struct resource *res, int resource) -{ - u32 new, check; - int reg; - - new = res->start | (res->flags & PCI_REGION_FLAG_MASK); - if (resource < 6) { - reg = PCI_BASE_ADDRESS_0 + 4*resource; - } else if (resource == PCI_ROM_RESOURCE) { - res->flags |= IORESOURCE_ROM_ENABLE; - new |= PCI_ROM_ADDRESS_ENABLE; - reg = dev->rom_base_reg; - } else { - /* - * Somebody might have asked allocation of a non-standard - * resource - */ - return; - } - - pci_write_config_dword(dev, reg, new); - pci_read_config_dword(dev, reg, &check); - if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? - PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) { - printk(KERN_ERR "PCI: Error while updating region " - "%s/%d (%08x != %08x)\n", pci_name(dev), resource, - new, check); - } -} - void pcibios_align_resource(void *data, struct resource *res, resource_size_t size, resource_size_t align) __attribute__ ((weak)); -- cgit v0.10.2 From 80be038593dba7aa46fb24a14f0ba83e5ade0edb Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 30 Jun 2008 11:35:53 -0700 Subject: PCI: add stub for pci_set_consistent_dma_mask() When CONFIG_PCI=n, there is no stub for pci_set_consistent_dma_mask(), so add one like other similar stubs. Otherwise there can be build errors, as here: linux-next-20080630/drivers/ssb/main.c:1175: error: implicit declaration of function 'pci_set_consistent_dma_mask' Signed-off-by: Randy Dunlap Signed-off-by: Jesse Barnes diff --git a/include/linux/pci.h b/include/linux/pci.h index 76c9a4a..96ebaa8 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -853,6 +853,11 @@ static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) return -EIO; } +static inline int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) +{ + return -EIO; +} + static inline int pci_set_dma_max_seg_size(struct pci_dev *dev, unsigned int size) { -- cgit v0.10.2 From eebfcfb52ce753eaaa8525078bda6b539586066c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 2 Jul 2008 13:24:49 -0700 Subject: PCI: handle pci_name() being const This changes pci_setup_device to handle pci_name() now returning a constant string. Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jesse Barnes diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 0420fd8..2f0ae70 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -713,9 +713,9 @@ static int pci_setup_device(struct pci_dev * dev) { u32 class; - snprintf(dev->dev.bus_id, BUS_ID_SIZE, - "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus), - dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); + dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus), + dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn)); pci_read_config_dword(dev, PCI_CLASS_REVISION, &class); dev->revision = class & 0xff; -- cgit v0.10.2 From c6c4f070a61b2b6e5cd317a5fbf25255878688a2 Mon Sep 17 00:00:00 2001 From: Greg KH Date: Thu, 3 Jul 2008 09:49:39 -0700 Subject: PCI: make pci_name use dev_name Also fixes up the sparc code that was assuming this is not a constant. Acked-by: David S. Miller Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jesse Barnes diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c index 112b09f..d00a365 100644 --- a/arch/sparc64/kernel/pci.c +++ b/arch/sparc64/kernel/pci.c @@ -408,7 +408,7 @@ struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, dev->class = class >> 8; dev->revision = class & 0xff; - sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus), + sprintf(dev->dev.bus_id, "%04x:%02x:%02x.%d", pci_domain_nr(bus), dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)); if (ofpci_verbose) diff --git a/include/linux/pci.h b/include/linux/pci.h index 96ebaa8..4c80dc3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -996,9 +996,9 @@ static inline void pci_set_drvdata(struct pci_dev *pdev, void *data) /* If you want to know what to call your pci_dev, ask this function. * Again, it's a wrapper around the generic device. */ -static inline char *pci_name(struct pci_dev *pdev) +static inline const char *pci_name(struct pci_dev *pdev) { - return pdev->dev.bus_id; + return dev_name(&pdev->dev); } -- cgit v0.10.2 From 3737b2b1046900660b42e25c904b85e78139d25b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 7 Jul 2008 03:30:55 +0200 Subject: ACPI: Introduce acpi_bus_power_manageable function Introduce function acpi_bus_power_manageable() allowing other (dependent) subsystems to check if ACPI is able to power manage given device. This may be useful, for example, for PCI device power management. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index a6dbcf4..b9b69d9 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -295,6 +295,17 @@ int acpi_bus_set_power(acpi_handle handle, int state) EXPORT_SYMBOL(acpi_bus_set_power); +bool acpi_bus_power_manageable(acpi_handle handle) +{ + struct acpi_device *device; + int result; + + result = acpi_bus_get_device(handle, &device); + return result ? false : device->flags.power_manageable; +} + +EXPORT_SYMBOL(acpi_bus_power_manageable); + /* -------------------------------------------------------------------------- Event Management -------------------------------------------------------------------------- */ diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index db90a74..0c21ea3 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -335,6 +335,7 @@ void acpi_bus_data_handler(acpi_handle handle, u32 function, void *context); int acpi_bus_get_status(struct acpi_device *device); int acpi_bus_get_power(acpi_handle handle, int *state); int acpi_bus_set_power(acpi_handle handle, int state); +bool acpi_bus_power_manageable(acpi_handle handle); #ifdef CONFIG_ACPI_PROC_EVENT int acpi_bus_generate_proc_event(struct acpi_device *device, u8 type, int data); int acpi_bus_generate_proc_event4(const char *class, const char *bid, u8 type, int data); -- cgit v0.10.2 From 961d9120fa6f078492a1c762dd91f2c097e56c83 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 7 Jul 2008 03:32:02 +0200 Subject: PCI: Introduce platform_pci_power_manageable function Introduce function pointer platform_pci_power_manageable to be used by the platform-related code to point to a function allowing us to check if given device is power manageable by the platform. Introduce acpi_pci_power_manageable() playing that role for ACPI. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 056ea80..e4df71ab 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -215,7 +215,6 @@ acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) } EXPORT_SYMBOL(pci_osc_control_set); -#ifdef CONFIG_ACPI_SLEEP /* * _SxD returns the D-state with the highest power * (lowest D-state number) supported in the S-state "x". @@ -259,7 +258,13 @@ static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev) } return PCI_POWER_ERROR; } -#endif + +static bool acpi_pci_power_manageable(struct pci_dev *dev) +{ + acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev); + + return handle ? acpi_bus_power_manageable(handle) : false; +} static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state) { @@ -290,6 +295,11 @@ static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state) return -EINVAL; } +static struct pci_platform_pm_ops acpi_pci_platform_pm = { + .is_manageable = acpi_pci_power_manageable, + .set_state = acpi_pci_set_power_state, + .choose_state = acpi_pci_choose_state, +}; /* ACPI bus type */ static int acpi_pci_find_device(struct device *dev, acpi_handle *handle) @@ -341,10 +351,7 @@ static int __init acpi_pci_init(void) ret = register_acpi_bus_type(&acpi_pci_bus); if (ret) return 0; -#ifdef CONFIG_ACPI_SLEEP - platform_pci_choose_state = acpi_pci_choose_state; -#endif - platform_pci_set_power_state = acpi_pci_set_power_state; + pci_set_platform_pm(&acpi_pci_platform_pm); return 0; } arch_initcall(acpi_pci_init); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 8e8ecc1..f807452 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -376,7 +376,32 @@ pci_restore_bars(struct pci_dev *dev) pci_update_resource(dev, &dev->resource[i], i); } -int (*platform_pci_set_power_state)(struct pci_dev *dev, pci_power_t t); +static struct pci_platform_pm_ops *pci_platform_pm; + +int pci_set_platform_pm(struct pci_platform_pm_ops *ops) +{ + if (!ops->is_manageable || !ops->set_state || !ops->choose_state) + return -EINVAL; + pci_platform_pm = ops; + return 0; +} + +static inline bool platform_pci_power_manageable(struct pci_dev *dev) +{ + return pci_platform_pm ? pci_platform_pm->is_manageable(dev) : false; +} + +static inline int platform_pci_set_power_state(struct pci_dev *dev, + pci_power_t t) +{ + return pci_platform_pm ? pci_platform_pm->set_state(dev, t) : -ENOSYS; +} + +static inline pci_power_t platform_pci_choose_state(struct pci_dev *dev) +{ + return pci_platform_pm ? + pci_platform_pm->choose_state(dev) : PCI_POWER_ERROR; +} /** * pci_set_power_state - Set the power state of a PCI device @@ -479,8 +504,7 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) * Give firmware a chance to be called, such as ACPI _PRx, _PSx * Firmware method after native method ? */ - if (platform_pci_set_power_state) - platform_pci_set_power_state(dev, state); + platform_pci_set_power_state(dev, state); dev->current_state = state; @@ -505,8 +529,6 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) return 0; } -pci_power_t (*platform_pci_choose_state)(struct pci_dev *dev); - /** * pci_choose_state - Choose the power state of a PCI device * @dev: PCI device to be suspended @@ -524,11 +546,9 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state) if (!pci_find_capability(dev, PCI_CAP_ID_PM)) return PCI_D0; - if (platform_pci_choose_state) { - ret = platform_pci_choose_state(dev); - if (ret != PCI_POWER_ERROR) - return ret; - } + ret = platform_pci_choose_state(dev); + if (ret != PCI_POWER_ERROR) + return ret; switch (state.event) { case PM_EVENT_ON: diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index e0eff35..0cd2e71 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -5,10 +5,28 @@ extern int pci_create_sysfs_dev_files(struct pci_dev *pdev); extern void pci_remove_sysfs_dev_files(struct pci_dev *pdev); extern void pci_cleanup_rom(struct pci_dev *dev); -/* Firmware callbacks */ -extern pci_power_t (*platform_pci_choose_state)(struct pci_dev *dev); -extern int (*platform_pci_set_power_state)(struct pci_dev *dev, - pci_power_t state); +/** + * Firmware PM callbacks + * + * @is_manageable - returns 'true' if given device is power manageable by the + * platform firmware + * + * @set_state - invokes the platform firmware to set the device's power state + * + * @choose_state - returns PCI power state of given device preferred by the + * platform; to be used during system-wide transitions from a + * sleeping state to the working state and vice versa + * + * If given platform is generally capable of power managing PCI devices, all of + * these callbacks are mandatory. + */ +struct pci_platform_pm_ops { + bool (*is_manageable)(struct pci_dev *dev); + int (*set_state)(struct pci_dev *dev, pci_power_t state); + pci_power_t (*choose_state)(struct pci_dev *dev); +}; + +extern int pci_set_platform_pm(struct pci_platform_pm_ops *ops); extern int pci_user_read_config_byte(struct pci_dev *dev, int where, u8 *val); extern int pci_user_read_config_word(struct pci_dev *dev, int where, u16 *val); -- cgit v0.10.2 From 44e4e66eeae5338b3ca0b28f8352e60bf18d5ba8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 7 Jul 2008 03:32:52 +0200 Subject: PCI: rework pci_set_power_state function to call platform first Rework pci_set_power_state() so that the platform callback is invoked before the native mechanism, if necessary. Also, make the function check if the device is power manageable by the platform before invoking the platform callback. This may matter if the device dependent on additional power resources controlled by the platform is being put into D0, in which case those power resources must be turned on before we attempt to handle the device itself. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index e4df71ab..6bc0d8c 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -277,12 +277,11 @@ static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state) [PCI_D3hot] = ACPI_STATE_D3, [PCI_D3cold] = ACPI_STATE_D3 }; + int error = -EINVAL; - if (!handle) - return -ENODEV; /* If the ACPI device has _EJ0, ignore the device */ - if (ACPI_SUCCESS(acpi_get_handle(handle, "_EJ0", &tmp))) - return 0; + if (!handle || ACPI_SUCCESS(acpi_get_handle(handle, "_EJ0", &tmp))) + return -ENODEV; switch (state) { case PCI_D0: @@ -290,9 +289,14 @@ static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state) case PCI_D2: case PCI_D3hot: case PCI_D3cold: - return acpi_bus_set_power(handle, state_conv[state]); + error = acpi_bus_set_power(handle, state_conv[state]); } - return -EINVAL; + + if (!error) + dev_printk(KERN_INFO, &dev->dev, + "power state changed by ACPI to D%d\n", state); + + return error; } static struct pci_platform_pm_ops acpi_pci_platform_pm = { diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index f807452..20e2807 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -404,67 +404,56 @@ static inline pci_power_t platform_pci_choose_state(struct pci_dev *dev) } /** - * pci_set_power_state - Set the power state of a PCI device - * @dev: PCI device to be suspended - * @state: PCI power state (D0, D1, D2, D3hot, D3cold) we're entering + * pci_raw_set_power_state - Use PCI PM registers to set the power state of + * given PCI device + * @dev: PCI device to handle. + * @pm: PCI PM capability offset of the device. + * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. * - * Transition a device to a new power state, using the Power Management - * Capabilities in the device's config space. - * - * RETURN VALUE: - * -EINVAL if trying to enter a lower state than we're already in. - * 0 if we're already in the requested state. - * -EIO if device does not support PCI PM. - * 0 if we can successfully change the power state. + * RETURN VALUE: + * -EINVAL if the requested state is invalid. + * -EIO if device does not support PCI PM or its PM capabilities register has a + * wrong version, or device doesn't support the requested state. + * 0 if device already is in the requested state. + * 0 if device's power state has been successfully changed. */ -int -pci_set_power_state(struct pci_dev *dev, pci_power_t state) +static int +pci_raw_set_power_state(struct pci_dev *dev, int pm, pci_power_t state) { - int pm, need_restore = 0; u16 pmcsr, pmc; + bool need_restore = false; - /* bound the state we're entering */ - if (state > PCI_D3hot) - state = PCI_D3hot; - - /* - * If the device or the parent bridge can't support PCI PM, ignore - * the request if we're doing anything besides putting it into D0 - * (which would only happen on boot). - */ - if ((state == PCI_D1 || state == PCI_D2) && pci_no_d1d2(dev)) - return 0; - - /* find PCI PM capability in list */ - pm = pci_find_capability(dev, PCI_CAP_ID_PM); - - /* abort if the device doesn't support PM capabilities */ if (!pm) return -EIO; + if (state < PCI_D0 || state > PCI_D3hot) + return -EINVAL; + /* Validate current state: * Can enter D0 from any state, but if we can only go deeper * to sleep if we're already in a low power state */ - if (state != PCI_D0 && dev->current_state > state) { + if (dev->current_state == state) { + /* we're already there */ + return 0; + } else if (state != PCI_D0 && dev->current_state <= PCI_D3cold + && dev->current_state > state) { dev_err(&dev->dev, "invalid power transition " "(from state %d to %d)\n", dev->current_state, state); return -EINVAL; - } else if (dev->current_state == state) - return 0; /* we're already there */ + } + pci_read_config_word(dev, pm + PCI_PM_PMC, &pmc); - pci_read_config_word(dev,pm + PCI_PM_PMC,&pmc); if ((pmc & PCI_PM_CAP_VER_MASK) > 3) { - dev_printk(KERN_DEBUG, &dev->dev, "unsupported PM cap regs " - "version (%u)\n", pmc & PCI_PM_CAP_VER_MASK); + dev_err(&dev->dev, "unsupported PM cap regs version (%u)\n", + pmc & PCI_PM_CAP_VER_MASK); return -EIO; } /* check if this device supports the desired state */ - if (state == PCI_D1 && !(pmc & PCI_PM_CAP_D1)) - return -EIO; - else if (state == PCI_D2 && !(pmc & PCI_PM_CAP_D2)) + if ((state == PCI_D1 && !(pmc & PCI_PM_CAP_D1)) + || (state == PCI_D2 && !(pmc & PCI_PM_CAP_D2))) return -EIO; pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr); @@ -483,7 +472,7 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) case PCI_UNKNOWN: /* Boot-up */ if ((pmcsr & PCI_PM_CTRL_STATE_MASK) == PCI_D3hot && !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET)) - need_restore = 1; + need_restore = true; /* Fall-through: force to D0 */ default: pmcsr = 0; @@ -500,12 +489,6 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) else if (state == PCI_D2 || dev->current_state == PCI_D2) udelay(200); - /* - * Give firmware a chance to be called, such as ACPI _PRx, _PSx - * Firmware method after native method ? - */ - platform_pci_set_power_state(dev, state); - dev->current_state = state; /* According to section 5.4.1 of the "PCI BUS POWER MANAGEMENT @@ -530,6 +513,81 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) } /** + * pci_update_current_state - Read PCI power state of given device from its + * PCI PM registers and cache it + * @dev: PCI device to handle. + * @pm: PCI PM capability offset of the device. + */ +static void pci_update_current_state(struct pci_dev *dev, int pm) +{ + if (pm) { + u16 pmcsr; + + pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr); + dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK); + } +} + +/** + * pci_set_power_state - Set the power state of a PCI device + * @dev: PCI device to handle. + * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. + * + * Transition a device to a new power state, using the platform formware and/or + * the device's PCI PM registers. + * + * RETURN VALUE: + * -EINVAL if the requested state is invalid. + * -EIO if device does not support PCI PM or its PM capabilities register has a + * wrong version, or device doesn't support the requested state. + * 0 if device already is in the requested state. + * 0 if device's power state has been successfully changed. + */ +int pci_set_power_state(struct pci_dev *dev, pci_power_t state) +{ + int pm, error; + + /* bound the state we're entering */ + if (state > PCI_D3hot) + state = PCI_D3hot; + else if (state < PCI_D0) + state = PCI_D0; + else if ((state == PCI_D1 || state == PCI_D2) && pci_no_d1d2(dev)) + /* + * If the device or the parent bridge do not support PCI PM, + * ignore the request if we're doing anything other than putting + * it into D0 (which would only happen on boot). + */ + return 0; + + /* Find PCI PM capability in the list */ + pm = pci_find_capability(dev, PCI_CAP_ID_PM); + + if (state == PCI_D0 && platform_pci_power_manageable(dev)) { + /* + * Allow the platform to change the state, for example via ACPI + * _PR0, _PS0 and some such, but do not trust it. + */ + int ret = platform_pci_set_power_state(dev, PCI_D0); + if (!ret) + pci_update_current_state(dev, pm); + } + + error = pci_raw_set_power_state(dev, pm, state); + + if (state > PCI_D0 && platform_pci_power_manageable(dev)) { + /* Allow the platform to finalize the transition */ + int ret = platform_pci_set_power_state(dev, state); + if (!ret) { + pci_update_current_state(dev, pm); + error = 0; + } + } + + return error; +} + +/** * pci_choose_state - Choose the power state of a PCI device * @dev: PCI device to be suspended * @state: target sleep state for the whole system. This is the value -- cgit v0.10.2 From 77e766099efc29d8b01db4b8244ff64fa3d3d0ca Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 7 Jul 2008 03:33:34 +0200 Subject: ACPI: Introduce acpi_device_sleep_wake function The currect ACPI code attempts to execute _PSW at three different places and in one of them only it tries to execute _DSW before _PSW, which is inconsistent with the other two cases. Move the execution of _DSW and _PSW into a separate function called acpi_device_sleep_wake() and call it wherever appropriate instead of executing _DSW and/or _PSW directly. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 81e4f08..2e959aa 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -292,69 +292,115 @@ static int acpi_power_off_device(acpi_handle handle, struct acpi_device *dev) return 0; } +/** + * acpi_device_sleep_wake - execute _DSW (Device Sleep Wake) or (deprecated in + * ACPI 3.0) _PSW (Power State Wake) + * @dev: Device to handle. + * @enable: 0 - disable, 1 - enable the wake capabilities of the device. + * @sleep_state: Target sleep state of the system. + * @dev_state: Target power state of the device. + * + * Execute _DSW (Device Sleep Wake) or (deprecated in ACPI 3.0) _PSW (Power + * State Wake) for the device, if present. On failure reset the device's + * wakeup.flags.valid flag. + * + * RETURN VALUE: + * 0 if either _DSW or _PSW has been successfully executed + * 0 if neither _DSW nor _PSW has been found + * -ENODEV if the execution of either _DSW or _PSW has failed + */ +int acpi_device_sleep_wake(struct acpi_device *dev, + int enable, int sleep_state, int dev_state) +{ + union acpi_object in_arg[3]; + struct acpi_object_list arg_list = { 3, in_arg }; + acpi_status status = AE_OK; + + /* + * Try to execute _DSW first. + * + * Three agruments are needed for the _DSW object: + * Argument 0: enable/disable the wake capabilities + * Argument 1: target system state + * Argument 2: target device state + * When _DSW object is called to disable the wake capabilities, maybe + * the first argument is filled. The values of the other two agruments + * are meaningless. + */ + in_arg[0].type = ACPI_TYPE_INTEGER; + in_arg[0].integer.value = enable; + in_arg[1].type = ACPI_TYPE_INTEGER; + in_arg[1].integer.value = sleep_state; + in_arg[2].type = ACPI_TYPE_INTEGER; + in_arg[2].integer.value = dev_state; + status = acpi_evaluate_object(dev->handle, "_DSW", &arg_list, NULL); + if (ACPI_SUCCESS(status)) { + return 0; + } else if (status != AE_NOT_FOUND) { + printk(KERN_ERR PREFIX "_DSW execution failed\n"); + dev->wakeup.flags.valid = 0; + return -ENODEV; + } + + /* Execute _PSW */ + arg_list.count = 1; + in_arg[0].integer.value = enable; + status = acpi_evaluate_object(dev->handle, "_PSW", &arg_list, NULL); + if (ACPI_FAILURE(status) && (status != AE_NOT_FOUND)) { + printk(KERN_ERR PREFIX "_PSW execution failed\n"); + dev->wakeup.flags.valid = 0; + return -ENODEV; + } + + return 0; +} + /* * Prepare a wakeup device, two steps (Ref ACPI 2.0:P229): * 1. Power on the power resources required for the wakeup device - * 2. Enable _PSW (power state wake) for the device if present + * 2. Execute _DSW (Device Sleep Wake) or (deprecated in ACPI 3.0) _PSW (Power + * State Wake) for the device, if present */ -int acpi_enable_wakeup_device_power(struct acpi_device *dev) +int acpi_enable_wakeup_device_power(struct acpi_device *dev, int sleep_state) { - union acpi_object arg = { ACPI_TYPE_INTEGER }; - struct acpi_object_list arg_list = { 1, &arg }; - acpi_status status = AE_OK; int i; - int ret = 0; if (!dev || !dev->wakeup.flags.valid) - return -1; + return -EINVAL; - arg.integer.value = 1; /* Open power resource */ for (i = 0; i < dev->wakeup.resources.count; i++) { - ret = acpi_power_on(dev->wakeup.resources.handles[i], dev); + int ret = acpi_power_on(dev->wakeup.resources.handles[i], dev); if (ret) { printk(KERN_ERR PREFIX "Transition power state\n"); dev->wakeup.flags.valid = 0; - return -1; + return -ENODEV; } } - /* Execute PSW */ - status = acpi_evaluate_object(dev->handle, "_PSW", &arg_list, NULL); - if (ACPI_FAILURE(status) && (status != AE_NOT_FOUND)) { - printk(KERN_ERR PREFIX "Evaluate _PSW\n"); - dev->wakeup.flags.valid = 0; - ret = -1; - } - - return ret; + /* + * Passing 3 as the third argument below means the device may be placed + * in arbitrary power state afterwards. + */ + return acpi_device_sleep_wake(dev, 1, sleep_state, 3); } /* * Shutdown a wakeup device, counterpart of above method - * 1. Disable _PSW (power state wake) + * 1. Execute _DSW (Device Sleep Wake) or (deprecated in ACPI 3.0) _PSW (Power + * State Wake) for the device, if present * 2. Shutdown down the power resources */ int acpi_disable_wakeup_device_power(struct acpi_device *dev) { - union acpi_object arg = { ACPI_TYPE_INTEGER }; - struct acpi_object_list arg_list = { 1, &arg }; - acpi_status status = AE_OK; - int i; - int ret = 0; - + int i, ret; if (!dev || !dev->wakeup.flags.valid) - return -1; + return -EINVAL; - arg.integer.value = 0; - /* Execute PSW */ - status = acpi_evaluate_object(dev->handle, "_PSW", &arg_list, NULL); - if (ACPI_FAILURE(status) && (status != AE_NOT_FOUND)) { - printk(KERN_ERR PREFIX "Evaluate _PSW\n"); - dev->wakeup.flags.valid = 0; - return -1; - } + ret = acpi_device_sleep_wake(dev, 0, 0, 0); + if (ret) + return ret; /* Close power resource */ for (i = 0; i < dev->wakeup.resources.count; i++) { @@ -362,7 +408,7 @@ int acpi_disable_wakeup_device_power(struct acpi_device *dev) if (ret) { printk(KERN_ERR PREFIX "Transition power state\n"); dev->wakeup.flags.valid = 0; - return -1; + return -ENODEV; } } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 6d85289..f276890 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -691,9 +691,7 @@ static int acpi_bus_get_wakeup_device_flags(struct acpi_device *device) acpi_status status = 0; struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *package = NULL; - union acpi_object in_arg[3]; - struct acpi_object_list arg_list = { 3, in_arg }; - acpi_status psw_status = AE_OK; + int psw_error; struct acpi_device_id button_device_ids[] = { {"PNP0C0D", 0}, @@ -725,39 +723,11 @@ static int acpi_bus_get_wakeup_device_flags(struct acpi_device *device) * So it is necessary to call _DSW object first. Only when it is not * present will the _PSW object used. */ - /* - * Three agruments are needed for the _DSW object. - * Argument 0: enable/disable the wake capabilities - * When _DSW object is called to disable the wake capabilities, maybe - * the first argument is filled. The value of the other two agruments - * is meaningless. - */ - in_arg[0].type = ACPI_TYPE_INTEGER; - in_arg[0].integer.value = 0; - in_arg[1].type = ACPI_TYPE_INTEGER; - in_arg[1].integer.value = 0; - in_arg[2].type = ACPI_TYPE_INTEGER; - in_arg[2].integer.value = 0; - psw_status = acpi_evaluate_object(device->handle, "_DSW", - &arg_list, NULL); - if (ACPI_FAILURE(psw_status) && (psw_status != AE_NOT_FOUND)) - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "error in evaluate _DSW\n")); - /* - * When the _DSW object is not present, OSPM will call _PSW object. - */ - if (psw_status == AE_NOT_FOUND) { - /* - * Only one agruments is required for the _PSW object. - * agrument 0: enable/disable the wake capabilities - */ - arg_list.count = 1; - in_arg[0].integer.value = 0; - psw_status = acpi_evaluate_object(device->handle, "_PSW", - &arg_list, NULL); - if (ACPI_FAILURE(psw_status) && (psw_status != AE_NOT_FOUND)) - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "error in " - "evaluate _PSW\n")); - } + psw_error = acpi_device_sleep_wake(device, 0, 0, 0); + if (psw_error) + ACPI_DEBUG_PRINT((ACPI_DB_INFO, + "error in _DSW or _PSW evaluation\n")); + /* Power button, Lid switch always enable wakeup */ if (!acpi_match_device_ids(device, button_device_ids)) device->wakeup.flags.run_wake = 1; diff --git a/drivers/acpi/sleep/wakeup.c b/drivers/acpi/sleep/wakeup.c index ed8e41b..7422a22 100644 --- a/drivers/acpi/sleep/wakeup.c +++ b/drivers/acpi/sleep/wakeup.c @@ -42,7 +42,7 @@ void acpi_enable_wakeup_device_prep(u8 sleep_state) continue; spin_unlock(&acpi_device_lock); - acpi_enable_wakeup_device_power(dev); + acpi_enable_wakeup_device_power(dev, sleep_state); spin_lock(&acpi_device_lock); } spin_unlock(&acpi_device_lock); diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index 9757a04..e5f38e5 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -87,7 +87,9 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_device *device, int domain, -------------------------------------------------------------------------- */ #ifdef CONFIG_ACPI_POWER -int acpi_enable_wakeup_device_power(struct acpi_device *dev); +int acpi_device_sleep_wake(struct acpi_device *dev, + int enable, int sleep_state, int dev_state); +int acpi_enable_wakeup_device_power(struct acpi_device *dev, int sleep_state); int acpi_disable_wakeup_device_power(struct acpi_device *dev); int acpi_power_get_inferred_state(struct acpi_device *device); int acpi_power_transition(struct acpi_device *device, int state); -- cgit v0.10.2 From 0af4b8c4fb31193dc666f4893107a18fef82baab Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 7 Jul 2008 03:34:11 +0200 Subject: ACPI: Introduce new device wakeup flag 'prepared' Introduce additional flag 'prepared' in struct acpi_device_wakeup_flags and use it to prevent devices from being enable/disabled do wake up the system multiple times in a row (this does not happen currently, but will be possible after some of the following patches). Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 2e959aa..4ab21cb 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -363,11 +363,18 @@ int acpi_device_sleep_wake(struct acpi_device *dev, */ int acpi_enable_wakeup_device_power(struct acpi_device *dev, int sleep_state) { - int i; + int i, err; if (!dev || !dev->wakeup.flags.valid) return -EINVAL; + /* + * Do not execute the code below twice in a row without calling + * acpi_disable_wakeup_device_power() in between for the same device + */ + if (dev->wakeup.flags.prepared) + return 0; + /* Open power resource */ for (i = 0; i < dev->wakeup.resources.count; i++) { int ret = acpi_power_on(dev->wakeup.resources.handles[i], dev); @@ -382,7 +389,11 @@ int acpi_enable_wakeup_device_power(struct acpi_device *dev, int sleep_state) * Passing 3 as the third argument below means the device may be placed * in arbitrary power state afterwards. */ - return acpi_device_sleep_wake(dev, 1, sleep_state, 3); + err = acpi_device_sleep_wake(dev, 1, sleep_state, 3); + if (!err) + dev->wakeup.flags.prepared = 1; + + return err; } /* @@ -398,6 +409,15 @@ int acpi_disable_wakeup_device_power(struct acpi_device *dev) if (!dev || !dev->wakeup.flags.valid) return -EINVAL; + /* + * Do not execute the code below twice in a row without calling + * acpi_enable_wakeup_device_power() in between for the same device + */ + if (!dev->wakeup.flags.prepared) + return 0; + + dev->wakeup.flags.prepared = 0; + ret = acpi_device_sleep_wake(dev, 0, 0, 0); if (ret) return ret; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 0c21ea3..071daf8 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -259,6 +259,7 @@ struct acpi_device_perf { /* Wakeup Management */ struct acpi_device_wakeup_flags { u8 valid:1; /* Can successfully enable wakeup? */ + u8 prepared:1; /* Has the wake-up capability been enabled? */ u8 run_wake:1; /* Run-Wake GPE devices */ }; -- cgit v0.10.2 From eb9d0fe40e313c0a74115ef456a2e43a6c8da72f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 7 Jul 2008 03:34:48 +0200 Subject: PCI ACPI: Rework PCI handling of wake-up * Introduce function acpi_pm_device_sleep_wake() for enabling and disabling the system wake-up capability of devices that are power manageable by ACPI. * Introduce function acpi_bus_can_wakeup() allowing other (dependent) subsystems to check if ACPI is able to enable the system wake-up capability of given device. * Introduce callback .sleep_wake() in struct pci_platform_pm_ops and for the ACPI PCI 'driver' make it use acpi_pm_device_sleep_wake(). * Introduce callback .can_wakeup() in struct pci_platform_pm_ops and for the ACPI 'driver' make it use acpi_bus_can_wakeup(). * Move the PME# handlig code out of pci_enable_wake() and split it into two functions, pci_pme_capable() and pci_pme_active(), allowing the caller to check if given device is capable of generating PME# from given power state and to enable/disable the device's PME# functionality, respectively. * Modify pci_enable_wake() to use the new ACPI callbacks and the new PME#-related functions. * Drop the generic .platform_enable_wakeup() callback that is not used any more. * Introduce device_set_wakeup_capable() that will set the power.can_wakeup flag of given device. * Rework PCI device PM initialization so that, if given device is capable of generating wake-up events, either natively through the PME# mechanism, or with the help of the platform, its power.can_wakeup flag is set and its power.should_wakeup flag is unset as appropriate. * Make ACPI set the power.can_wakeup flag for devices found to be wake-up capable by it. * Make the ACPI wake-up code enable/disable GPEs for devices that have the wakeup.flags.prepared flag set (which means that their wake-up power has been enabled). Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index b9b69d9..fc1110d 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -306,6 +306,17 @@ bool acpi_bus_power_manageable(acpi_handle handle) EXPORT_SYMBOL(acpi_bus_power_manageable); +bool acpi_bus_can_wakeup(acpi_handle handle) +{ + struct acpi_device *device; + int result; + + result = acpi_bus_get_device(handle, &device); + return result ? false : device->wakeup.flags.valid; +} + +EXPORT_SYMBOL(acpi_bus_can_wakeup); + /* -------------------------------------------------------------------------- Event Management -------------------------------------------------------------------------- */ diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 06f8634..87c5d45 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -166,6 +166,8 @@ static int acpi_bind_one(struct device *dev, acpi_handle handle) "firmware_node"); ret = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj, "physical_node"); + if (acpi_dev->wakeup.flags.valid) + device_set_wakeup_capable(dev, true); } return 0; diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c index 4addf8a..af7f466 100644 --- a/drivers/acpi/sleep/main.c +++ b/drivers/acpi/sleep/main.c @@ -468,6 +468,31 @@ int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p) *d_min_p = d_min; return d_max; } + +/** + * acpi_pm_device_sleep_wake - enable or disable the system wake-up + * capability of given device + * @dev: device to handle + * @enable: 'true' - enable, 'false' - disable the wake-up capability + */ +int acpi_pm_device_sleep_wake(struct device *dev, bool enable) +{ + acpi_handle handle; + struct acpi_device *adev; + + if (!device_may_wakeup(dev)) + return -EINVAL; + + handle = DEVICE_ACPI_HANDLE(dev); + if (!handle || ACPI_FAILURE(acpi_bus_get_device(handle, &adev))) { + printk(KERN_DEBUG "ACPI handle has no context!\n"); + return -ENODEV; + } + + return enable ? + acpi_enable_wakeup_device_power(adev, acpi_target_sleep_state) : + acpi_disable_wakeup_device_power(adev); +} #endif static void acpi_power_off_prepare(void) diff --git a/drivers/acpi/sleep/wakeup.c b/drivers/acpi/sleep/wakeup.c index 7422a22..38655eb 100644 --- a/drivers/acpi/sleep/wakeup.c +++ b/drivers/acpi/sleep/wakeup.c @@ -66,13 +66,15 @@ void acpi_enable_wakeup_device(u8 sleep_state) list_for_each_safe(node, next, &acpi_wakeup_device_list) { struct acpi_device *dev = container_of(node, struct acpi_device, wakeup_list); + if (!dev->wakeup.flags.valid) continue; + /* If users want to disable run-wake GPE, * we only disable it for wake and leave it for runtime */ - if (!dev->wakeup.state.enabled || - sleep_state > (u32) dev->wakeup.sleep_state) { + if ((!dev->wakeup.state.enabled && !dev->wakeup.flags.prepared) + || sleep_state > (u32) dev->wakeup.sleep_state) { if (dev->wakeup.flags.run_wake) { spin_unlock(&acpi_device_lock); /* set_gpe_type will disable GPE, leave it like that */ @@ -110,8 +112,9 @@ void acpi_disable_wakeup_device(u8 sleep_state) if (!dev->wakeup.flags.valid) continue; - if (!dev->wakeup.state.enabled || - sleep_state > (u32) dev->wakeup.sleep_state) { + + if ((!dev->wakeup.state.enabled && !dev->wakeup.flags.prepared) + || sleep_state > (u32) dev->wakeup.sleep_state) { if (dev->wakeup.flags.run_wake) { spin_unlock(&acpi_device_lock); acpi_set_gpe_type(dev->wakeup.gpe_device, diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index d11f74b..596aeec 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -6,9 +6,6 @@ #include #include "power.h" -int (*platform_enable_wakeup)(struct device *dev, int is_on); - - /* * wakeup - Report/change current wakeup option for device * diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 6bc0d8c..7764768b 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -299,10 +299,30 @@ static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state) return error; } +static bool acpi_pci_can_wakeup(struct pci_dev *dev) +{ + acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev); + + return handle ? acpi_bus_can_wakeup(handle) : false; +} + +static int acpi_pci_sleep_wake(struct pci_dev *dev, bool enable) +{ + int error = acpi_pm_device_sleep_wake(&dev->dev, enable); + + if (!error) + dev_printk(KERN_INFO, &dev->dev, + "wake-up capability %s by ACPI\n", + enable ? "enabled" : "disabled"); + return error; +} + static struct pci_platform_pm_ops acpi_pci_platform_pm = { .is_manageable = acpi_pci_power_manageable, .set_state = acpi_pci_set_power_state, .choose_state = acpi_pci_choose_state, + .can_wakeup = acpi_pci_can_wakeup, + .sleep_wake = acpi_pci_sleep_wake, }; /* ACPI bus type */ diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 20e2807..a6b1b6f 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -380,7 +380,8 @@ static struct pci_platform_pm_ops *pci_platform_pm; int pci_set_platform_pm(struct pci_platform_pm_ops *ops) { - if (!ops->is_manageable || !ops->set_state || !ops->choose_state) + if (!ops->is_manageable || !ops->set_state || !ops->choose_state + || !ops->sleep_wake || !ops->can_wakeup) return -EINVAL; pci_platform_pm = ops; return 0; @@ -403,6 +404,17 @@ static inline pci_power_t platform_pci_choose_state(struct pci_dev *dev) pci_platform_pm->choose_state(dev) : PCI_POWER_ERROR; } +static inline bool platform_pci_can_wakeup(struct pci_dev *dev) +{ + return pci_platform_pm ? pci_platform_pm->can_wakeup(dev) : false; +} + +static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable) +{ + return pci_platform_pm ? + pci_platform_pm->sleep_wake(dev, enable) : -ENODEV; +} + /** * pci_raw_set_power_state - Use PCI PM registers to set the power state of * given PCI device @@ -1036,6 +1048,56 @@ int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) } /** + * pci_pme_capable - check the capability of PCI device to generate PME# + * @dev: PCI device to handle. + * @pm: PCI PM capability offset of the device. + * @state: PCI state from which device will issue PME#. + */ +static bool pci_pme_capable(struct pci_dev *dev, int pm, pci_power_t state) +{ + u16 pmc; + + if (!pm) + return false; + + /* Check device's ability to generate PME# from given state */ + pci_read_config_word(dev, pm + PCI_PM_PMC, &pmc); + + pmc &= PCI_PM_CAP_PME_MASK; + pmc >>= ffs(PCI_PM_CAP_PME_MASK) - 1; /* First bit of mask */ + + return !!(pmc & (1 << state)); +} + +/** + * pci_pme_active - enable or disable PCI device's PME# function + * @dev: PCI device to handle. + * @pm: PCI PM capability offset of the device. + * @enable: 'true' to enable PME# generation; 'false' to disable it. + * + * The caller must verify that the device is capable of generating PME# before + * calling this function with @enable equal to 'true'. + */ +static void pci_pme_active(struct pci_dev *dev, int pm, bool enable) +{ + u16 pmcsr; + + if (!pm) + return; + + pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr); + /* Clear PME_Status by writing 1 to it and enable PME# */ + pmcsr |= PCI_PM_CTRL_PME_STATUS | PCI_PM_CTRL_PME_ENABLE; + if (!enable) + pmcsr &= ~PCI_PM_CTRL_PME_ENABLE; + + pci_write_config_word(dev, pm + PCI_PM_CTRL, pmcsr); + + dev_printk(KERN_INFO, &dev->dev, "PME# %s\n", + enable ? "enabled" : "disabled"); +} + +/** * pci_enable_wake - enable PCI device as wakeup event source * @dev: PCI device affected * @state: PCI state from which device will issue wakeup events @@ -1046,66 +1108,83 @@ int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) * called automatically by this routine. * * Devices with legacy power management (no standard PCI PM capabilities) - * always require such platform hooks. Depending on the platform, devices - * supporting the standard PCI PME# signal may require such platform hooks; - * they always update bits in config space to allow PME# generation. + * always require such platform hooks. * - * -EIO is returned if the device can't ever be a wakeup event source. - * -EINVAL is returned if the device can't generate wakeup events from - * the specified PCI state. Returns zero if the operation is successful. + * RETURN VALUE: + * 0 is returned on success + * -EINVAL is returned if device is not supposed to wake up the system + * Error code depending on the platform is returned if both the platform and + * the native mechanism fail to enable the generation of wake-up events */ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable) { int pm; - int status; - u16 value; - - /* Note that drivers should verify device_may_wakeup(&dev->dev) - * before calling this function. Platform code should report - * errors when drivers try to enable wakeup on devices that - * can't issue wakeups, or on which wakeups were disabled by - * userspace updating the /sys/devices.../power/wakeup file. - */ + int error = 0; + bool pme_done = false; - status = call_platform_enable_wakeup(&dev->dev, enable); - - /* find PCI PM capability in list */ - pm = pci_find_capability(dev, PCI_CAP_ID_PM); + if (!device_may_wakeup(&dev->dev)) + return -EINVAL; - /* If device doesn't support PM Capabilities, but caller wants to - * disable wake events, it's a NOP. Otherwise fail unless the - * platform hooks handled this legacy device already. + /* + * According to "PCI System Architecture" 4th ed. by Tom Shanley & Don + * Anderson we should be doing PME# wake enable followed by ACPI wake + * enable. To disable wake-up we call the platform first, for symmetry. */ - if (!pm) - return enable ? status : 0; - /* Check device's ability to generate PME# */ - pci_read_config_word(dev,pm+PCI_PM_PMC,&value); + if (!enable && platform_pci_can_wakeup(dev)) + error = platform_pci_sleep_wake(dev, false); - value &= PCI_PM_CAP_PME_MASK; - value >>= ffs(PCI_PM_CAP_PME_MASK) - 1; /* First bit of mask */ - - /* Check if it can generate PME# from requested state. */ - if (!value || !(value & (1 << state))) { - /* if it can't, revert what the platform hook changed, - * always reporting the base "EINVAL, can't PME#" error - */ - if (enable) - call_platform_enable_wakeup(&dev->dev, 0); - return enable ? -EINVAL : 0; + pm = pci_find_capability(dev, PCI_CAP_ID_PM); + if (!enable || pci_pme_capable(dev, pm, state)) { + pci_pme_active(dev, pm, enable); + pme_done = true; } - pci_read_config_word(dev, pm + PCI_PM_CTRL, &value); + if (enable && platform_pci_can_wakeup(dev)) + error = platform_pci_sleep_wake(dev, true); - /* Clear PME_Status by writing 1 to it and enable PME# */ - value |= PCI_PM_CTRL_PME_STATUS | PCI_PM_CTRL_PME_ENABLE; + return pme_done ? 0 : error; +} - if (!enable) - value &= ~PCI_PM_CTRL_PME_ENABLE; +/** + * pci_pm_init - Initialize PM functions of given PCI device + * @dev: PCI device to handle. + */ +void pci_pm_init(struct pci_dev *dev) +{ + int pm; + u16 pmc; - pci_write_config_word(dev, pm + PCI_PM_CTRL, value); + /* find PCI PM capability in list */ + pm = pci_find_capability(dev, PCI_CAP_ID_PM); + if (!pm) + return; + /* Check device's ability to generate PME# */ + pci_read_config_word(dev, pm + PCI_PM_PMC, &pmc); - return 0; + if ((pmc & PCI_PM_CAP_VER_MASK) > 3) { + dev_err(&dev->dev, "unsupported PM cap regs version (%u)\n", + pmc & PCI_PM_CAP_VER_MASK); + return; + } + + if (pmc & PCI_PM_CAP_PME_MASK) { + dev_printk(KERN_INFO, &dev->dev, + "PME# supported from%s%s%s%s%s\n", + (pmc & PCI_PM_CAP_PME_D0) ? " D0" : "", + (pmc & PCI_PM_CAP_PME_D1) ? " D1" : "", + (pmc & PCI_PM_CAP_PME_D2) ? " D2" : "", + (pmc & PCI_PM_CAP_PME_D3) ? " D3hot" : "", + (pmc & PCI_PM_CAP_PME_D3cold) ? " D3cold" : ""); + /* + * Make device's PM flags reflect the wake-up capability, but + * let the user space enable it to wake up the system as needed. + */ + device_set_wakeup_capable(&dev->dev, true); + device_set_wakeup_enable(&dev->dev, false); + /* Disable the PME# generation functionality */ + pci_pme_active(dev, pm, false); + } } int diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 0cd2e71..b08dfc9 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -17,6 +17,11 @@ extern void pci_cleanup_rom(struct pci_dev *dev); * platform; to be used during system-wide transitions from a * sleeping state to the working state and vice versa * + * @can_wakeup - returns 'true' if given device is capable of waking up the + * system from a sleeping state + * + * @sleep_wake - enables/disables the system wake up capability of given device + * * If given platform is generally capable of power managing PCI devices, all of * these callbacks are mandatory. */ @@ -24,9 +29,12 @@ struct pci_platform_pm_ops { bool (*is_manageable)(struct pci_dev *dev); int (*set_state)(struct pci_dev *dev, pci_power_t state); pci_power_t (*choose_state)(struct pci_dev *dev); + bool (*can_wakeup)(struct pci_dev *dev); + int (*sleep_wake)(struct pci_dev *dev, bool enable); }; extern int pci_set_platform_pm(struct pci_platform_pm_ops *ops); +extern void pci_pm_init(struct pci_dev *dev); extern int pci_user_read_config_byte(struct pci_dev *dev, int where, u8 *val); extern int pci_user_read_config_word(struct pci_dev *dev, int where, u16 *val); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 2f0ae70..b1724cf 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -860,49 +860,6 @@ int pci_cfg_space_size_ext(struct pci_dev *dev) return PCI_CFG_SPACE_SIZE; } -/** - * pci_disable_pme - Disable the PME function of PCI device - * @dev: PCI device affected - * -EINVAL is returned if PCI device doesn't support PME. - * Zero is returned if the PME is supported and can be disabled. - */ -static int pci_disable_pme(struct pci_dev *dev) -{ - int pm; - u16 value; - - /* find PCI PM capability in list */ - pm = pci_find_capability(dev, PCI_CAP_ID_PM); - - /* If device doesn't support PM Capabilities, it means that PME is - * not supported. - */ - if (!pm) - return -EINVAL; - /* Check device's ability to generate PME# */ - pci_read_config_word(dev, pm + PCI_PM_PMC, &value); - - value &= PCI_PM_CAP_PME_MASK; - /* Check if it can generate PME# */ - if (!value) { - /* - * If it is zero, it means that PME is still unsupported - * although there exists the PM capability. - */ - return -EINVAL; - } - - pci_read_config_word(dev, pm + PCI_PM_CTRL, &value); - - /* Clear PME_Status by writing 1 to it */ - value |= PCI_PM_CTRL_PME_STATUS ; - /* Disable PME enable bit */ - value &= ~PCI_PM_CTRL_PME_ENABLE; - pci_write_config_word(dev, pm + PCI_PM_CTRL, value); - - return 0; -} - int pci_cfg_space_size(struct pci_dev *dev) { int pos; @@ -1010,7 +967,6 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) } pci_vpd_pci22_init(dev); - pci_disable_pme(dev); return dev; } @@ -1031,6 +987,9 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) /* Fix up broken headers */ pci_fixup_device(pci_fixup_header, dev); + /* Initialize power management of the device */ + pci_pm_init(dev); + /* * Add the device to our list of discovered devices * and the bus list for fixup functions, etc. diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 071daf8..7ab5a61 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -337,6 +337,7 @@ int acpi_bus_get_status(struct acpi_device *device); int acpi_bus_get_power(acpi_handle handle, int *state); int acpi_bus_set_power(acpi_handle handle, int state); bool acpi_bus_power_manageable(acpi_handle handle); +bool acpi_bus_can_wakeup(acpi_handle handle); #ifdef CONFIG_ACPI_PROC_EVENT int acpi_bus_generate_proc_event(struct acpi_device *device, u8 type, int data); int acpi_bus_generate_proc_event4(const char *class, const char *bid, u8 type, int data); @@ -379,6 +380,7 @@ acpi_handle acpi_get_pci_rootbridge_handle(unsigned int, unsigned int); #ifdef CONFIG_PM_SLEEP int acpi_pm_device_sleep_state(struct device *, int *); +int acpi_pm_device_sleep_wake(struct device *, bool); #else /* !CONFIG_PM_SLEEP */ static inline int acpi_pm_device_sleep_state(struct device *d, int *p) { diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index f0d0b2c..3af0c8d 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -35,6 +35,11 @@ static inline void device_init_wakeup(struct device *dev, int val) dev->power.can_wakeup = dev->power.should_wakeup = !!val; } +static inline void device_set_wakeup_capable(struct device *dev, int val) +{ + dev->power.can_wakeup = !!val; +} + static inline int device_can_wakeup(struct device *dev) { return dev->power.can_wakeup; @@ -47,21 +52,7 @@ static inline void device_set_wakeup_enable(struct device *dev, int val) static inline int device_may_wakeup(struct device *dev) { - return dev->power.can_wakeup & dev->power.should_wakeup; -} - -/* - * Platform hook to activate device wakeup capability, if that's not already - * handled by enable_irq_wake() etc. - * Returns zero on success, else negative errno - */ -extern int (*platform_enable_wakeup)(struct device *dev, int is_on); - -static inline int call_platform_enable_wakeup(struct device *dev, int is_on) -{ - if (platform_enable_wakeup) - return (*platform_enable_wakeup)(dev, is_on); - return 0; + return dev->power.can_wakeup && dev->power.should_wakeup; } #else /* !CONFIG_PM */ @@ -80,11 +71,6 @@ static inline int device_can_wakeup(struct device *dev) #define device_set_wakeup_enable(dev, val) do {} while (0) #define device_may_wakeup(dev) 0 -static inline int call_platform_enable_wakeup(struct device *dev, int is_on) -{ - return 0; -} - #endif /* !CONFIG_PM */ #endif /* _LINUX_PM_WAKEUP_H */ -- cgit v0.10.2 From 404cc2d8ce41ed4031958fba8e633767e8a2e028 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 7 Jul 2008 03:35:26 +0200 Subject: PCI PM: Introduce pci_prepare_to_sleep and pci_back_from_sleep Introduce functions pci_prepare_to_sleep() and pci_back_from_sleep(), to be used by the PCI drivers that want to place their devices into the lowest power state appropiate for them (PCI_D3hot, if the device is not supposed to wake up the system, or the deepest state from which the wake-up is possible, otherwise) while the system is being prepared to go into a sleeping state and to put them back into D0 during the subsequent transition to the working state. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index a6b1b6f..1b0ec45 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1147,6 +1147,87 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable) } /** + * pci_prepare_to_sleep - prepare PCI device for system-wide transition into + * a sleep state + * @dev: Device to handle. + * + * Choose the power state appropriate for the device depending on whether + * it can wake up the system and/or is power manageable by the platform + * (PCI_D3hot is the default) and put the device into that state. + */ +int pci_prepare_to_sleep(struct pci_dev *dev) +{ + pci_power_t target_state = PCI_D3hot; + int pm = pci_find_capability(dev, PCI_CAP_ID_PM); + int error; + + if (platform_pci_power_manageable(dev)) { + /* + * Call the platform to choose the target state of the device + * and enable wake-up from this state if supported. + */ + pci_power_t state = platform_pci_choose_state(dev); + + switch (state) { + case PCI_POWER_ERROR: + case PCI_UNKNOWN: + break; + case PCI_D1: + case PCI_D2: + if (pci_no_d1d2(dev)) + break; + default: + target_state = state; + pci_enable_wake(dev, target_state, true); + } + } else if (device_may_wakeup(&dev->dev)) { + /* + * Find the deepest state from which the device can generate + * wake-up events, make it the target state and enable device + * to generate PME#. + */ + u16 pmc; + + if (!pm) + return -EIO; + + pci_read_config_word(dev, pm + PCI_PM_PMC, &pmc); + if (pmc & PCI_PM_CAP_PME_MASK) { + if (!(pmc & PCI_PM_CAP_PME_D3)) { + /* Device cannot generate PME# from D3_hot */ + if (pmc & PCI_PM_CAP_PME_D2) + target_state = PCI_D2; + else if (pmc & PCI_PM_CAP_PME_D1) + target_state = PCI_D1; + else + target_state = PCI_D0; + } + pci_pme_active(dev, pm, true); + } + } + + error = pci_set_power_state(dev, target_state); + + if (error) + pci_enable_wake(dev, target_state, false); + + return error; +} + +/** + * pci_back_from_sleep - turn PCI device on during system-wide transition into + * the working state a sleep state + * @dev: Device to handle. + * + * Disable device's sytem wake-up capability and put it into D0. + */ +int pci_back_from_sleep(struct pci_dev *dev) +{ + pci_enable_wake(dev, PCI_D0, false); + return pci_set_power_state(dev, PCI_D0); +} + +/** * pci_pm_init - Initialize PM functions of given PCI device * @dev: PCI device to handle. */ @@ -1853,5 +1934,7 @@ EXPORT_SYMBOL(pci_set_power_state); EXPORT_SYMBOL(pci_save_state); EXPORT_SYMBOL(pci_restore_state); EXPORT_SYMBOL(pci_enable_wake); +EXPORT_SYMBOL(pci_prepare_to_sleep); +EXPORT_SYMBOL(pci_back_from_sleep); EXPORT_SYMBOL_GPL(pci_set_pcie_reset_state); diff --git a/include/linux/pci.h b/include/linux/pci.h index 4c80dc3..52ac06d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -632,6 +632,8 @@ int pci_restore_state(struct pci_dev *dev); int pci_set_power_state(struct pci_dev *dev, pci_power_t state); pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable); +int pci_prepare_to_sleep(struct pci_dev *dev); +int pci_back_from_sleep(struct pci_dev *dev); /* Functions for PCI Hotplug drivers to use */ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); -- cgit v0.10.2 From 337001b6c42938f49a880b1b8306c3ed771a7e61 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 7 Jul 2008 03:36:24 +0200 Subject: PCI: Simplify PCI device PM code If the offset of PCI device's PM capability in its configuration space, the mask of states that the device supports PME# from and the D1 and D2 support bits are cached in the corresponding struct pci_dev, the PCI device PM code can be simplified quite a bit. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 1b0ec45..e632a58 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -419,7 +419,6 @@ static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable) * pci_raw_set_power_state - Use PCI PM registers to set the power state of * given PCI device * @dev: PCI device to handle. - * @pm: PCI PM capability offset of the device. * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. * * RETURN VALUE: @@ -430,12 +429,12 @@ static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable) * 0 if device's power state has been successfully changed. */ static int -pci_raw_set_power_state(struct pci_dev *dev, int pm, pci_power_t state) +pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) { - u16 pmcsr, pmc; + u16 pmcsr; bool need_restore = false; - if (!pm) + if (!dev->pm_cap) return -EIO; if (state < PCI_D0 || state > PCI_D3hot) @@ -455,20 +454,12 @@ pci_raw_set_power_state(struct pci_dev *dev, int pm, pci_power_t state) return -EINVAL; } - pci_read_config_word(dev, pm + PCI_PM_PMC, &pmc); - - if ((pmc & PCI_PM_CAP_VER_MASK) > 3) { - dev_err(&dev->dev, "unsupported PM cap regs version (%u)\n", - pmc & PCI_PM_CAP_VER_MASK); - return -EIO; - } - /* check if this device supports the desired state */ - if ((state == PCI_D1 && !(pmc & PCI_PM_CAP_D1)) - || (state == PCI_D2 && !(pmc & PCI_PM_CAP_D2))) + if ((state == PCI_D1 && !dev->d1_support) + || (state == PCI_D2 && !dev->d2_support)) return -EIO; - pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr); + pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); /* If we're (effectively) in D3, force entire word to 0. * This doesn't affect PME_Status, disables PME_En, and @@ -492,7 +483,7 @@ pci_raw_set_power_state(struct pci_dev *dev, int pm, pci_power_t state) } /* enter specified state */ - pci_write_config_word(dev, pm + PCI_PM_CTRL, pmcsr); + pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr); /* Mandatory power management transition delays */ /* see PCI PM 1.1 5.6.1 table 18 */ @@ -528,14 +519,13 @@ pci_raw_set_power_state(struct pci_dev *dev, int pm, pci_power_t state) * pci_update_current_state - Read PCI power state of given device from its * PCI PM registers and cache it * @dev: PCI device to handle. - * @pm: PCI PM capability offset of the device. */ -static void pci_update_current_state(struct pci_dev *dev, int pm) +static void pci_update_current_state(struct pci_dev *dev) { - if (pm) { + if (dev->pm_cap) { u16 pmcsr; - pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr); + pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK); } } @@ -557,7 +547,7 @@ static void pci_update_current_state(struct pci_dev *dev, int pm) */ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) { - int pm, error; + int error; /* bound the state we're entering */ if (state > PCI_D3hot) @@ -572,9 +562,6 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) */ return 0; - /* Find PCI PM capability in the list */ - pm = pci_find_capability(dev, PCI_CAP_ID_PM); - if (state == PCI_D0 && platform_pci_power_manageable(dev)) { /* * Allow the platform to change the state, for example via ACPI @@ -582,16 +569,16 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) */ int ret = platform_pci_set_power_state(dev, PCI_D0); if (!ret) - pci_update_current_state(dev, pm); + pci_update_current_state(dev); } - error = pci_raw_set_power_state(dev, pm, state); + error = pci_raw_set_power_state(dev, state); if (state > PCI_D0 && platform_pci_power_manageable(dev)) { /* Allow the platform to finalize the transition */ int ret = platform_pci_set_power_state(dev, state); if (!ret) { - pci_update_current_state(dev, pm); + pci_update_current_state(dev); error = 0; } } @@ -1050,48 +1037,38 @@ int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) /** * pci_pme_capable - check the capability of PCI device to generate PME# * @dev: PCI device to handle. - * @pm: PCI PM capability offset of the device. * @state: PCI state from which device will issue PME#. */ -static bool pci_pme_capable(struct pci_dev *dev, int pm, pci_power_t state) +static bool pci_pme_capable(struct pci_dev *dev, pci_power_t state) { - u16 pmc; - - if (!pm) + if (!dev->pm_cap) return false; - /* Check device's ability to generate PME# from given state */ - pci_read_config_word(dev, pm + PCI_PM_PMC, &pmc); - - pmc &= PCI_PM_CAP_PME_MASK; - pmc >>= ffs(PCI_PM_CAP_PME_MASK) - 1; /* First bit of mask */ - - return !!(pmc & (1 << state)); + return !!(dev->pme_support & (1 << state)); } /** * pci_pme_active - enable or disable PCI device's PME# function * @dev: PCI device to handle. - * @pm: PCI PM capability offset of the device. * @enable: 'true' to enable PME# generation; 'false' to disable it. * * The caller must verify that the device is capable of generating PME# before * calling this function with @enable equal to 'true'. */ -static void pci_pme_active(struct pci_dev *dev, int pm, bool enable) +static void pci_pme_active(struct pci_dev *dev, bool enable) { u16 pmcsr; - if (!pm) + if (!dev->pm_cap) return; - pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr); + pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); /* Clear PME_Status by writing 1 to it and enable PME# */ pmcsr |= PCI_PM_CTRL_PME_STATUS | PCI_PM_CTRL_PME_ENABLE; if (!enable) pmcsr &= ~PCI_PM_CTRL_PME_ENABLE; - pci_write_config_word(dev, pm + PCI_PM_CTRL, pmcsr); + pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr); dev_printk(KERN_INFO, &dev->dev, "PME# %s\n", enable ? "enabled" : "disabled"); @@ -1118,7 +1095,6 @@ static void pci_pme_active(struct pci_dev *dev, int pm, bool enable) */ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable) { - int pm; int error = 0; bool pme_done = false; @@ -1134,9 +1110,8 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable) if (!enable && platform_pci_can_wakeup(dev)) error = platform_pci_sleep_wake(dev, false); - pm = pci_find_capability(dev, PCI_CAP_ID_PM); - if (!enable || pci_pme_capable(dev, pm, state)) { - pci_pme_active(dev, pm, enable); + if (!enable || pci_pme_capable(dev, state)) { + pci_pme_active(dev, enable); pme_done = true; } @@ -1158,7 +1133,6 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable) int pci_prepare_to_sleep(struct pci_dev *dev) { pci_power_t target_state = PCI_D3hot; - int pm = pci_find_capability(dev, PCI_CAP_ID_PM); int error; if (platform_pci_power_manageable(dev)) { @@ -1186,23 +1160,14 @@ int pci_prepare_to_sleep(struct pci_dev *dev) * wake-up events, make it the target state and enable device * to generate PME#. */ - u16 pmc; - - if (!pm) + if (!dev->pm_cap) return -EIO; - pci_read_config_word(dev, pm + PCI_PM_PMC, &pmc); - if (pmc & PCI_PM_CAP_PME_MASK) { - if (!(pmc & PCI_PM_CAP_PME_D3)) { - /* Device cannot generate PME# from D3_hot */ - if (pmc & PCI_PM_CAP_PME_D2) - target_state = PCI_D2; - else if (pmc & PCI_PM_CAP_PME_D1) - target_state = PCI_D1; - else - target_state = PCI_D0; - } - pci_pme_active(dev, pm, true); + if (dev->pme_support) { + while (target_state + && !(dev->pme_support & (1 << target_state))) + target_state--; + pci_pme_active(dev, true); } } @@ -1236,6 +1201,8 @@ void pci_pm_init(struct pci_dev *dev) int pm; u16 pmc; + dev->pm_cap = 0; + /* find PCI PM capability in list */ pm = pci_find_capability(dev, PCI_CAP_ID_PM); if (!pm) @@ -1249,7 +1216,23 @@ void pci_pm_init(struct pci_dev *dev) return; } - if (pmc & PCI_PM_CAP_PME_MASK) { + dev->pm_cap = pm; + + dev->d1_support = false; + dev->d2_support = false; + if (!pci_no_d1d2(dev)) { + if (pmc & PCI_PM_CAP_D1) { + dev_printk(KERN_DEBUG, &dev->dev, "supports D1\n"); + dev->d1_support = true; + } + if (pmc & PCI_PM_CAP_D2) { + dev_printk(KERN_DEBUG, &dev->dev, "supports D2\n"); + dev->d2_support = true; + } + } + + pmc &= PCI_PM_CAP_PME_MASK; + if (pmc) { dev_printk(KERN_INFO, &dev->dev, "PME# supported from%s%s%s%s%s\n", (pmc & PCI_PM_CAP_PME_D0) ? " D0" : "", @@ -1257,6 +1240,7 @@ void pci_pm_init(struct pci_dev *dev) (pmc & PCI_PM_CAP_PME_D2) ? " D2" : "", (pmc & PCI_PM_CAP_PME_D3) ? " D3hot" : "", (pmc & PCI_PM_CAP_PME_D3cold) ? " D3cold" : ""); + dev->pme_support = pmc >> PCI_PM_CAP_PME_SHIFT; /* * Make device's PM flags reflect the wake-up capability, but * let the user space enable it to wake up the system as needed. @@ -1264,7 +1248,9 @@ void pci_pm_init(struct pci_dev *dev) device_set_wakeup_capable(&dev->dev, true); device_set_wakeup_enable(&dev->dev, false); /* Disable the PME# generation functionality */ - pci_pme_active(dev, pm, false); + pci_pme_active(dev, false); + } else { + dev->pme_support = 0; } } diff --git a/include/linux/pci.h b/include/linux/pci.h index 52ac06d..68a29f0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -177,6 +177,13 @@ struct pci_dev { pci_power_t current_state; /* Current operating state. In ACPI-speak, this is D0-D3, D0 being fully functional, and D3 being off. */ + int pm_cap; /* PM capability offset in the + configuration space */ + unsigned int pme_support:5; /* Bitmask of states from which PME# + can be generated */ + unsigned int d1_support:1; /* Low power state D1 is supported */ + unsigned int d2_support:1; /* Low power state D2 is supported */ + unsigned int no_d1d2:1; /* Only allow D0 and D3 */ #ifdef CONFIG_PCIEASPM struct pcie_link_state *link_state; /* ASPM link state. */ @@ -201,7 +208,6 @@ struct pci_dev { unsigned int is_added:1; unsigned int is_busmaster:1; /* device is busmaster */ unsigned int no_msi:1; /* device may not use msi */ - unsigned int no_d1d2:1; /* only allow d0 or d3 */ unsigned int block_ucfg_access:1; /* userspace config space access is blocked */ unsigned int broken_parity_status:1; /* Device generates false positive parity */ unsigned int msi_enabled:1; diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index c0c1223..19958b9 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -231,6 +231,7 @@ #define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */ #define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */ #define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */ +#define PCI_PM_CAP_PME_SHIFT 11 /* Start of the PME Mask in PMC */ #define PCI_PM_CTRL 4 /* PM control and status register */ #define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ #define PCI_PM_CTRL_NO_SOFT_RESET 0x0004 /* No reset for D3hot->D0 */ -- cgit v0.10.2 From f7a1b86095bf7fd1578ef54067545b9cb7084713 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 8 Jul 2008 15:00:54 +0100 Subject: Fix acpi_pm_device_sleep_wake() by providing a stub for CONFIG_PM_SLEEP=n So that one of the several config option permutations will build again. Tested-by: Alexander Beregalov Signed-off-by: David Howells Signed-off-by: Jesse Barnes diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 7ab5a61..a5ac0bc 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -388,6 +388,10 @@ static inline int acpi_pm_device_sleep_state(struct device *d, int *p) *p = ACPI_STATE_D0; return ACPI_STATE_D3; } +static inline int acpi_pm_device_sleep_wake(struct device *dev, bool enable) +{ + return -ENODEV; +} #endif /* !CONFIG_PM_SLEEP */ #endif /* CONFIG_ACPI */ -- cgit v0.10.2 From 57dc9a5747942f131a1a8b36d661fec6e6a5e9f6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Jun 2008 15:35:32 -0400 Subject: NFS: Reduce the stack usage in NFSv4 create operations Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1293e0a..26fbeb3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2079,47 +2079,81 @@ static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *n return err; } +struct nfs4_createdata { + struct rpc_message msg; + struct nfs4_create_arg arg; + struct nfs4_create_res res; + struct nfs_fh fh; + struct nfs_fattr fattr; + struct nfs_fattr dir_fattr; +}; + +static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, + struct qstr *name, struct iattr *sattr, u32 ftype) +{ + struct nfs4_createdata *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data != NULL) { + struct nfs_server *server = NFS_SERVER(dir); + + data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE]; + data->msg.rpc_argp = &data->arg; + data->msg.rpc_resp = &data->res; + data->arg.dir_fh = NFS_FH(dir); + data->arg.server = server; + data->arg.name = name; + data->arg.attrs = sattr; + data->arg.ftype = ftype; + data->arg.bitmask = server->attr_bitmask; + data->res.server = server; + data->res.fh = &data->fh; + data->res.fattr = &data->fattr; + data->res.dir_fattr = &data->dir_fattr; + nfs_fattr_init(data->res.fattr); + nfs_fattr_init(data->res.dir_fattr); + } + return data; +} + +static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) +{ + int status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0); + if (status == 0) { + update_changeattr(dir, &data->res.dir_cinfo); + nfs_post_op_update_inode(dir, data->res.dir_fattr); + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); + } + return status; +} + +static void nfs4_free_createdata(struct nfs4_createdata *data) +{ + kfree(data); +} + static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, unsigned int len, struct iattr *sattr) { - struct nfs_server *server = NFS_SERVER(dir); - struct nfs_fh fhandle; - struct nfs_fattr fattr, dir_fattr; - struct nfs4_create_arg arg = { - .dir_fh = NFS_FH(dir), - .server = server, - .name = &dentry->d_name, - .attrs = sattr, - .ftype = NF4LNK, - .bitmask = server->attr_bitmask, - }; - struct nfs4_create_res res = { - .server = server, - .fh = &fhandle, - .fattr = &fattr, - .dir_fattr = &dir_fattr, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK], - .rpc_argp = &arg, - .rpc_resp = &res, - }; - int status; + struct nfs4_createdata *data; + int status = -ENAMETOOLONG; if (len > NFS4_MAXPATHLEN) - return -ENAMETOOLONG; + goto out; - arg.u.symlink.pages = &page; - arg.u.symlink.len = len; - nfs_fattr_init(&fattr); - nfs_fattr_init(&dir_fattr); + status = -ENOMEM; + data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4LNK); + if (data == NULL) + goto out; + + data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK]; + data->arg.u.symlink.pages = &page; + data->arg.u.symlink.len = len; - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - if (!status) { - update_changeattr(dir, &res.dir_cinfo); - nfs_post_op_update_inode(dir, res.dir_fattr); - status = nfs_instantiate(dentry, &fhandle, &fattr); - } + status = nfs4_do_create(dir, dentry, data); + + nfs4_free_createdata(data); +out: return status; } @@ -2140,39 +2174,17 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) { - struct nfs_server *server = NFS_SERVER(dir); - struct nfs_fh fhandle; - struct nfs_fattr fattr, dir_fattr; - struct nfs4_create_arg arg = { - .dir_fh = NFS_FH(dir), - .server = server, - .name = &dentry->d_name, - .attrs = sattr, - .ftype = NF4DIR, - .bitmask = server->attr_bitmask, - }; - struct nfs4_create_res res = { - .server = server, - .fh = &fhandle, - .fattr = &fattr, - .dir_fattr = &dir_fattr, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], - .rpc_argp = &arg, - .rpc_resp = &res, - }; - int status; + struct nfs4_createdata *data; + int status = -ENOMEM; - nfs_fattr_init(&fattr); - nfs_fattr_init(&dir_fattr); - - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - if (!status) { - update_changeattr(dir, &res.dir_cinfo); - nfs_post_op_update_inode(dir, res.dir_fattr); - status = nfs_instantiate(dentry, &fhandle, &fattr); - } + data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4DIR); + if (data == NULL) + goto out; + + status = nfs4_do_create(dir, dentry, data); + + nfs4_free_createdata(data); +out: return status; } @@ -2242,56 +2254,34 @@ static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, dev_t rdev) { - struct nfs_server *server = NFS_SERVER(dir); - struct nfs_fh fh; - struct nfs_fattr fattr, dir_fattr; - struct nfs4_create_arg arg = { - .dir_fh = NFS_FH(dir), - .server = server, - .name = &dentry->d_name, - .attrs = sattr, - .bitmask = server->attr_bitmask, - }; - struct nfs4_create_res res = { - .server = server, - .fh = &fh, - .fattr = &fattr, - .dir_fattr = &dir_fattr, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], - .rpc_argp = &arg, - .rpc_resp = &res, - }; - int status; - int mode = sattr->ia_mode; - - nfs_fattr_init(&fattr); - nfs_fattr_init(&dir_fattr); + struct nfs4_createdata *data; + int mode = sattr->ia_mode; + int status = -ENOMEM; BUG_ON(!(sattr->ia_valid & ATTR_MODE)); BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode)); + + data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4SOCK); + if (data == NULL) + goto out; + if (S_ISFIFO(mode)) - arg.ftype = NF4FIFO; + data->arg.ftype = NF4FIFO; else if (S_ISBLK(mode)) { - arg.ftype = NF4BLK; - arg.u.device.specdata1 = MAJOR(rdev); - arg.u.device.specdata2 = MINOR(rdev); + data->arg.ftype = NF4BLK; + data->arg.u.device.specdata1 = MAJOR(rdev); + data->arg.u.device.specdata2 = MINOR(rdev); } else if (S_ISCHR(mode)) { - arg.ftype = NF4CHR; - arg.u.device.specdata1 = MAJOR(rdev); - arg.u.device.specdata2 = MINOR(rdev); + data->arg.ftype = NF4CHR; + data->arg.u.device.specdata1 = MAJOR(rdev); + data->arg.u.device.specdata2 = MINOR(rdev); } - else - arg.ftype = NF4SOCK; - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - if (status == 0) { - update_changeattr(dir, &res.dir_cinfo); - nfs_post_op_update_inode(dir, res.dir_fattr); - status = nfs_instantiate(dentry, &fh, &fattr); - } + status = nfs4_do_create(dir, dentry, data); + + nfs4_free_createdata(data); +out: return status; } -- cgit v0.10.2 From 0b4aae7aad162ad175ba8a65708332f888066b26 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Jun 2008 17:00:23 -0400 Subject: NFS: Reduce the stack usage in NFSv3 create operations Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index c3523ad..cf7d4e5 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -248,6 +248,53 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page, return status; } +struct nfs3_createdata { + struct rpc_message msg; + union { + struct nfs3_createargs create; + struct nfs3_mkdirargs mkdir; + struct nfs3_symlinkargs symlink; + struct nfs3_mknodargs mknod; + } arg; + struct nfs3_diropres res; + struct nfs_fh fh; + struct nfs_fattr fattr; + struct nfs_fattr dir_attr; +}; + +static struct nfs3_createdata *nfs3_alloc_createdata(void) +{ + struct nfs3_createdata *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data != NULL) { + data->msg.rpc_argp = &data->arg; + data->msg.rpc_resp = &data->res; + data->res.fh = &data->fh; + data->res.fattr = &data->fattr; + data->res.dir_attr = &data->dir_attr; + nfs_fattr_init(data->res.fattr); + nfs_fattr_init(data->res.dir_attr); + } + return data; +} + +static int nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_createdata *data) +{ + int status; + + status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0); + nfs_post_op_update_inode(dir, data->res.dir_attr); + if (status == 0) + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); + return status; +} + +static void nfs3_free_createdata(struct nfs3_createdata *data) +{ + kfree(data); +} + /* * Create a regular file. * For now, we don't implement O_EXCL. @@ -256,70 +303,60 @@ static int nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags, struct nameidata *nd) { - struct nfs_fh fhandle; - struct nfs_fattr fattr; - struct nfs_fattr dir_attr; - struct nfs3_createargs arg = { - .fh = NFS_FH(dir), - .name = dentry->d_name.name, - .len = dentry->d_name.len, - .sattr = sattr, - }; - struct nfs3_diropres res = { - .dir_attr = &dir_attr, - .fh = &fhandle, - .fattr = &fattr - }; - struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[NFS3PROC_CREATE], - .rpc_argp = &arg, - .rpc_resp = &res, - }; + struct nfs3_createdata *data; mode_t mode = sattr->ia_mode; - int status; + int status = -ENOMEM; dprintk("NFS call create %s\n", dentry->d_name.name); - arg.createmode = NFS3_CREATE_UNCHECKED; + + data = nfs3_alloc_createdata(); + if (data == NULL) + goto out; + + data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_CREATE]; + data->arg.create.fh = NFS_FH(dir); + data->arg.create.name = dentry->d_name.name; + data->arg.create.len = dentry->d_name.len; + data->arg.create.sattr = sattr; + + data->arg.create.createmode = NFS3_CREATE_UNCHECKED; if (flags & O_EXCL) { - arg.createmode = NFS3_CREATE_EXCLUSIVE; - arg.verifier[0] = jiffies; - arg.verifier[1] = current->pid; + data->arg.create.createmode = NFS3_CREATE_EXCLUSIVE; + data->arg.create.verifier[0] = jiffies; + data->arg.create.verifier[1] = current->pid; } sattr->ia_mode &= ~current->fs->umask; -again: - nfs_fattr_init(&dir_attr); - nfs_fattr_init(&fattr); - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - nfs_refresh_inode(dir, &dir_attr); + for (;;) { + status = nfs3_do_create(dir, dentry, data); - /* If the server doesn't support the exclusive creation semantics, - * try again with simple 'guarded' mode. */ - if (status == -ENOTSUPP) { - switch (arg.createmode) { + if (status != -ENOTSUPP) + break; + /* If the server doesn't support the exclusive creation + * semantics, try again with simple 'guarded' mode. */ + switch (data->arg.create.createmode) { case NFS3_CREATE_EXCLUSIVE: - arg.createmode = NFS3_CREATE_GUARDED; + data->arg.create.createmode = NFS3_CREATE_GUARDED; break; case NFS3_CREATE_GUARDED: - arg.createmode = NFS3_CREATE_UNCHECKED; + data->arg.create.createmode = NFS3_CREATE_UNCHECKED; break; case NFS3_CREATE_UNCHECKED: goto out; } - goto again; + nfs_fattr_init(data->res.dir_attr); + nfs_fattr_init(data->res.fattr); } - if (status == 0) - status = nfs_instantiate(dentry, &fhandle, &fattr); if (status != 0) goto out; /* When we created the file with exclusive semantics, make * sure we set the attributes afterwards. */ - if (arg.createmode == NFS3_CREATE_EXCLUSIVE) { + if (data->arg.create.createmode == NFS3_CREATE_EXCLUSIVE) { dprintk("NFS call setattr (post-create)\n"); if (!(sattr->ia_valid & ATTR_ATIME_SET)) @@ -330,14 +367,15 @@ again: /* Note: we could use a guarded setattr here, but I'm * not sure this buys us anything (and I'd have * to revamp the NFSv3 XDR code) */ - status = nfs3_proc_setattr(dentry, &fattr, sattr); - nfs_post_op_update_inode(dentry->d_inode, &fattr); + status = nfs3_proc_setattr(dentry, data->res.fattr, sattr); + nfs_post_op_update_inode(dentry->d_inode, data->res.fattr); dprintk("NFS reply setattr (post-create): %d\n", status); + if (status != 0) + goto out; } - if (status != 0) - goto out; status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); out: + nfs3_free_createdata(data); dprintk("NFS reply create: %d\n", status); return status; } @@ -452,40 +490,28 @@ static int nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, unsigned int len, struct iattr *sattr) { - struct nfs_fh fhandle; - struct nfs_fattr fattr, dir_attr; - struct nfs3_symlinkargs arg = { - .fromfh = NFS_FH(dir), - .fromname = dentry->d_name.name, - .fromlen = dentry->d_name.len, - .pages = &page, - .pathlen = len, - .sattr = sattr - }; - struct nfs3_diropres res = { - .dir_attr = &dir_attr, - .fh = &fhandle, - .fattr = &fattr - }; - struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK], - .rpc_argp = &arg, - .rpc_resp = &res, - }; - int status; + struct nfs3_createdata *data; + int status = -ENOMEM; if (len > NFS3_MAXPATHLEN) return -ENAMETOOLONG; dprintk("NFS call symlink %s\n", dentry->d_name.name); - nfs_fattr_init(&dir_attr); - nfs_fattr_init(&fattr); - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - nfs_post_op_update_inode(dir, &dir_attr); - if (status != 0) + data = nfs3_alloc_createdata(); + if (data == NULL) goto out; - status = nfs_instantiate(dentry, &fhandle, &fattr); + data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK]; + data->arg.symlink.fromfh = NFS_FH(dir); + data->arg.symlink.fromname = dentry->d_name.name; + data->arg.symlink.fromlen = dentry->d_name.len; + data->arg.symlink.pages = &page; + data->arg.symlink.pathlen = len; + data->arg.symlink.sattr = sattr; + + status = nfs3_do_create(dir, dentry, data); + + nfs3_free_createdata(data); out: dprintk("NFS reply symlink: %d\n", status); return status; @@ -494,42 +520,31 @@ out: static int nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) { - struct nfs_fh fhandle; - struct nfs_fattr fattr, dir_attr; - struct nfs3_mkdirargs arg = { - .fh = NFS_FH(dir), - .name = dentry->d_name.name, - .len = dentry->d_name.len, - .sattr = sattr - }; - struct nfs3_diropres res = { - .dir_attr = &dir_attr, - .fh = &fhandle, - .fattr = &fattr - }; - struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR], - .rpc_argp = &arg, - .rpc_resp = &res, - }; + struct nfs3_createdata *data; int mode = sattr->ia_mode; - int status; + int status = -ENOMEM; dprintk("NFS call mkdir %s\n", dentry->d_name.name); sattr->ia_mode &= ~current->fs->umask; - nfs_fattr_init(&dir_attr); - nfs_fattr_init(&fattr); - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - nfs_post_op_update_inode(dir, &dir_attr); - if (status != 0) + data = nfs3_alloc_createdata(); + if (data == NULL) goto out; - status = nfs_instantiate(dentry, &fhandle, &fattr); + + data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR]; + data->arg.mkdir.fh = NFS_FH(dir); + data->arg.mkdir.name = dentry->d_name.name; + data->arg.mkdir.len = dentry->d_name.len; + data->arg.mkdir.sattr = sattr; + + status = nfs3_do_create(dir, dentry, data); if (status != 0) goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); out: + nfs3_free_createdata(data); dprintk("NFS reply mkdir: %d\n", status); return status; } @@ -615,52 +630,50 @@ static int nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, dev_t rdev) { - struct nfs_fh fh; - struct nfs_fattr fattr, dir_attr; - struct nfs3_mknodargs arg = { - .fh = NFS_FH(dir), - .name = dentry->d_name.name, - .len = dentry->d_name.len, - .sattr = sattr, - .rdev = rdev - }; - struct nfs3_diropres res = { - .dir_attr = &dir_attr, - .fh = &fh, - .fattr = &fattr - }; - struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD], - .rpc_argp = &arg, - .rpc_resp = &res, - }; + struct nfs3_createdata *data; mode_t mode = sattr->ia_mode; - int status; - - switch (sattr->ia_mode & S_IFMT) { - case S_IFBLK: arg.type = NF3BLK; break; - case S_IFCHR: arg.type = NF3CHR; break; - case S_IFIFO: arg.type = NF3FIFO; break; - case S_IFSOCK: arg.type = NF3SOCK; break; - default: return -EINVAL; - } + int status = -ENOMEM; dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, MAJOR(rdev), MINOR(rdev)); sattr->ia_mode &= ~current->fs->umask; - nfs_fattr_init(&dir_attr); - nfs_fattr_init(&fattr); - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - nfs_post_op_update_inode(dir, &dir_attr); - if (status != 0) + data = nfs3_alloc_createdata(); + if (data == NULL) goto out; - status = nfs_instantiate(dentry, &fh, &fattr); + + data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD]; + data->arg.mknod.fh = NFS_FH(dir); + data->arg.mknod.name = dentry->d_name.name; + data->arg.mknod.len = dentry->d_name.len; + data->arg.mknod.sattr = sattr; + data->arg.mknod.rdev = rdev; + + switch (sattr->ia_mode & S_IFMT) { + case S_IFBLK: + data->arg.mknod.type = NF3BLK; + break; + case S_IFCHR: + data->arg.mknod.type = NF3CHR; + break; + case S_IFIFO: + data->arg.mknod.type = NF3FIFO; + break; + case S_IFSOCK: + data->arg.mknod.type = NF3SOCK; + break; + default: + status = -EINVAL; + goto out; + } + + status = nfs3_do_create(dir, dentry, data); if (status != 0) goto out; status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); out: + nfs3_free_createdata(data); dprintk("NFS reply mknod: %d\n", status); return status; } -- cgit v0.10.2 From f3d47a3a6a1484a93c8cfe1e8c8d4399c95199c7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 5 Jun 2008 15:17:39 -0400 Subject: NFS: Fix a preemption count leak in nfs_update_request The commit 2785259631697ebb0749a3782cca206e2e542939 (nfs: use GFP_NOFS preloads for radix-tree insertion) appears to have introduced a bug: We only want to call radix_tree_preload() once after creating a request. Calling it every time we loop after we created the request, will cause preemption count leaks. Signed-off-by: Trond Myklebust Cc: Nick Piggin diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f333848..dc62bc5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -584,13 +584,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, /* Loop over all inode entries and see if we find * A request for the page we wish to update */ - if (new) { - if (radix_tree_preload(GFP_NOFS)) { - nfs_release_request(new); - return ERR_PTR(-ENOMEM); - } - } - spin_lock(&inode->i_lock); req = nfs_page_find_request_locked(page); if (req) { @@ -630,6 +623,10 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, new = nfs_create_request(ctx, inode, page, offset, bytes); if (IS_ERR(new)) return new; + if (radix_tree_preload(GFP_NOFS)) { + nfs_release_request(new); + return ERR_PTR(-ENOMEM); + } } /* We have a request for our page. -- cgit v0.10.2 From 2116271a347d1181b5497602c2bfada1de8fd53b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 May 2008 19:34:39 -0400 Subject: NFS: Add correct bounds checking to NFSv2 locks NFSv2 file locking currently fails the Connectathon tests, because the calls to the VFS locking code do not return an EINVAL error if the struct file_lock overflows the 32-bit boundaries. The problem is due to the fact that we occasionally call helpers from fs/locks.c in order to avoid RPC calls to the server when we know that a local process holds the lock. These helpers are, of course, always 64-bit enabled, so EINVAL is not returned in cases when it would if the call had gone to the NLM code. For consistency, we therefore add support for a bounds-checking helper. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/file.c b/fs/nfs/file.c index d84a3d8..7c73f06 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -593,6 +593,7 @@ out: static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) { struct inode * inode = filp->f_mapping->host; + int ret = -ENOLCK; dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n", inode->i_sb->s_id, inode->i_ino, @@ -602,13 +603,22 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) /* No mandatory locks over NFS */ if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) - return -ENOLCK; + goto out_err; + + if (NFS_PROTO(inode)->lock_check_bounds != NULL) { + ret = NFS_PROTO(inode)->lock_check_bounds(fl); + if (ret < 0) + goto out_err; + } if (IS_GETLK(cmd)) - return do_getlk(filp, cmd, fl); - if (fl->fl_type == F_UNLCK) - return do_unlk(filp, cmd, fl); - return do_setlk(filp, cmd, fl); + ret = do_getlk(filp, cmd, fl); + else if (fl->fl_type == F_UNLCK) + ret = do_unlk(filp, cmd, fl); + else + ret = do_setlk(filp, cmd, fl); +out_err: + return ret; } /* diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 03599bf..5c35b02 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -598,6 +598,29 @@ nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); } +/* Helper functions for NFS lock bounds checking */ +#define NFS_LOCK32_OFFSET_MAX ((__s32)0x7fffffffUL) +static int nfs_lock_check_bounds(const struct file_lock *fl) +{ + __s32 start, end; + + start = (__s32)fl->fl_start; + if ((loff_t)start != fl->fl_start) + goto out_einval; + + if (fl->fl_end != OFFSET_MAX) { + end = (__s32)fl->fl_end; + if ((loff_t)end != fl->fl_end) + goto out_einval; + } else + end = NFS_LOCK32_OFFSET_MAX; + + if (start < 0 || start > end) + goto out_einval; + return 0; +out_einval: + return -EINVAL; +} const struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ @@ -633,4 +656,5 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .file_open = nfs_open, .file_release = nfs_release, .lock = nfs_proc_lock, + .lock_check_bounds = nfs_lock_check_bounds, }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 24263bb..8d780de 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -832,6 +832,7 @@ struct nfs_rpc_ops { int (*file_open) (struct inode *, struct file *); int (*file_release) (struct inode *, struct file *); int (*lock)(struct file *, int, struct file_lock *); + int (*lock_check_bounds)(const struct file_lock *); void (*clear_acl_cache)(struct inode *); }; -- cgit v0.10.2 From 8b39f2b41033754e7ba669503d27268beb1b524a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 14 May 2008 19:48:25 -0700 Subject: SUNRPC: Ensure we exit early in case of an encode error All errors from call_encode(), with exception of EAGAIN are fatal, so we should immediately return instead of proceeding to xprt_transmit(). Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8945307..9503b4c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -890,7 +890,6 @@ call_encode(struct rpc_task *task) task->tk_msg.rpc_argp); if (task->tk_status == -ENOMEM) { /* XXX: Is this sane? */ - rpc_delay(task, 3*HZ); task->tk_status = -EAGAIN; } } @@ -1048,8 +1047,14 @@ call_transmit(struct rpc_task *task) BUG_ON(task->tk_rqstp->rq_bytes_sent != 0); call_encode(task); /* Did the encode result in an error condition? */ - if (task->tk_status != 0) + if (task->tk_status != 0) { + /* Was the error nonfatal? */ + if (task->tk_status == -EAGAIN) + rpc_delay(task, HZ >> 4); + else + rpc_exit(task, task->tk_status); return; + } } xprt_transmit(task); if (task->tk_status < 0) -- cgit v0.10.2 From b390c2b55c830eb3b64633fa8d8b8837e073e458 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Jun 2008 18:30:11 -0400 Subject: SUNRPC: An ENOMEM error from call_encode is always fatal The special 'ENOMEM' case that was previously flagged as non-fatal is bogus: auth_gss always returns EAGAIN for non-fatal errors, and may in fact return ENOMEM in the special case where xdr_buf_read_netobj runs out of preallocated buffer space (invariably a _fatal_ error, since there is no provision for preallocating larger buffers). Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9503b4c..1af4f16 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -888,10 +888,6 @@ call_encode(struct rpc_task *task) task->tk_status = rpcauth_wrap_req(task, encode, req, p, task->tk_msg.rpc_argp); - if (task->tk_status == -ENOMEM) { - /* XXX: Is this sane? */ - task->tk_status = -EAGAIN; - } } /* -- cgit v0.10.2 From efc91ed0191e3fc62bb1c556ac93fc4e661214d2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Jun 2008 18:31:00 -0400 Subject: NFS: Optimise append writes with holes If a file is being extended, and we're creating a hole, we might as well declare the entire page to be up to date. This patch significantly improves the write performance for sparse files in the case where lseek(SEEK_END) is used to append several non-contiguous writes at intervals of < PAGE_SIZE. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 7c73f06..7ac89a8 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -344,6 +344,26 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, unsigned offset = pos & (PAGE_CACHE_SIZE - 1); int status; + /* + * Zero any uninitialised parts of the page, and then mark the page + * as up to date if it turns out that we're extending the file. + */ + if (!PageUptodate(page)) { + unsigned pglen = nfs_page_length(page); + unsigned end = offset + len; + + if (pglen == 0) { + zero_user_segments(page, 0, offset, + end, PAGE_CACHE_SIZE); + SetPageUptodate(page); + } else if (end >= pglen) { + zero_user_segment(page, end, PAGE_CACHE_SIZE); + if (offset == 0) + SetPageUptodate(page); + } else + zero_user_segment(page, pglen, PAGE_CACHE_SIZE); + } + lock_kernel(); status = nfs_updatepage(file, page, offset, copied); unlock_kernel(); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dc62bc5..eea2d2b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -616,7 +616,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, spin_unlock(&inode->i_lock); radix_tree_preload_end(); req = new; - goto zero_page; + goto out; } spin_unlock(&inode->i_lock); @@ -649,19 +649,13 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, req->wb_offset = offset; req->wb_pgbase = offset; req->wb_bytes = max(end, rqend) - req->wb_offset; - goto zero_page; + goto out; } if (end > rqend) req->wb_bytes = end - req->wb_offset; - return req; -zero_page: - /* If this page might potentially be marked as up to date, - * then we need to zero any uninitalised data. */ - if (req->wb_pgbase == 0 && req->wb_bytes != PAGE_CACHE_SIZE - && !PageUptodate(req->wb_page)) - zero_user_segment(req->wb_page, req->wb_bytes, PAGE_CACHE_SIZE); +out: return req; } -- cgit v0.10.2 From 7e5f6146609eb9134fac7d1b6bfee43df1732188 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 25 May 2008 12:59:19 -0400 Subject: NFS: Revert commit 44dd151d Revert commit 44dd151d "NFS: Don't mark a written page as uptodate until it is on disk". While it is true that the write may fail, that is always the case. There is no reason why we should treat data on pages that are not already marked as PG_uptodate as being special. The only thing we gain is a noticeable slowdown when re-reading these pages. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/write.c b/fs/nfs/write.c index eea2d2b..ee6fcde 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -188,6 +188,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, } /* Update file length */ nfs_grow_file(page, offset, count); + nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); nfs_clear_page_tag_locked(req); return 0; } @@ -743,12 +744,7 @@ int nfs_updatepage(struct file *file, struct page *page, static void nfs_writepage_release(struct nfs_page *req) { - if (PageError(req->wb_page)) { - nfs_end_page_writeback(req->wb_page); - nfs_inode_remove_request(req); - } else if (!nfs_reschedule_unstable_write(req)) { - /* Set the PG_uptodate flag */ - nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes); + if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { nfs_end_page_writeback(req->wb_page); nfs_inode_remove_request(req); } else @@ -1069,8 +1065,6 @@ static void nfs_writeback_release_full(void *calldata) dprintk(" marked for commit\n"); goto next; } - /* Set the PG_uptodate flag? */ - nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); dprintk(" OK\n"); remove_request: nfs_end_page_writeback(page); @@ -1309,9 +1303,6 @@ static void nfs_commit_release(void *calldata) * returned by the server against all stored verfs. */ if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { /* We have a match */ - /* Set the PG_uptodate flag */ - nfs_mark_uptodate(req->wb_page, req->wb_pgbase, - req->wb_bytes); nfs_inode_remove_request(req); dprintk(" OK\n"); goto next; -- cgit v0.10.2 From 0f38b873aeaae698c3693748438547c8493165fb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Jun 2008 18:31:01 -0400 Subject: SUNRPC: Use GFP_NOFS when allocating credentials Since the credentials may be allocated during the call to rpc_new_task(), which again may be called by a memory allocator... Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index cc12d5f..bf7585b 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -146,7 +146,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - dest->data = kmemdup(p, len, GFP_KERNEL); + dest->data = kmemdup(p, len, GFP_NOFS); if (unlikely(dest->data == NULL)) return ERR_PTR(-ENOMEM); dest->len = len; @@ -171,7 +171,7 @@ gss_alloc_context(void) { struct gss_cl_ctx *ctx; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc(sizeof(*ctx), GFP_NOFS); if (ctx != NULL) { ctx->gc_proc = RPC_GSS_PROC_DATA; ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */ @@ -341,7 +341,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid) { struct gss_upcall_msg *gss_msg; - gss_msg = kzalloc(sizeof(*gss_msg), GFP_KERNEL); + gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS); if (gss_msg != NULL) { INIT_LIST_HEAD(&gss_msg->list); rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); @@ -503,7 +503,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (mlen > MSG_BUF_MAXSIZE) goto out; err = -ENOMEM; - buf = kmalloc(mlen, GFP_KERNEL); + buf = kmalloc(mlen, GFP_NOFS); if (!buf) goto out; @@ -806,7 +806,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) dprintk("RPC: gss_create_cred for uid %d, flavor %d\n", acred->uid, auth->au_flavor); - if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL))) + if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS))) goto out_err; rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 60c3dba..ef45eba 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -70,7 +70,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - res->data = kmemdup(p, len, GFP_KERNEL); + res->data = kmemdup(p, len, GFP_NOFS); if (unlikely(res->data == NULL)) return ERR_PTR(-ENOMEM); res->len = len; @@ -131,7 +131,7 @@ gss_import_sec_context_kerberos(const void *p, struct krb5_ctx *ctx; int tmp; - if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) + if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS))) goto out_err; p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate)); diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 5deb4b6..035e1dd 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -76,7 +76,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - res->data = kmemdup(p, len, GFP_KERNEL); + res->data = kmemdup(p, len, GFP_NOFS); if (unlikely(res->data == NULL)) return ERR_PTR(-ENOMEM); return q; @@ -90,7 +90,7 @@ gss_import_sec_context_spkm3(const void *p, size_t len, struct spkm3_ctx *ctx; int version; - if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) + if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS))) goto out_err; p = simple_get_bytes(p, end, &version, sizeof(version)); diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c index 6cdd241a..3308157 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_token.c +++ b/net/sunrpc/auth_gss/gss_spkm3_token.c @@ -90,7 +90,7 @@ asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits) int decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen) { - if (!(out->data = kzalloc(explen,GFP_KERNEL))) + if (!(out->data = kzalloc(explen,GFP_NOFS))) return 0; out->len = explen; memcpy(out->data, in, enclen); diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 44920b9..46b2647 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -66,7 +66,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", acred->uid, acred->gid); - if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL))) + if (!(cred = kmalloc(sizeof(*cred), GFP_NOFS))) return ERR_PTR(-ENOMEM); rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); -- cgit v0.10.2 From b5418383ef13f70528281546d02c15edc03d8567 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Jun 2008 18:31:02 -0400 Subject: NFS: do_setlk(): don't flush caches when we have a delegation Signed-off-by: Trond Myklebust diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 7ac89a8..0213c21 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -602,7 +602,8 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) * This makes locking act as a cache coherency point. */ nfs_sync_mapping(filp->f_mapping); - nfs_zap_caches(inode); + if (!nfs_have_delegation(inode, FMODE_READ)) + nfs_zap_caches(inode); out: return status; } -- cgit v0.10.2 From 6fb1bc10303c0d88f635d014324432ab6ee49d1b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 May 2008 17:09:04 -0400 Subject: NFS: Update help text for CONFIG_NFS_FS Clean up: refresh the help text for Kconfig items related to the NFS client. Remove obsolete URLs, and make the language consistent among the options. Also move the ROOT_NFS config option next to the options related to the NFS client. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/Kconfig b/fs/Kconfig index 2694648..1c16de9 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1544,10 +1544,6 @@ config UFS_FS The recently released UFS2 variant (used in FreeBSD 5.x) is READ-ONLY supported. - If you only intend to mount files from some other Unix over the - network using NFS, you don't need the UFS file system support (but - you need NFS file system support obviously). - Note that this option is generally not needed for floppies, since a good portable way to transport files and directories between unixes (and even other operating systems) is given by the tar program ("man @@ -1587,6 +1583,7 @@ menuconfig NETWORK_FILESYSTEMS Say Y here to get to see options for network filesystems and filesystem-related networking code, such as NFS daemon and RPCSEC security modules. + This option alone does not add any kernel code. If you say N, all options in this submenu will be skipped and @@ -1595,76 +1592,92 @@ menuconfig NETWORK_FILESYSTEMS if NETWORK_FILESYSTEMS config NFS_FS - tristate "NFS file system support" + tristate "NFS client support" depends on INET select LOCKD select SUNRPC select NFS_ACL_SUPPORT if NFS_V3_ACL help - If you are connected to some other (usually local) Unix computer - (using SLIP, PLIP, PPP or Ethernet) and want to mount files residing - on that computer (the NFS server) using the Network File Sharing - protocol, say Y. "Mounting files" means that the client can access - the files with usual UNIX commands as if they were sitting on the - client's hard disk. For this to work, the server must run the - programs nfsd and mountd (but does not need to have NFS file system - support enabled in its kernel). NFS is explained in the Network - Administrator's Guide, available from - , on its man page: "man - nfs", and in the NFS-HOWTO. - - A superior but less widely used alternative to NFS is provided by - the Coda file system; see "Coda file system support" below. + Choose Y here if you want to access files residing on other + computers using Sun's Network File System protocol. To compile + this file system support as a module, choose M here: the module + will be called nfs. - If you say Y here, you should have said Y to TCP/IP networking also. - This option would enlarge your kernel by about 27 KB. + To mount file systems exported by NFS servers, you also need to + install the user space mount.nfs command which can be found in + the Linux nfs-utils package, available from http://linux-nfs.org/. + Information about using the mount command is available in the + mount(8) man page. More detail about the Linux NFS client + implementation is available via the nfs(5) man page. - To compile this file system support as a module, choose M here: the - module will be called nfs. + Below you can choose which versions of the NFS protocol are + available in the kernel to mount NFS servers. Support for NFS + version 2 (RFC 1094) is always available when NFS_FS is selected. - If you are configuring a diskless machine which will mount its root - file system over NFS at boot time, say Y here and to "Kernel - level IP autoconfiguration" above and to "Root file system on NFS" - below. You cannot compile this driver as a module in this case. - There are two packages designed for booting diskless machines over - the net: netboot, available from - , and Etherboot, - available from . + To configure a system which mounts its root file system via NFS + at boot time, say Y here, select "Kernel level IP + autoconfiguration" in the NETWORK menu, and select "Root file + system on NFS" below. You cannot compile this file system as a + module in this case. - If you don't know what all this is about, say N. + If unsure, say N. config NFS_V3 - bool "Provide NFSv3 client support" + bool "NFS client support for NFS version 3" depends on NFS_FS help - Say Y here if you want your NFS client to be able to speak version - 3 of the NFS protocol. + This option enables support for version 3 of the NFS protocol + (RFC 1813) in the kernel's NFS client. If unsure, say Y. config NFS_V3_ACL - bool "Provide client support for the NFSv3 ACL protocol extension" + bool "NFS client support for the NFSv3 ACL protocol extension" depends on NFS_V3 help - Implement the NFSv3 ACL protocol extension for manipulating POSIX - Access Control Lists. The server should also be compiled with - the NFSv3 ACL protocol extension; see the CONFIG_NFSD_V3_ACL option. + Some NFS servers support an auxiliary NFSv3 ACL protocol that + Sun added to Solaris but never became an official part of the + NFS version 3 protocol. This protocol extension allows + applications on NFS clients to manipulate POSIX Access Control + Lists on files residing on NFS servers. NFS servers enforce + ACLs on local files whether this protocol is available or not. + + Choose Y here if your NFS server supports the Solaris NFSv3 ACL + protocol extension and you want your NFS client to allow + applications to access and modify ACLs on files on the server. + + Most NFS servers don't support the Solaris NFSv3 ACL protocol + extension. You can choose N here or specify the "noacl" mount + option to prevent your NFS client from trying to use the NFSv3 + ACL protocol. If unsure, say N. config NFS_V4 - bool "Provide NFSv4 client support (EXPERIMENTAL)" + bool "NFS client support for NFS version 4 (EXPERIMENTAL)" depends on NFS_FS && EXPERIMENTAL select RPCSEC_GSS_KRB5 help - Say Y here if you want your NFS client to be able to speak the newer - version 4 of the NFS protocol. + This option enables support for version 4 of the NFS protocol + (RFC 3530) in the kernel's NFS client. - Note: Requires auxiliary userspace daemons which may be found on - http://www.citi.umich.edu/projects/nfsv4/ + To mount NFS servers using NFSv4, you also need to install user + space programs which can be found in the Linux nfs-utils package, + available from http://linux-nfs.org/. If unsure, say N. +config ROOT_NFS + bool "Root file system on NFS" + depends on NFS_FS=y && IP_PNP + help + If you want your system to mount its root file system via NFS, + choose Y here. This is common practice for managing systems + without local permanent storage. For details, read + . + + Most people say N here. + config NFSD tristate "NFS server support" depends on INET @@ -1746,20 +1759,6 @@ config NFSD_V4 If unsure, say N. -config ROOT_NFS - bool "Root file system on NFS" - depends on NFS_FS=y && IP_PNP - help - If you want your Linux box to mount its whole root file system (the - one containing the directory /) from some other computer over the - net via NFS (presumably because your box doesn't have a hard disk), - say Y. Read for - details. It is likely that in this case, you also want to say Y to - "Kernel level IP autoconfiguration" so that your box can discover - its network address at boot time. - - Most people say N here. - config LOCKD tristate -- cgit v0.10.2 From 3748f1e447ac1dbf45f33ee7491a008a8bb5cdf0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 May 2008 17:09:12 -0400 Subject: SUNRPC: Add a function to display the name of an RPC procedure Improve debugging messages in call_start() and call_verify() by having them show the RPC procedure name instead of the procedure number. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 1af4f16..68ea6dd 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -690,6 +690,21 @@ rpc_restart_call(struct rpc_task *task) } EXPORT_SYMBOL_GPL(rpc_restart_call); +#ifdef RPC_DEBUG +static const char *rpc_proc_name(const struct rpc_task *task) +{ + const struct rpc_procinfo *proc = task->tk_msg.rpc_proc; + + if (proc) { + if (proc->p_name) + return proc->p_name; + else + return "NULL"; + } else + return "no proc"; +} +#endif + /* * 0. Initial state * @@ -701,9 +716,9 @@ call_start(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - dprintk("RPC: %5u call_start %s%d proc %d (%s)\n", task->tk_pid, + dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid, clnt->cl_protname, clnt->cl_vers, - task->tk_msg.rpc_proc->p_proc, + rpc_proc_name(task), (RPC_IS_ASYNC(task) ? "async" : "sync")); /* Increment call count */ @@ -1432,10 +1447,10 @@ call_verify(struct rpc_task *task) error = -EPROTONOSUPPORT; goto out_err; case RPC_PROC_UNAVAIL: - dprintk("RPC: %5u %s: proc %p unsupported by program %u, " + dprintk("RPC: %5u %s: proc %s unsupported by program %u, " "version %u on server %s\n", task->tk_pid, __func__, - task->tk_msg.rpc_proc, + rpc_proc_name(task), task->tk_client->cl_prog, task->tk_client->cl_vers, task->tk_client->cl_server); -- cgit v0.10.2 From b0e1c57ea00302c3ac541ffd37e7db07d13cd674 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 May 2008 17:09:19 -0400 Subject: SUNRPC: Rename "call_" functions that are no longer FSM states The RPC client uses a finite state machine to move RPC tasks through each step of an RPC request. Each state is contained in a function in net/sunrpc/clnt.c, and named call_foo. Some of the functions named call_foo have changed over the past few years and are no longer states in the FSM. These include: call_encode, call_header, and call_verify. As a clean up, rename the functions that have changed. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 68ea6dd..ab8038d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -58,7 +58,6 @@ static void call_start(struct rpc_task *task); static void call_reserve(struct rpc_task *task); static void call_reserveresult(struct rpc_task *task); static void call_allocate(struct rpc_task *task); -static void call_encode(struct rpc_task *task); static void call_decode(struct rpc_task *task); static void call_bind(struct rpc_task *task); static void call_bind_status(struct rpc_task *task); @@ -70,9 +69,9 @@ static void call_refreshresult(struct rpc_task *task); static void call_timeout(struct rpc_task *task); static void call_connect(struct rpc_task *task); static void call_connect_status(struct rpc_task *task); -static __be32 * call_header(struct rpc_task *task); -static __be32 * call_verify(struct rpc_task *task); +static __be32 *rpc_encode_header(struct rpc_task *task); +static __be32 *rpc_verify_header(struct rpc_task *task); static int rpc_ping(struct rpc_clnt *clnt, int flags); static void rpc_register_client(struct rpc_clnt *clnt) @@ -876,7 +875,7 @@ rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) * 3. Encode arguments of an RPC call */ static void -call_encode(struct rpc_task *task) +rpc_xdr_encode(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; kxdrproc_t encode; @@ -891,13 +890,14 @@ call_encode(struct rpc_task *task) (char *)req->rq_buffer + req->rq_callsize, req->rq_rcvsize); - /* Encode header and provided arguments */ - encode = task->tk_msg.rpc_proc->p_encode; - if (!(p = call_header(task))) { - printk(KERN_INFO "RPC: call_header failed, exit EIO\n"); + p = rpc_encode_header(task); + if (p == NULL) { + printk(KERN_INFO "RPC: couldn't encode RPC header, exit EIO\n"); rpc_exit(task, -EIO); return; } + + encode = task->tk_msg.rpc_proc->p_encode; if (encode == NULL) return; @@ -1056,7 +1056,7 @@ call_transmit(struct rpc_task *task) /* Encode here so that rpcsec_gss can use correct sequence number. */ if (rpc_task_need_encode(task)) { BUG_ON(task->tk_rqstp->rq_bytes_sent != 0); - call_encode(task); + rpc_xdr_encode(task); /* Did the encode result in an error condition? */ if (task->tk_status != 0) { /* Was the error nonfatal? */ @@ -1240,8 +1240,7 @@ call_decode(struct rpc_task *task) goto out_retry; } - /* Verify the RPC header */ - p = call_verify(task); + p = rpc_verify_header(task); if (IS_ERR(p)) { if (p == ERR_PTR(-EAGAIN)) goto out_retry; @@ -1259,7 +1258,7 @@ call_decode(struct rpc_task *task) return; out_retry: task->tk_status = 0; - /* Note: call_verify() may have freed the RPC slot */ + /* Note: rpc_verify_header() may have freed the RPC slot */ if (task->tk_rqstp == req) { req->rq_received = req->rq_rcv_buf.len = 0; if (task->tk_client->cl_discrtry) @@ -1306,11 +1305,8 @@ call_refreshresult(struct rpc_task *task) return; } -/* - * Call header serialization - */ static __be32 * -call_header(struct rpc_task *task) +rpc_encode_header(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; @@ -1330,11 +1326,8 @@ call_header(struct rpc_task *task) return p; } -/* - * Reply header verification - */ static __be32 * -call_verify(struct rpc_task *task) +rpc_verify_header(struct rpc_task *task) { struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0]; int len = task->tk_rqstp->rq_rcv_buf.len >> 2; @@ -1408,7 +1401,7 @@ call_verify(struct rpc_task *task) task->tk_action = call_bind; goto out_retry; case RPC_AUTH_TOOWEAK: - printk(KERN_NOTICE "call_verify: server %s requires stronger " + printk(KERN_NOTICE "RPC: server %s requires stronger " "authentication.\n", task->tk_client->cl_server); break; default: -- cgit v0.10.2 From 68a23ee94e3a06819f5a39d64f2e1f3131bab12d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 May 2008 17:09:26 -0400 Subject: SUNRPC: Don't display the rpc_show_tasks header if there are no tasks Clean up: don't display the rpc_show_tasks column header unless there is at least one task to display. As far as I can tell, it is safe to let the list_for_each_entry macro decide that each list is empty. scripts/checkpatch.pl also wants a KERN_FOO at the start of any newly added printk() calls, so this and subsequent patches will also add KERN_INFO. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index ab8038d..7dda328 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1526,24 +1526,30 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int EXPORT_SYMBOL_GPL(rpc_call_null); #ifdef RPC_DEBUG +static void rpc_show_header(void) +{ + printk(KERN_INFO "-pid- proc flgs status -client- -prog- --rqstp- " + "-timeout -rpcwait -action- ---ops--\n"); +} + void rpc_show_tasks(void) { struct rpc_clnt *clnt; struct rpc_task *t; + int header = 0; spin_lock(&rpc_client_lock); - if (list_empty(&all_clients)) - goto out; - printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " - "-rpcwait -action- ---ops--\n"); list_for_each_entry(clnt, &all_clients, cl_clients) { - if (list_empty(&clnt->cl_tasks)) - continue; spin_lock(&clnt->cl_lock); list_for_each_entry(t, &clnt->cl_tasks, tk_task) { const char *rpc_waitq = "none"; int proc; + if (!header) { + rpc_show_header(); + header++; + } + if (t->tk_msg.rpc_proc) proc = t->tk_msg.rpc_proc->p_proc; else @@ -1563,7 +1569,6 @@ void rpc_show_tasks(void) } spin_unlock(&clnt->cl_lock); } -out: spin_unlock(&rpc_client_lock); } #endif -- cgit v0.10.2 From 38e886e0c18975543938519254fc9bf0829c75a3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 May 2008 17:09:33 -0400 Subject: SUNRPC: Refactor rpc_show_tasks Clean up: move the logic that displays each task to its own function. This removes indentation and makes future changes easier. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7dda328..7964a98 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1532,40 +1532,42 @@ static void rpc_show_header(void) "-timeout -rpcwait -action- ---ops--\n"); } +static void rpc_show_task(const struct rpc_clnt *clnt, + const struct rpc_task *task) +{ + const char *rpc_waitq = "none"; + int proc = -1; + + if (task->tk_msg.rpc_proc) + proc = task->tk_msg.rpc_proc->p_proc; + + if (RPC_IS_QUEUED(task)) + rpc_waitq = rpc_qname(task->tk_waitqueue); + + printk(KERN_INFO "%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", + task->tk_pid, proc, + task->tk_flags, task->tk_status, + clnt, clnt->cl_prog, + task->tk_rqstp, task->tk_timeout, + rpc_waitq, + task->tk_action, task->tk_ops); +} + void rpc_show_tasks(void) { struct rpc_clnt *clnt; - struct rpc_task *t; + struct rpc_task *task; int header = 0; spin_lock(&rpc_client_lock); list_for_each_entry(clnt, &all_clients, cl_clients) { spin_lock(&clnt->cl_lock); - list_for_each_entry(t, &clnt->cl_tasks, tk_task) { - const char *rpc_waitq = "none"; - int proc; - + list_for_each_entry(task, &clnt->cl_tasks, tk_task) { if (!header) { rpc_show_header(); header++; } - - if (t->tk_msg.rpc_proc) - proc = t->tk_msg.rpc_proc->p_proc; - else - proc = -1; - - if (RPC_IS_QUEUED(t)) - rpc_waitq = rpc_qname(t->tk_waitqueue); - - printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", - t->tk_pid, proc, - t->tk_flags, t->tk_status, - t->tk_client, - (t->tk_client ? t->tk_client->cl_prog : 0), - t->tk_rqstp, t->tk_timeout, - rpc_waitq, - t->tk_action, t->tk_ops); + rpc_show_task(clnt, task); } spin_unlock(&clnt->cl_lock); } -- cgit v0.10.2 From cb3997b5a0b21864368bd1bd1d0929f9304fb6d9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 May 2008 17:09:41 -0400 Subject: SUNRPC: Display some debugging information as text rather than numbers In rpc_show_tasks(), display the program name, version number, procedure name and tk_action as human-readable variable-length text fields rather than columnar numbers. Doing the symbol lookup here helps in cases where we have actual debugging output from a kernel log, but don't have access to the kernel image or RPC module that generated the output. Sample output: -pid- flgs status -client- --rqstp- -timeout ---ops-- 5608 0001 -11 eeb42690 f6d93710 0 f8fa1764 nfsv3 WRITE a:call_transmit_status q:none 5609 0001 -11 eeb42690 f6d937e0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5610 0001 -11 eeb42690 f6d93230 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5611 0001 -11 eeb42690 f6d93300 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5612 0001 -11 eeb42690 f6d93090 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5613 0001 -11 eeb42690 f6d933d0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5614 0001 -11 eeb42690 f6d93cc0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5615 0001 -11 eeb42690 f6d93a50 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5616 0001 -11 eeb42690 f6d93640 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5617 0001 -11 eeb42690 f6d93b20 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5618 0001 -11 eeb42690 f6d93160 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7964a98..0530eea 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -1528,29 +1529,31 @@ EXPORT_SYMBOL_GPL(rpc_call_null); #ifdef RPC_DEBUG static void rpc_show_header(void) { - printk(KERN_INFO "-pid- proc flgs status -client- -prog- --rqstp- " - "-timeout -rpcwait -action- ---ops--\n"); + printk(KERN_INFO "-pid- flgs status -client- --rqstp- " + "-timeout ---ops--\n"); } static void rpc_show_task(const struct rpc_clnt *clnt, const struct rpc_task *task) { const char *rpc_waitq = "none"; - int proc = -1; - - if (task->tk_msg.rpc_proc) - proc = task->tk_msg.rpc_proc->p_proc; + char *p, action[KSYM_SYMBOL_LEN]; if (RPC_IS_QUEUED(task)) rpc_waitq = rpc_qname(task->tk_waitqueue); - printk(KERN_INFO "%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", - task->tk_pid, proc, - task->tk_flags, task->tk_status, - clnt, clnt->cl_prog, - task->tk_rqstp, task->tk_timeout, - rpc_waitq, - task->tk_action, task->tk_ops); + /* map tk_action pointer to a function name; then trim off + * the "+0x0 [sunrpc]" */ + sprint_symbol(action, (unsigned long)task->tk_action); + p = strchr(action, '+'); + if (p) + *p = '\0'; + + printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%s q:%s\n", + task->tk_pid, task->tk_flags, task->tk_status, + clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops, + clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task), + action, rpc_waitq); } void rpc_show_tasks(void) -- cgit v0.10.2 From 549177863bac22f23663ee9f70c4e3b9fb269f2c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 May 2008 16:29:07 -0400 Subject: NFS: Make nfs_fsync methods consistent Clean up: Report the same debugging info, count function calls the same, and use similar function naming in nfs_fsync_dir() and nfs_fsync(). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 982a206..5d73fbd 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -629,10 +629,11 @@ out: */ static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) { - dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n", + dfprintk(VFS, "NFS: fsync dir(%s/%s) datasync %d\n", dentry->d_parent->d_name.name, dentry->d_name.name, datasync); + nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC); return 0; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 0213c21..1789de2 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -50,7 +50,7 @@ static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov, static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, unsigned long nr_segs, loff_t pos); static int nfs_file_flush(struct file *, fl_owner_t id); -static int nfs_fsync(struct file *, struct dentry *dentry, int datasync); +static int nfs_file_fsync(struct file *, struct dentry *dentry, int datasync); static int nfs_check_flags(int flags); static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); @@ -72,7 +72,7 @@ const struct file_operations nfs_file_operations = { .open = nfs_file_open, .flush = nfs_file_flush, .release = nfs_file_release, - .fsync = nfs_fsync, + .fsync = nfs_file_fsync, .lock = nfs_lock, .flock = nfs_flock, .splice_read = nfs_file_splice_read, @@ -181,7 +181,7 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) } /* - * Helper for nfs_file_flush() and nfs_fsync() + * Helper for nfs_file_flush() and nfs_file_fsync() * * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to * disk, but it retrieves and clears ctx->error after synching, despite @@ -296,12 +296,14 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) * whether any write errors occurred for this process. */ static int -nfs_fsync(struct file *file, struct dentry *dentry, int datasync) +nfs_file_fsync(struct file *file, struct dentry *dentry, int datasync) { struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = dentry->d_inode; - dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + dfprintk(VFS, "NFS: fsync file(%s/%s) datasync %d\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + datasync); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); return nfs_do_fsync(ctx, inode); -- cgit v0.10.2 From b84e06c58fdefdc42931f771dc295e63f4b27365 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jun 2008 17:55:34 -0400 Subject: NFS: Make nfs_llseek methods consistent Clean up: Report the same debugging info in nfs_llseek_dir() and nfs_llseek_file(). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5d73fbd..2457106 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -603,7 +603,15 @@ out: static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) { - mutex_lock(&filp->f_path.dentry->d_inode->i_mutex); + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; + + dfprintk(VFS, "NFS: llseek dir(%s/%s, %lld, %d)\n", + dentry->d_parent->d_name.name, + dentry->d_name.name, + offset, origin); + + mutex_lock(&inode->i_mutex); switch (origin) { case 1: offset += filp->f_pos; @@ -619,7 +627,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) nfs_file_open_context(filp)->dir_cookie = 0; } out: - mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex); + mutex_unlock(&inode->i_mutex); return offset; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 1789de2..cef3636 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -170,6 +170,11 @@ force_reval: static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) { + dfprintk(VFS, "NFS: llseek file(%s/%s, %lld, %d)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name, + offset, origin); + /* origin == SEEK_END => we must revalidate the cached file length */ if (origin == SEEK_END) { struct inode *inode = filp->f_mapping->host; -- cgit v0.10.2 From cc0dd2d1052aede2946ad1338a8f6f5d5c604740 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jun 2008 17:55:42 -0400 Subject: NFS: Make nfs_open methods consistent Clean up: Report the same debugging info and count function calls the same for files and directories in nfs_opendir() and nfs_file_open(). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2457106..c962233 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -133,8 +133,11 @@ nfs_opendir(struct inode *inode, struct file *filp) { int res; - dfprintk(VFS, "NFS: opendir(%s/%ld)\n", - inode->i_sb->s_id, inode->i_ino); + dfprintk(VFS, "NFS: open dir(%s/%s)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name); + + nfs_inc_stats(inode, NFSIOS_VFSOPEN); lock_kernel(); /* Call generic open code in order to cache credentials */ diff --git a/fs/nfs/file.c b/fs/nfs/file.c index cef3636..202408d 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -119,6 +119,10 @@ nfs_file_open(struct inode *inode, struct file *filp) { int res; + dfprintk(VFS, "NFS: open file(%s/%s)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name); + res = nfs_check_flags(filp->f_flags); if (res) return res; -- cgit v0.10.2 From b7eaefaa8722fd98e5c2632640d1abd2b0c83e84 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jun 2008 17:55:50 -0400 Subject: NFS: Add debugging facility for NFS aops Recent work in fs/nfs/file.c neglected to add appropriate trace debugging for the NFS client's address space operations. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 202408d..6c447a3 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -335,6 +335,11 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, struct page *page; index = pos >> PAGE_CACHE_SHIFT; + dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n", + file->f_path.dentry->d_parent->d_name.name, + file->f_path.dentry->d_name.name, + mapping->host->i_ino, len, (long long) pos); + page = __grab_cache_page(mapping, index); if (!page) return -ENOMEM; @@ -355,6 +360,11 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, unsigned offset = pos & (PAGE_CACHE_SIZE - 1); int status; + dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n", + file->f_path.dentry->d_parent->d_name.name, + file->f_path.dentry->d_name.name, + mapping->host->i_ino, len, (long long) pos); + /* * Zero any uninitialised parts of the page, and then mark the page * as up to date if it turns out that we're extending the file. @@ -389,6 +399,8 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, static void nfs_invalidate_page(struct page *page, unsigned long offset) { + dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %lu)\n", page, offset); + if (offset != 0) return; /* Cancel any unstarted writes on this page */ @@ -397,13 +409,20 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) static int nfs_release_page(struct page *page, gfp_t gfp) { + dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); + /* If PagePrivate() is set, then the page is not freeable */ return 0; } static int nfs_launder_page(struct page *page) { - return nfs_wb_page(page->mapping->host, page); + struct inode *inode = page->mapping->host; + + dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n", + inode->i_ino, (long long)page_offset(page)); + + return nfs_wb_page(inode, page); } const struct address_space_operations nfs_file_aops = { @@ -423,13 +442,19 @@ const struct address_space_operations nfs_file_aops = { static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) { struct file *filp = vma->vm_file; + struct dentry *dentry = filp->f_path.dentry; unsigned pagelen; int ret = -EINVAL; struct address_space *mapping; + dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + filp->f_mapping->host->i_ino, + (long long)page_offset(page)); + lock_page(page); mapping = page->mapping; - if (mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) + if (mapping != dentry->d_inode->i_mapping) goto out_unlock; ret = 0; -- cgit v0.10.2 From 6da24bc9cfc645c619992e39aab09747164c9f14 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jun 2008 17:55:58 -0400 Subject: NFS: Use NFSDBG_FILE for all fops Clean up: some fops use NFSDBG_FILE, some use NFSDBG_VFS. Let's use NFSDBG_FILE for all fops, and consistently report file names instead of inode numbers. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c962233..b194066 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -133,7 +133,7 @@ nfs_opendir(struct inode *inode, struct file *filp) { int res; - dfprintk(VFS, "NFS: open dir(%s/%s)\n", + dfprintk(FILE, "NFS: open dir(%s/%s)\n", filp->f_path.dentry->d_parent->d_name.name, filp->f_path.dentry->d_name.name); @@ -531,7 +531,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct nfs_fattr fattr; long res; - dfprintk(VFS, "NFS: readdir(%s/%s) starting at cookie %Lu\n", + dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", dentry->d_parent->d_name.name, dentry->d_name.name, (long long)filp->f_pos); nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); @@ -598,7 +598,7 @@ out: unlock_kernel(); if (res > 0) res = 0; - dfprintk(VFS, "NFS: readdir(%s/%s) returns %ld\n", + dfprintk(FILE, "NFS: readdir(%s/%s) returns %ld\n", dentry->d_parent->d_name.name, dentry->d_name.name, res); return res; @@ -609,7 +609,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; - dfprintk(VFS, "NFS: llseek dir(%s/%s, %lld, %d)\n", + dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, offset, origin); @@ -640,7 +640,7 @@ out: */ static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) { - dfprintk(VFS, "NFS: fsync dir(%s/%s) datasync %d\n", + dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n", dentry->d_parent->d_name.name, dentry->d_name.name, datasync); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4757a2b..08f6b04 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -890,7 +890,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, count = iov_length(iov, nr_segs); nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); - dprintk("nfs: direct read(%s/%s, %zd@%Ld)\n", + dfprintk(FILE, "NFS: direct read(%s/%s, %zd@%Ld)\n", file->f_path.dentry->d_parent->d_name.name, file->f_path.dentry->d_name.name, count, (long long) pos); @@ -947,7 +947,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, count = iov_length(iov, nr_segs); nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); - dfprintk(VFS, "nfs: direct write(%s/%s, %zd@%Ld)\n", + dfprintk(FILE, "NFS: direct write(%s/%s, %zd@%Ld)\n", file->f_path.dentry->d_parent->d_name.name, file->f_path.dentry->d_name.name, count, (long long) pos); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6c447a3..78b26f8 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -119,7 +119,7 @@ nfs_file_open(struct inode *inode, struct file *filp) { int res; - dfprintk(VFS, "NFS: open file(%s/%s)\n", + dprintk("NFS: open file(%s/%s)\n", filp->f_path.dentry->d_parent->d_name.name, filp->f_path.dentry->d_name.name); @@ -137,9 +137,15 @@ nfs_file_open(struct inode *inode, struct file *filp) static int nfs_file_release(struct inode *inode, struct file *filp) { + struct dentry *dentry = filp->f_path.dentry; + + dprintk("NFS: release(%s/%s)\n", + dentry->d_parent->d_name.name, + dentry->d_name.name); + /* Ensure that dirty pages are flushed out with the right creds */ if (filp->f_mode & FMODE_WRITE) - nfs_wb_all(filp->f_path.dentry->d_inode); + nfs_wb_all(dentry->d_inode); nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return NFS_PROTO(inode)->file_release(inode, filp); } @@ -174,7 +180,7 @@ force_reval: static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) { - dfprintk(VFS, "NFS: llseek file(%s/%s, %lld, %d)\n", + dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", filp->f_path.dentry->d_parent->d_name.name, filp->f_path.dentry->d_name.name, offset, origin); @@ -216,16 +222,18 @@ static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode) /* * Flush all dirty pages, and check for write errors. - * */ static int nfs_file_flush(struct file *file, fl_owner_t id) { struct nfs_open_context *ctx = nfs_file_open_context(file); - struct inode *inode = file->f_path.dentry->d_inode; + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = dentry->d_inode; int status; - dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + dprintk("NFS: flush(%s/%s)\n", + dentry->d_parent->d_name.name, + dentry->d_name.name); if ((file->f_mode & FMODE_WRITE) == 0) return 0; @@ -250,7 +258,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, if (iocb->ki_filp->f_flags & O_DIRECT) return nfs_file_direct_read(iocb, iov, nr_segs, pos); - dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n", + dprintk("NFS: read(%s/%s, %lu@%lu)\n", dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long) pos); @@ -270,7 +278,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, struct inode *inode = dentry->d_inode; ssize_t res; - dfprintk(VFS, "nfs: splice_read(%s/%s, %lu@%Lu)\n", + dprintk("NFS: splice_read(%s/%s, %lu@%Lu)\n", dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long long) *ppos); @@ -287,7 +295,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) struct inode *inode = dentry->d_inode; int status; - dfprintk(VFS, "nfs: mmap(%s/%s)\n", + dprintk("NFS: mmap(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); status = nfs_revalidate_mapping(inode, file->f_mapping); @@ -310,7 +318,7 @@ nfs_file_fsync(struct file *file, struct dentry *dentry, int datasync) struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = dentry->d_inode; - dfprintk(VFS, "NFS: fsync file(%s/%s) datasync %d\n", + dprintk("NFS: fsync file(%s/%s) datasync %d\n", dentry->d_parent->d_name.name, dentry->d_name.name, datasync); @@ -502,9 +510,9 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, if (iocb->ki_filp->f_flags & O_DIRECT) return nfs_file_direct_write(iocb, iov, nr_segs, pos); - dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n", + dprintk("NFS: write(%s/%s, %lu@%Ld)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - inode->i_ino, (unsigned long) count, (long long) pos); + (unsigned long) count, (long long) pos); result = -EBUSY; if (IS_SWAPFILE(inode)) @@ -649,13 +657,15 @@ out: */ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) { - struct inode * inode = filp->f_mapping->host; + struct inode *inode = filp->f_mapping->host; int ret = -ENOLCK; - dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n", - inode->i_sb->s_id, inode->i_ino, + dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name, fl->fl_type, fl->fl_flags, (long long)fl->fl_start, (long long)fl->fl_end); + nfs_inc_stats(inode, NFSIOS_VFSLOCK); /* No mandatory locks over NFS */ @@ -683,9 +693,9 @@ out_err: */ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) { - dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n", - filp->f_path.dentry->d_inode->i_sb->s_id, - filp->f_path.dentry->d_inode->i_ino, + dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name, fl->fl_type, fl->fl_flags); /* @@ -708,12 +718,15 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) return do_setlk(filp, cmd, fl); } +/* + * There is no protocol support for leases, so we have no way to implement + * them correctly in the face of opens by other clients. + */ static int nfs_setlease(struct file *file, long arg, struct file_lock **fl) { - /* - * There is no protocol support for leases, so we have no way - * to implement them correctly in the face of opens by other - * clients. - */ + dprintk("NFS: setlease(%s/%s, arg=%ld)\n", + file->f_path.dentry->d_parent->d_name.name, + file->f_path.dentry->d_name.name, arg); + return -EINVAL; } -- cgit v0.10.2 From 48186c7d5734a6b137f9186b37f6dc98097d0429 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jun 2008 17:56:05 -0400 Subject: NFS: Fix trace debugging nits in write.c Clean up: fix a few dprintk messages that still need to show the RPC task ID correctly, and be sure we use the preferred %lld or %llu instead of %Ld or %Lu. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ee6fcde..21d8a48 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -713,10 +713,10 @@ int nfs_updatepage(struct file *file, struct page *page, nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); - dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n", + dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n", file->f_path.dentry->d_parent->d_name.name, file->f_path.dentry->d_name.name, count, - (long long)(page_offset(page) +offset)); + (long long)(page_offset(page) + offset)); /* If we're not using byte range locks, and we know the page * is up to date, it may be more efficient to extend the write @@ -736,7 +736,7 @@ int nfs_updatepage(struct file *file, struct page *page, else __set_page_dirty_nobuffers(page); - dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", + dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", status, (long long)i_size_read(inode)); return status; } @@ -821,7 +821,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req, NFS_PROTO(inode)->write_setup(data, &msg); dprintk("NFS: %5u initiated write call " - "(req %s/%Ld, %u bytes @ offset %Lu)\n", + "(req %s/%lld, %u bytes @ offset %llu)\n", data->task.tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), @@ -965,13 +965,13 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - struct nfs_page *req = data->req; - dprintk("NFS: write (%s/%Ld %d@%Ld)", - req->wb_context->path.dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), - req->wb_bytes, - (long long)req_offset(req)); + dprintk("NFS: %5u write(%s/%lld %d@%lld)", + task->tk_pid, + data->req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long) + NFS_FILEID(data->req->wb_context->path.dentry->d_inode), + data->req->wb_bytes, (long long)req_offset(data->req)); nfs_writeback_done(task, data); } @@ -1045,7 +1045,8 @@ static void nfs_writeback_release_full(void *calldata) nfs_list_remove_request(req); - dprintk("NFS: write (%s/%Ld %d@%Ld)", + dprintk("NFS: %5u write (%s/%lld %d@%lld)", + data->task.tk_pid, req->wb_context->path.dentry->d_inode->i_sb->s_id, (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, @@ -1118,7 +1119,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) static unsigned long complain; if (time_before(complain, jiffies)) { - dprintk("NFS: faulty NFS server %s:" + dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", NFS_SERVER(data->inode)->nfs_client->cl_hostname, resp->verf->committed, argp->stable); @@ -1287,7 +1288,7 @@ static void nfs_commit_release(void *calldata) dec_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); - dprintk("NFS: commit (%s/%Ld %d@%Ld)", + dprintk("NFS: commit (%s/%lld %d@%lld)", req->wb_context->path.dentry->d_inode->i_sb->s_id, (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, -- cgit v0.10.2 From cd983ef81b9d79573848dabf81277c7314220257 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jun 2008 17:56:13 -0400 Subject: SUNRPC: Remove obsolete messages during transport connect Recent changes to the RPC client's transport connect logic make connect status values ECONNREFUSED and ECONNRESET impossible. Clean up xprt_connect_status() to account for these changes. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e1770f7..67996bd 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -690,7 +690,7 @@ static void xprt_connect_status(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; - if (task->tk_status >= 0) { + if (task->tk_status == 0) { xprt->stat.connect_count++; xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start; dprintk("RPC: %5u xprt_connect_status: connection established\n", @@ -699,12 +699,6 @@ static void xprt_connect_status(struct rpc_task *task) } switch (task->tk_status) { - case -ECONNREFUSED: - case -ECONNRESET: - dprintk("RPC: %5u xprt_connect_status: server %s refused " - "connection\n", task->tk_pid, - task->tk_client->cl_server); - break; case -ENOTCONN: dprintk("RPC: %5u xprt_connect_status: connection broken\n", task->tk_pid); -- cgit v0.10.2 From c2d946e55e1be20a7443918e3b7497b905e6b3f7 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 20 May 2008 01:08:00 +0300 Subject: fs/nfs/nfsroot.c: remove CVS keyword This patch removes a CVS keyword that wasn't updated for a long time from a comment. Signed-off-by: Adrian Bunk Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 531379d..15afe3a 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -1,6 +1,4 @@ /* - * $Id: nfsroot.c,v 1.45 1998/03/07 10:44:46 mj Exp $ - * * Copyright (C) 1995, 1996 Gero Kuhlmann * * Allow an NFS filesystem to be mounted as root. The way this works is: -- cgit v0.10.2 From 48b605f83c920d8daa50e43fc2c7f718e04c7bfa Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 10 Jun 2008 15:38:39 -0400 Subject: NFS: implement option checking when remounting NFS filesystems (resend) When remounting an NFS or NFS4 filesystem, the new NFS options are not respected, yet the remount will still return success. This patch adds a remount_fs sb op for NFS that checks any new nfs mount options against the existing ones and fails the mount if any have changed. This is only implemented for string-based mount options since doing this with binary options isn't really feasible. This is essentially the same as the original patch I sent out, but adds a check to see if the addr= option has changed. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 614efee..b550e92 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -207,6 +207,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static void nfs_kill_super(struct super_block *); static void nfs_put_super(struct super_block *); +static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); static struct file_system_type nfs_fs_type = { .owner = THIS_MODULE, @@ -234,6 +235,7 @@ static const struct super_operations nfs_sops = { .umount_begin = nfs_umount_begin, .show_options = nfs_show_options, .show_stats = nfs_show_stats, + .remount_fs = nfs_remount, }; #ifdef CONFIG_NFS_V4 @@ -278,6 +280,7 @@ static const struct super_operations nfs4_sops = { .umount_begin = nfs_umount_begin, .show_options = nfs_show_options, .show_stats = nfs_show_stats, + .remount_fs = nfs_remount, }; #endif @@ -1396,6 +1399,79 @@ out_invalid_fh: return -EINVAL; } +static int +nfs_compare_remount_data(struct nfs_server *nfss, + struct nfs_parsed_mount_data *data) +{ + if (data->flags != nfss->flags || + data->rsize != nfss->rsize || + data->wsize != nfss->wsize || + data->retrans != nfss->client->cl_timeout->to_retries || + data->auth_flavors[0] != nfss->client->cl_auth->au_flavor || + data->acregmin != nfss->acregmin / HZ || + data->acregmax != nfss->acregmax / HZ || + data->acdirmin != nfss->acdirmin / HZ || + data->acdirmax != nfss->acdirmax / HZ || + data->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) || + data->nfs_server.addrlen != nfss->nfs_client->cl_addrlen || + memcmp(&data->nfs_server.address, &nfss->nfs_client->cl_addr, + data->nfs_server.addrlen) != 0) + return -EINVAL; + + return 0; +} + +static int +nfs_remount(struct super_block *sb, int *flags, char *raw_data) +{ + int error; + struct nfs_server *nfss = sb->s_fs_info; + struct nfs_parsed_mount_data *data; + struct nfs_mount_data *options = (struct nfs_mount_data *)raw_data; + struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; + + /* + * Userspace mount programs that send binary options generally send + * them populated with default values. We have no way to know which + * ones were explicitly specified. Fall back to legacy behavior and + * just return success. + */ + if ((sb->s_type == &nfs4_fs_type && options4->version == 1) || + (sb->s_type == &nfs_fs_type && options->version >= 1 && + options->version <= 6)) + return 0; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + + /* fill out struct with values from existing mount */ + data->flags = nfss->flags; + data->rsize = nfss->rsize; + data->wsize = nfss->wsize; + data->retrans = nfss->client->cl_timeout->to_retries; + data->auth_flavors[0] = nfss->client->cl_auth->au_flavor; + data->acregmin = nfss->acregmin / HZ; + data->acregmax = nfss->acregmax / HZ; + data->acdirmin = nfss->acdirmin / HZ; + data->acdirmax = nfss->acdirmax / HZ; + data->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; + data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; + memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr, + data->nfs_server.addrlen); + + /* overwrite those values with any that were specified */ + error = nfs_parse_mount_options((char *)options, data); + if (error < 0) + goto out; + + /* compare new mount options with old ones */ + error = nfs_compare_remount_data(nfss, data); +out: + kfree(data); + return error; +} + /* * Initialise the common bits of the superblock */ -- cgit v0.10.2 From b6b6152c46861dd914d0e6cea9c27df057d6e235 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 9 Jun 2008 16:51:31 -0400 Subject: rpc: bring back cl_chatty The cl_chatty flag alows us to control whether a given rpc client leaves "server X not responding, timed out" messages in the syslog. Such messages make sense for ordinary nfs clients (where an unresponsive server means applications on the mountpoint are probably hanging), but not for the callback client (which can fail more commonly, with the only result just of disabling some optimizations). Previously cl_chatty was removed, do to lack of users; reinstate it, and use it for the nfsd's callback client. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 4d4760e..702fa57 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -381,7 +381,7 @@ static int do_probe_callback(void *data) .program = &cb_program, .version = nfs_cb_version[1]->number, .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ - .flags = (RPC_CLNT_CREATE_NOPING), + .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), }; struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 6fff7f8..764fd4c 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -42,7 +42,8 @@ struct rpc_clnt { unsigned int cl_softrtry : 1,/* soft timeouts */ cl_discrtry : 1,/* disconnect before retry */ - cl_autobind : 1;/* use getport() */ + cl_autobind : 1,/* use getport() */ + cl_chatty : 1;/* be verbose */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ const struct rpc_timeout *cl_timeout; /* Timeout strategy */ @@ -114,6 +115,7 @@ struct rpc_create_args { #define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 3) #define RPC_CLNT_CREATE_NOPING (1UL << 4) #define RPC_CLNT_CREATE_DISCRTRY (1UL << 5) +#define RPC_CLNT_CREATE_QUIET (1UL << 6) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0530eea..09631f6 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -324,6 +324,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) clnt->cl_autobind = 1; if (args->flags & RPC_CLNT_CREATE_DISCRTRY) clnt->cl_discrtry = 1; + if (!(args->flags & RPC_CLNT_CREATE_QUIET)) + clnt->cl_chatty = 1; return clnt; } @@ -1149,7 +1151,8 @@ call_status(struct rpc_task *task) rpc_exit(task, status); break; default: - printk("%s: RPC call returned error %d\n", + if (clnt->cl_chatty) + printk("%s: RPC call returned error %d\n", clnt->cl_protname, -status); rpc_exit(task, status); } @@ -1174,7 +1177,8 @@ call_timeout(struct rpc_task *task) task->tk_timeouts++; if (RPC_IS_SOFT(task)) { - printk(KERN_NOTICE "%s: server %s not responding, timed out\n", + if (clnt->cl_chatty) + printk(KERN_NOTICE "%s: server %s not responding, timed out\n", clnt->cl_protname, clnt->cl_server); rpc_exit(task, -EIO); return; @@ -1182,7 +1186,8 @@ call_timeout(struct rpc_task *task) if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) { task->tk_flags |= RPC_CALL_MAJORSEEN; - printk(KERN_NOTICE "%s: server %s not responding, still trying\n", + if (clnt->cl_chatty) + printk(KERN_NOTICE "%s: server %s not responding, still trying\n", clnt->cl_protname, clnt->cl_server); } rpc_force_rebind(clnt); @@ -1213,8 +1218,9 @@ call_decode(struct rpc_task *task) task->tk_pid, task->tk_status); if (task->tk_flags & RPC_CALL_MAJORSEEN) { - printk(KERN_NOTICE "%s: server %s OK\n", - clnt->cl_protname, clnt->cl_server); + if (clnt->cl_chatty) + printk(KERN_NOTICE "%s: server %s OK\n", + clnt->cl_protname, clnt->cl_server); task->tk_flags &= ~RPC_CALL_MAJORSEEN; } -- cgit v0.10.2 From 720b8f2d6f7de9e16f1217448cc7396e1604e175 Mon Sep 17 00:00:00 2001 From: "\\\\\\\"J. Bruce Fields\\\\\\" Date: Mon, 9 Jun 2008 16:51:33 -0400 Subject: rpc: eliminate unused variable in auth_gss upcall code Also, a minor comment grammar fix in the same file. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index bf7585b..ebfd630 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -272,7 +272,7 @@ __gss_find_upcall(struct rpc_inode *rpci, uid_t uid) return NULL; } -/* Try to add a upcall to the pipefs queue. +/* Try to add an upcall to the pipefs queue. * If an upcall owned by our uid already exists, then we return a reference * to that upcall instead of adding the new upcall. */ @@ -493,7 +493,6 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { const void *p, *end; void *buf; - struct rpc_clnt *clnt; struct gss_upcall_msg *gss_msg; struct inode *inode = filp->f_path.dentry->d_inode; struct gss_cl_ctx *ctx; @@ -507,7 +506,6 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (!buf) goto out; - clnt = RPC_I(inode)->private; err = -EFAULT; if (copy_from_user(buf, src, mlen)) goto err; -- cgit v0.10.2 From d25a03cf966f2cf9990dc0bf2a921a554919ea34 Mon Sep 17 00:00:00 2001 From: "\\\\\\\"J. Bruce Fields\\\\\\" Date: Mon, 9 Jun 2008 16:51:34 -0400 Subject: rpc: remove some unused macros There used to be a print_hexl() function that used isprint(), now gone. I don't know why NFS_NGROUPS and CA_RUN_AS_MACHINE were here. I also don't know why another #define that's actually used was marked "unused". Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index ebfd630..834a831 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -63,22 +63,11 @@ static const struct rpc_credops gss_nullops; # define RPCDBG_FACILITY RPCDBG_AUTH #endif -#define NFS_NGROUPS 16 - -#define GSS_CRED_SLACK 1024 /* XXX: unused */ +#define GSS_CRED_SLACK 1024 /* length of a krb5 verifier (48), plus data added before arguments when * using integrity (two 4-byte integers): */ #define GSS_VERF_SLACK 100 -/* XXX this define must match the gssd define -* as it is passed to gssd to signal the use of -* machine creds should be part of the shared rpc interface */ - -#define CA_RUN_AS_MACHINE 0x00000200 - -/* dump the buffer in `emacs-hexl' style */ -#define isprint(c) ((c > 0x1f) && (c < 0x7f)) - struct gss_auth { struct kref kref; struct rpc_auth rpc_auth; -- cgit v0.10.2 From a486aeda9b2b0d944aecce7871b3186379b898de Mon Sep 17 00:00:00 2001 From: "\\\\\\\"J. Bruce Fields\\\\\\" Date: Mon, 9 Jun 2008 16:51:35 -0400 Subject: rpc: minor cleanup of scheduler callback code Try to make the comment here a little more clear and concise. Also, this macro definition seems unnecessary. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index d1a5c8c..64981a2 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -135,7 +135,6 @@ struct rpc_task_setup { #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) #define RPC_DO_ROOTOVERRIDE(t) ((t)->tk_flags & RPC_TASK_ROOTCREDS) #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) -#define RPC_DO_CALLBACK(t) ((t)->tk_callback != NULL) #define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) #define RPC_TASK_RUNNING 0 diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 6eab9bf..6288af0 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -626,19 +626,15 @@ static void __rpc_execute(struct rpc_task *task) /* * Execute any pending callback. */ - if (RPC_DO_CALLBACK(task)) { - /* Define a callback save pointer */ + if (task->tk_callback) { void (*save_callback)(struct rpc_task *); /* - * If a callback exists, save it, reset it, - * call it. - * The save is needed to stop from resetting - * another callback set within the callback handler - * - Dave + * We set tk_callback to NULL before calling it, + * in case it sets the tk_callback field itself: */ - save_callback=task->tk_callback; - task->tk_callback=NULL; + save_callback = task->tk_callback; + task->tk_callback = NULL; save_callback(task); } -- cgit v0.10.2 From 659bfcd6dd88919a5ad453f62afbeffcb3106847 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Jun 2008 19:39:41 -0400 Subject: NFS: Fix the ftruncate() credential problem ftruncate() access checking is supposed to be performed at open() time, just like reads and writes. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 596c5d8..2e4ab4a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -347,7 +347,7 @@ out_no_inode: goto out; } -#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET) +#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE) int nfs_setattr(struct dentry *dentry, struct iattr *attr) @@ -369,7 +369,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) /* Optimization: if the end result is no change, don't RPC */ attr->ia_valid &= NFS_VALID_ATTRS; - if (attr->ia_valid == 0) + if ((attr->ia_valid & ~ATTR_FILE) == 0) return 0; lock_kernel(); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index cf7d4e5..b9c2d99 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -129,6 +129,8 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, int status; dprintk("NFS call setattr\n"); + if (sattr->ia_valid & ATTR_FILE) + msg.rpc_cred = nfs_file_cred(sattr->ia_file); nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 26fbeb3..31a7e4c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1139,8 +1139,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int return res; } -static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, - struct iattr *sattr, struct nfs4_state *state) +static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, + struct nfs_fattr *fattr, struct iattr *sattr, + struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(inode); struct nfs_setattrargs arg = { @@ -1154,9 +1155,10 @@ static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, .server = server, }; struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], - .rpc_argp = &arg, - .rpc_resp = &res, + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = cred, }; unsigned long timestamp = jiffies; int status; @@ -1166,7 +1168,6 @@ static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) { /* Use that stateid */ } else if (state != NULL) { - msg.rpc_cred = state->owner->so_cred; nfs4_copy_stateid(&arg.stateid, state, current->files); } else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); @@ -1177,15 +1178,16 @@ static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, return status; } -static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, - struct iattr *sattr, struct nfs4_state *state) +static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, + struct nfs_fattr *fattr, struct iattr *sattr, + struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(inode); struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(server, - _nfs4_do_setattr(inode, fattr, sattr, state), + _nfs4_do_setattr(inode, cred, fattr, sattr, state), &exception); } while (exception.retry); return err; @@ -1647,29 +1649,25 @@ static int nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, struct iattr *sattr) { - struct rpc_cred *cred; struct inode *inode = dentry->d_inode; - struct nfs_open_context *ctx; + struct rpc_cred *cred = NULL; struct nfs4_state *state = NULL; int status; nfs_fattr_init(fattr); - cred = rpc_lookup_cred(); - if (IS_ERR(cred)) - return PTR_ERR(cred); - /* Search for an existing open(O_WRITE) file */ - ctx = nfs_find_open_context(inode, cred, FMODE_WRITE); - if (ctx != NULL) + if (sattr->ia_valid & ATTR_FILE) { + struct nfs_open_context *ctx; + + ctx = nfs_file_open_context(sattr->ia_file); + cred = ctx->cred; state = ctx->state; + } - status = nfs4_do_setattr(inode, fattr, sattr, state); + status = nfs4_do_setattr(inode, cred, fattr, sattr, state); if (status == 0) nfs_setattr_update_inode(inode, sattr); - if (ctx != NULL) - put_nfs_open_context(ctx); - put_rpccred(cred); return status; } @@ -1897,17 +1895,16 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, goto out; } state = nfs4_do_open(dir, &path, flags, sattr, cred); - put_rpccred(cred); d_drop(dentry); if (IS_ERR(state)) { status = PTR_ERR(state); - goto out; + goto out_putcred; } d_add(dentry, igrab(state->inode)); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); if (flags & O_EXCL) { struct nfs_fattr fattr; - status = nfs4_do_setattr(state->inode, &fattr, sattr, state); + status = nfs4_do_setattr(state->inode, cred, &fattr, sattr, state); if (status == 0) nfs_setattr_update_inode(state->inode, sattr); nfs_post_op_update_inode(state->inode, &fattr); @@ -1916,6 +1913,8 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, status = nfs4_intent_set_file(nd, &path, state); else nfs4_close_sync(&path, state, flags); +out_putcred: + put_rpccred(cred); out: return status; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 5c35b02..c760558 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -129,6 +129,8 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, sattr->ia_mode &= S_IALLUGO; dprintk("NFS call setattr\n"); + if (sattr->ia_valid & ATTR_FILE) + msg.rpc_cred = nfs_file_cred(sattr->ia_file); nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) -- cgit v0.10.2 From b22602a673b1743bba4b62bb404ffd3b269d2f09 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 6 Jun 2008 13:22:25 -0400 Subject: SUNRPC: Ensure all transports set rq_xtime consistently The RPC client uses the rq_xtime field in each RPC request to determine the round-trip time of the request. Currently, the rq_xtime field is initialized by each transport just before it starts enqueing a request to be sent. However, transports do not handle initializing this value consistently; sometimes they don't initialize it at all. To make the measurement of request round-trip time consistent for all RPC client transport capabilities, pull rq_xtime initialization into the RPC client's generic transport logic. Now all transports will get a standardized RTT measure automatically, from: xprt_transmit() to xprt_complete_rqst() This makes round-trip time calculation more accurate for the TCP transport. The socket ->sendmsg() method can return "-EAGAIN" if the socket's output buffer is full, so the TCP transport's ->send_request() method may call the ->sendmsg() method repeatedly until it gets all of the request's bytes queued in the socket's buffer. Currently, the TCP transport sets the rq_xtime field every time through that loop so the final value is the timestamp just before the *last* call to the underlying socket's ->sendmsg() method. After this patch, the rq_xtime field contains a timestamp that reflects the time just before the *first* call to ->sendmsg(). This is consequential under heavy workloads because large requests often take multiple ->sendmsg() calls to get all the bytes of a request queued. The TCP transport causes the request to sleep until the remote end of the socket has received enough bytes to clear space in the socket's local output buffer. This delay can be quite significant. The method introduced by this patch is a more accurate measure of RTT for stream transports, since the server can cause enough back pressure to delay (ie increase the latency of) requests from the client. Additionally, this patch corrects the behavior of the RDMA transport, which entirely neglected to initialize the rq_xtime field. RPC performance metrics for RDMA transports now display correct RPC request round trip times. Signed-off-by: Chuck Lever Acked-by: Tom Talpey Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 67996bd..99a52aa 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -872,6 +872,7 @@ void xprt_transmit(struct rpc_task *task) return; req->rq_connect_cookie = xprt->connect_cookie; + req->rq_xtime = jiffies; status = xprt->ops->send_request(task); if (status == 0) { dprintk("RPC: %5u xmit complete\n", task->tk_pid); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ddbe981..4486c59 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -579,7 +579,6 @@ static int xs_udp_send_request(struct rpc_task *task) req->rq_svec->iov_base, req->rq_svec->iov_len); - req->rq_xtime = jiffies; status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen, xdr, @@ -671,7 +670,6 @@ static int xs_tcp_send_request(struct rpc_task *task) * to cope with writespace callbacks arriving _after_ we have * called sendmsg(). */ while (1) { - req->rq_xtime = jiffies; status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent); -- cgit v0.10.2 From 46cb650c224bb8e64a749090105d74b9e8eda669 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 16:32:46 -0400 Subject: NFS: Remove the redundant file_open entry from struct nfs_rpc_ops All instances are set to nfs_open(), so we should just remove the redundant indirection. Ditto for the file_release op Signed-off-by: Trond Myklebust diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 78b26f8..509dcb5 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -129,7 +129,7 @@ nfs_file_open(struct inode *inode, struct file *filp) nfs_inc_stats(inode, NFSIOS_VFSOPEN); lock_kernel(); - res = NFS_PROTO(inode)->file_open(inode, filp); + res = nfs_open(inode, filp); unlock_kernel(); return res; } @@ -147,7 +147,7 @@ nfs_file_release(struct inode *inode, struct file *filp) if (filp->f_mode & FMODE_WRITE) nfs_wb_all(dentry->d_inode); nfs_inc_stats(inode, NFSIOS_VFSRELEASE); - return NFS_PROTO(inode)->file_release(inode, filp); + return nfs_release(inode, filp); } /** diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index b9c2d99..1e750e45 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -816,8 +816,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, .commit_done = nfs3_commit_done, - .file_open = nfs_open, - .file_release = nfs_release, .lock = nfs3_proc_lock, .clear_acl_cache = nfs3_forget_cached_acls, }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 31a7e4c..dfdd19a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3714,8 +3714,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, .commit_done = nfs4_commit_done, - .file_open = nfs_open, - .file_release = nfs_release, .lock = nfs4_proc_lock, .clear_acl_cache = nfs4_zap_acl_attr, }; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index c760558..4dbb84d 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -655,8 +655,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .write_setup = nfs_proc_write_setup, .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, - .file_open = nfs_open, - .file_release = nfs_release, .lock = nfs_proc_lock, .lock_check_bounds = nfs_lock_check_bounds, }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 8d780de..8c77c11 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -829,8 +829,6 @@ struct nfs_rpc_ops { int (*write_done) (struct rpc_task *, struct nfs_write_data *); void (*commit_setup) (struct nfs_write_data *, struct rpc_message *); int (*commit_done) (struct rpc_task *, struct nfs_write_data *); - int (*file_open) (struct inode *, struct file *); - int (*file_release) (struct inode *, struct file *); int (*lock)(struct file *, int, struct file_lock *); int (*lock_check_bounds)(const struct file_lock *); void (*clear_acl_cache)(struct inode *); -- cgit v0.10.2 From 34e8f92831cb5c40b3137e47a3daf4c09016ef02 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 12 Jun 2008 12:32:25 -0400 Subject: NFS: Move fs/nfs/iostat.h to include/linux The fs/nfs/iostat.h header has definitions that were designed to be exposed to user space. Move these definitions under include/linux so user space can use the definitions in applications that read /proc/self/mountstats. Also address a handful of coding style issues called out by checkpatch.pl in fs/nfs/iostat.h. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index 6350ecbd..2ec65e1 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -5,135 +5,41 @@ * * Copyright (C) 2005, 2006 Chuck Lever * - * NFS client per-mount statistics provide information about the health of - * the NFS client and the health of each NFS mount point. Generally these - * are not for detailed problem diagnosis, but simply to indicate that there - * is a problem. - * - * These counters are not meant to be human-readable, but are meant to be - * integrated into system monitoring tools such as "sar" and "iostat". As - * such, the counters are sampled by the tools over time, and are never - * zeroed after a file system is mounted. Moving averages can be computed - * by the tools by taking the difference between two instantaneous samples - * and dividing that by the time between the samples. */ #ifndef _NFS_IOSTAT #define _NFS_IOSTAT -#define NFS_IOSTAT_VERS "1.0" - -/* - * NFS byte counters - * - * 1. SERVER - the number of payload bytes read from or written to the - * server by the NFS client via an NFS READ or WRITE request. - * - * 2. NORMAL - the number of bytes read or written by applications via - * the read(2) and write(2) system call interfaces. - * - * 3. DIRECT - the number of bytes read or written from files opened - * with the O_DIRECT flag. - * - * These counters give a view of the data throughput into and out of the NFS - * client. Comparing the number of bytes requested by an application with the - * number of bytes the client requests from the server can provide an - * indication of client efficiency (per-op, cache hits, etc). - * - * These counters can also help characterize which access methods are in - * use. DIRECT by itself shows whether there is any O_DIRECT traffic. - * NORMAL + DIRECT shows how much data is going through the system call - * interface. A large amount of SERVER traffic without much NORMAL or - * DIRECT traffic shows that applications are using mapped files. - * - * NFS page counters - * - * These count the number of pages read or written via nfs_readpage(), - * nfs_readpages(), or their write equivalents. - */ -enum nfs_stat_bytecounters { - NFSIOS_NORMALREADBYTES = 0, - NFSIOS_NORMALWRITTENBYTES, - NFSIOS_DIRECTREADBYTES, - NFSIOS_DIRECTWRITTENBYTES, - NFSIOS_SERVERREADBYTES, - NFSIOS_SERVERWRITTENBYTES, - NFSIOS_READPAGES, - NFSIOS_WRITEPAGES, - __NFSIOS_BYTESMAX, -}; - -/* - * NFS event counters - * - * These counters provide a low-overhead way of monitoring client activity - * without enabling NFS trace debugging. The counters show the rate at - * which VFS requests are made, and how often the client invalidates its - * data and attribute caches. This allows system administrators to monitor - * such things as how close-to-open is working, and answer questions such - * as "why are there so many GETATTR requests on the wire?" - * - * They also count anamolous events such as short reads and writes, silly - * renames due to close-after-delete, and operations that change the size - * of a file (such operations can often be the source of data corruption - * if applications aren't using file locking properly). - */ -enum nfs_stat_eventcounters { - NFSIOS_INODEREVALIDATE = 0, - NFSIOS_DENTRYREVALIDATE, - NFSIOS_DATAINVALIDATE, - NFSIOS_ATTRINVALIDATE, - NFSIOS_VFSOPEN, - NFSIOS_VFSLOOKUP, - NFSIOS_VFSACCESS, - NFSIOS_VFSUPDATEPAGE, - NFSIOS_VFSREADPAGE, - NFSIOS_VFSREADPAGES, - NFSIOS_VFSWRITEPAGE, - NFSIOS_VFSWRITEPAGES, - NFSIOS_VFSGETDENTS, - NFSIOS_VFSSETATTR, - NFSIOS_VFSFLUSH, - NFSIOS_VFSFSYNC, - NFSIOS_VFSLOCK, - NFSIOS_VFSRELEASE, - NFSIOS_CONGESTIONWAIT, - NFSIOS_SETATTRTRUNC, - NFSIOS_EXTENDWRITE, - NFSIOS_SILLYRENAME, - NFSIOS_SHORTREAD, - NFSIOS_SHORTWRITE, - NFSIOS_DELAY, - __NFSIOS_COUNTSMAX, -}; - -#ifdef __KERNEL__ - #include #include +#include struct nfs_iostats { unsigned long long bytes[__NFSIOS_BYTESMAX]; unsigned long events[__NFSIOS_COUNTSMAX]; } ____cacheline_aligned; -static inline void nfs_inc_server_stats(struct nfs_server *server, enum nfs_stat_eventcounters stat) +static inline void nfs_inc_server_stats(struct nfs_server *server, + enum nfs_stat_eventcounters stat) { struct nfs_iostats *iostats; int cpu; cpu = get_cpu(); iostats = per_cpu_ptr(server->io_stats, cpu); - iostats->events[stat] ++; + iostats->events[stat]++; put_cpu_no_resched(); } -static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat) +static inline void nfs_inc_stats(struct inode *inode, + enum nfs_stat_eventcounters stat) { nfs_inc_server_stats(NFS_SERVER(inode), stat); } -static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat_bytecounters stat, unsigned long addend) +static inline void nfs_add_server_stats(struct nfs_server *server, + enum nfs_stat_bytecounters stat, + unsigned long addend) { struct nfs_iostats *iostats; int cpu; @@ -144,7 +50,9 @@ static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat put_cpu_no_resched(); } -static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend) +static inline void nfs_add_stats(struct inode *inode, + enum nfs_stat_bytecounters stat, + unsigned long addend) { nfs_add_server_stats(NFS_SERVER(inode), stat, addend); } @@ -160,5 +68,4 @@ static inline void nfs_free_iostats(struct nfs_iostats *stats) free_percpu(stats); } -#endif -#endif +#endif /* _NFS_IOSTAT */ diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h new file mode 100644 index 0000000..1cb9a3f --- /dev/null +++ b/include/linux/nfs_iostat.h @@ -0,0 +1,119 @@ +/* + * User-space visible declarations for NFS client per-mount + * point statistics + * + * Copyright (C) 2005, 2006 Chuck Lever + * + * NFS client per-mount statistics provide information about the + * health of the NFS client and the health of each NFS mount point. + * Generally these are not for detailed problem diagnosis, but + * simply to indicate that there is a problem. + * + * These counters are not meant to be human-readable, but are meant + * to be integrated into system monitoring tools such as "sar" and + * "iostat". As such, the counters are sampled by the tools over + * time, and are never zeroed after a file system is mounted. + * Moving averages can be computed by the tools by taking the + * difference between two instantaneous samples and dividing that + * by the time between the samples. + */ + +#ifndef _LINUX_NFS_IOSTAT +#define _LINUX_NFS_IOSTAT + +#define NFS_IOSTAT_VERS "1.0" + +/* + * NFS byte counters + * + * 1. SERVER - the number of payload bytes read from or written + * to the server by the NFS client via an NFS READ or WRITE + * request. + * + * 2. NORMAL - the number of bytes read or written by applications + * via the read(2) and write(2) system call interfaces. + * + * 3. DIRECT - the number of bytes read or written from files + * opened with the O_DIRECT flag. + * + * These counters give a view of the data throughput into and out + * of the NFS client. Comparing the number of bytes requested by + * an application with the number of bytes the client requests from + * the server can provide an indication of client efficiency + * (per-op, cache hits, etc). + * + * These counters can also help characterize which access methods + * are in use. DIRECT by itself shows whether there is any O_DIRECT + * traffic. NORMAL + DIRECT shows how much data is going through + * the system call interface. A large amount of SERVER traffic + * without much NORMAL or DIRECT traffic shows that applications + * are using mapped files. + * + * NFS page counters + * + * These count the number of pages read or written via nfs_readpage(), + * nfs_readpages(), or their write equivalents. + * + * NB: When adding new byte counters, please include the measured + * units in the name of each byte counter to help users of this + * interface determine what exactly is being counted. + */ +enum nfs_stat_bytecounters { + NFSIOS_NORMALREADBYTES = 0, + NFSIOS_NORMALWRITTENBYTES, + NFSIOS_DIRECTREADBYTES, + NFSIOS_DIRECTWRITTENBYTES, + NFSIOS_SERVERREADBYTES, + NFSIOS_SERVERWRITTENBYTES, + NFSIOS_READPAGES, + NFSIOS_WRITEPAGES, + __NFSIOS_BYTESMAX, +}; + +/* + * NFS event counters + * + * These counters provide a low-overhead way of monitoring client + * activity without enabling NFS trace debugging. The counters + * show the rate at which VFS requests are made, and how often the + * client invalidates its data and attribute caches. This allows + * system administrators to monitor such things as how close-to-open + * is working, and answer questions such as "why are there so many + * GETATTR requests on the wire?" + * + * They also count anamolous events such as short reads and writes, + * silly renames due to close-after-delete, and operations that + * change the size of a file (such operations can often be the + * source of data corruption if applications aren't using file + * locking properly). + */ +enum nfs_stat_eventcounters { + NFSIOS_INODEREVALIDATE = 0, + NFSIOS_DENTRYREVALIDATE, + NFSIOS_DATAINVALIDATE, + NFSIOS_ATTRINVALIDATE, + NFSIOS_VFSOPEN, + NFSIOS_VFSLOOKUP, + NFSIOS_VFSACCESS, + NFSIOS_VFSUPDATEPAGE, + NFSIOS_VFSREADPAGE, + NFSIOS_VFSREADPAGES, + NFSIOS_VFSWRITEPAGE, + NFSIOS_VFSWRITEPAGES, + NFSIOS_VFSGETDENTS, + NFSIOS_VFSSETATTR, + NFSIOS_VFSFLUSH, + NFSIOS_VFSFSYNC, + NFSIOS_VFSLOCK, + NFSIOS_VFSRELEASE, + NFSIOS_CONGESTIONWAIT, + NFSIOS_SETATTRTRUNC, + NFSIOS_EXTENDWRITE, + NFSIOS_SILLYRENAME, + NFSIOS_SHORTREAD, + NFSIOS_SHORTWRITE, + NFSIOS_DELAY, + __NFSIOS_COUNTSMAX, +}; + +#endif /* _LINUX_NFS_IOSTAT */ -- cgit v0.10.2 From 2e96d2867245668dbdb973729288cf69b9fafa66 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 16:42:05 -0400 Subject: NFS: Fix a warning in nfs4_async_handle_error We're not modifying the nfs_server when we call nfs_inc_server_stats and friends, so allow the compiler to pass 'const' pointers too. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index 2ec65e1..a369528 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -19,7 +19,7 @@ struct nfs_iostats { unsigned long events[__NFSIOS_COUNTSMAX]; } ____cacheline_aligned; -static inline void nfs_inc_server_stats(struct nfs_server *server, +static inline void nfs_inc_server_stats(const struct nfs_server *server, enum nfs_stat_eventcounters stat) { struct nfs_iostats *iostats; @@ -31,13 +31,13 @@ static inline void nfs_inc_server_stats(struct nfs_server *server, put_cpu_no_resched(); } -static inline void nfs_inc_stats(struct inode *inode, +static inline void nfs_inc_stats(const struct inode *inode, enum nfs_stat_eventcounters stat) { nfs_inc_server_stats(NFS_SERVER(inode), stat); } -static inline void nfs_add_server_stats(struct nfs_server *server, +static inline void nfs_add_server_stats(const struct nfs_server *server, enum nfs_stat_bytecounters stat, unsigned long addend) { @@ -50,7 +50,7 @@ static inline void nfs_add_server_stats(struct nfs_server *server, put_cpu_no_resched(); } -static inline void nfs_add_stats(struct inode *inode, +static inline void nfs_add_stats(const struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index dfdd19a..058723d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2756,8 +2756,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) task->tk_status = 0; return -EAGAIN; case -NFS4ERR_DELAY: - nfs_inc_server_stats((struct nfs_server *) server, - NFSIOS_DELAY); + nfs_inc_server_stats(server, NFSIOS_DELAY); case -NFS4ERR_GRACE: rpc_delay(task, NFS4_POLL_RETRY_MAX); task->tk_status = 0; -- cgit v0.10.2 From f41f741838480aeaa3a189cff6e210503cf9c42d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 17:39:04 -0400 Subject: NFS: Ensure we zap only the access and acl caches when setting new acls ...and ensure that we obey the NFS_INO_INVALID_ACL flag when retrieving the acls. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2e4ab4a..2c23d06 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -57,8 +57,6 @@ static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED; static void nfs_invalidate_inode(struct inode *); static int nfs_update_inode(struct inode *, struct nfs_fattr *); -static void nfs_zap_acl_cache(struct inode *); - static struct kmem_cache * nfs_inode_cachep; static inline unsigned long @@ -167,7 +165,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) } } -static void nfs_zap_acl_cache(struct inode *inode) +void nfs_zap_acl_cache(struct inode *inode) { void (*clear_acl_cache)(struct inode *); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 04ae867..24241fc 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -150,6 +150,7 @@ extern void nfs_clear_inode(struct inode *); #ifdef CONFIG_NFS_V4 extern void nfs4_clear_inode(struct inode *); #endif +void nfs_zap_acl_cache(struct inode *inode); /* super.c */ extern struct file_system_type nfs_xdev_fs_type; diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 9b73625..423842f 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -5,6 +5,8 @@ #include #include +#include "internal.h" + #define NFSDBG_FACILITY NFSDBG_PROC ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size) @@ -205,6 +207,8 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) status = nfs_revalidate_inode(server, inode); if (status < 0) return ERR_PTR(status); + if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) + nfs_zap_acl_cache(inode); acl = nfs3_get_cached_acl(inode, type); if (acl != ERR_PTR(-EAGAIN)) return acl; @@ -319,9 +323,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, dprintk("NFS call setacl\n"); msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; status = rpc_call_sync(server->client_acl, &msg, 0); - spin_lock(&inode->i_lock); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; - spin_unlock(&inode->i_lock); + nfs_access_zap_cache(inode); + nfs_zap_acl_cache(inode); dprintk("NFS reply setacl: %d\n", status); /* pages may have been allocated at the xdr layer. */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 058723d..10f01c0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2695,6 +2695,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) ret = nfs_revalidate_inode(server, inode); if (ret < 0) return ret; + if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) + nfs_zap_acl_cache(inode); ret = nfs4_read_cached_acl(inode, buf, buflen); if (ret != -ENOENT) return ret; @@ -2722,7 +2724,8 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl nfs_inode_return_delegation(inode); buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); - nfs_zap_caches(inode); + nfs_access_zap_cache(inode); + nfs_zap_acl_cache(inode); return ret; } -- cgit v0.10.2 From ecbb3845dd678364148c1faad32adbc9dd833ef9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 12 Jun 2008 12:37:33 -0400 Subject: NFS: Allow any value for the "retry" option The kernel NFS mount option parser should ignore the retry= mount option since it is meaningful only in user space. Today it expects a number rather than arbitrary text, so it ignores the option if the value is numeric, but chokes if there are other characters in the value. Change it to allow any text (except ",") as its value. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b550e92..5278eef 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -101,6 +101,8 @@ enum { static match_table_t nfs_mount_option_tokens = { { Opt_userspace, "bg" }, { Opt_userspace, "fg" }, + { Opt_userspace, "retry=%s" }, + { Opt_soft, "soft" }, { Opt_hard, "hard" }, { Opt_intr, "intr" }, @@ -136,7 +138,6 @@ static match_table_t nfs_mount_option_tokens = { { Opt_acdirmin, "acdirmin=%u" }, { Opt_acdirmax, "acdirmax=%u" }, { Opt_actimeo, "actimeo=%u" }, - { Opt_userspace, "retry=%u" }, { Opt_namelen, "namlen=%u" }, { Opt_mountport, "mountport=%u" }, { Opt_mountvers, "mountvers=%u" }, -- cgit v0.10.2 From d33e4dfeab5eb0c3c1359cc1a399f2f8b49e18de Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 12 Jun 2008 12:37:41 -0400 Subject: NFS: Treat "intr" and "nointr" options as deprecated Clean up: the "intr" and "nointr" mount options were recently retired. Document this in the NFS mount option parser. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5278eef..f824c41 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -65,7 +65,6 @@ enum { /* Mount options that take no arguments */ Opt_soft, Opt_hard, - Opt_intr, Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, Opt_noac, @@ -105,8 +104,8 @@ static match_table_t nfs_mount_option_tokens = { { Opt_soft, "soft" }, { Opt_hard, "hard" }, - { Opt_intr, "intr" }, - { Opt_nointr, "nointr" }, + { Opt_deprecated, "intr" }, + { Opt_deprecated, "nointr" }, { Opt_posix, "posix" }, { Opt_noposix, "noposix" }, { Opt_cto, "cto" }, @@ -787,9 +786,6 @@ static int nfs_parse_mount_options(char *raw, case Opt_hard: mnt->flags &= ~NFS_MOUNT_SOFT; break; - case Opt_intr: - case Opt_nointr: - break; case Opt_posix: mnt->flags |= NFS_MOUNT_POSIX; break; @@ -1105,6 +1101,8 @@ static int nfs_parse_mount_options(char *raw, case Opt_userspace: case Opt_deprecated: + dfprintk(MOUNT, "NFS: ignoring mount option " + "'%s'\n", p); break; default: -- cgit v0.10.2 From 396cee977f79590673ad51b04f1853e58bc30e7b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 12 Jun 2008 12:37:49 -0400 Subject: NFS: missing newline in NFS mount debugging message Clean up. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f824c41..a1065c1 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1190,7 +1190,7 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, if (status == 0) return 0; - dfprintk(MOUNT, "NFS: unable to mount server %s, error %d", + dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n", hostname, status); return status; } -- cgit v0.10.2 From e7d39069e387a12d4c57f4067d9f48c1d29ea900 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 13 Jun 2008 12:12:32 -0400 Subject: NFS: Clean up nfs_update_request() Simplify the loop in nfs_update_request by moving into a separate function the code that attempts to update an existing cached NFS write. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 21d8a48..04f51e5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -34,9 +34,6 @@ /* * Local function declarations */ -static struct nfs_page * nfs_update_request(struct nfs_open_context*, - struct page *, - unsigned int, unsigned int); static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc, struct inode *inode, int ioflags); static void nfs_redirty_request(struct nfs_page *req); @@ -169,30 +166,6 @@ static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int SetPageUptodate(page); } -static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, - unsigned int offset, unsigned int count) -{ - struct nfs_page *req; - int ret; - - for (;;) { - req = nfs_update_request(ctx, page, offset, count); - if (!IS_ERR(req)) - break; - ret = PTR_ERR(req); - if (ret != -EBUSY) - return ret; - ret = nfs_wb_page(page->mapping->host, page); - if (ret != 0) - return ret; - } - /* Update file length */ - nfs_grow_file(page, offset, count); - nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); - nfs_clear_page_tag_locked(req); - return 0; -} - static int wb_priority(struct writeback_control *wbc) { if (wbc->for_reclaim) @@ -356,11 +329,19 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) /* * Insert a write request into an inode */ -static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) +static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(inode); int error; + error = radix_tree_preload(GFP_NOFS); + if (error != 0) + goto out; + + /* Lock the request! */ + nfs_lock_request_dontget(req); + + spin_lock(&inode->i_lock); error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); BUG_ON(error); if (!nfsi->npages) { @@ -374,6 +355,10 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) kref_get(&req->wb_kref); radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); + spin_unlock(&inode->i_lock); + radix_tree_preload_end(); +out: + return error; } /* @@ -565,101 +550,121 @@ static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pg #endif /* - * Try to update any existing write request, or create one if there is none. - * In order to match, the request's credentials must match those of - * the calling process. + * Search for an existing write request, and attempt to update + * it to reflect a new dirty region on a given page. * - * Note: Should always be called with the Page Lock held! + * If the attempt fails, then the existing request is flushed out + * to disk. */ -static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, - struct page *page, unsigned int offset, unsigned int bytes) +static struct nfs_page *nfs_try_to_update_request(struct inode *inode, + struct page *page, + unsigned int offset, + unsigned int bytes) { - struct address_space *mapping = page->mapping; - struct inode *inode = mapping->host; - struct nfs_page *req, *new = NULL; - pgoff_t rqend, end; + struct nfs_page *req; + unsigned int rqend; + unsigned int end; + int error; + + if (!PagePrivate(page)) + return NULL; end = offset + bytes; + spin_lock(&inode->i_lock); for (;;) { - /* Loop over all inode entries and see if we find - * A request for the page we wish to update - */ - spin_lock(&inode->i_lock); req = nfs_page_find_request_locked(page); - if (req) { - if (!nfs_set_page_tag_locked(req)) { - int error; - - spin_unlock(&inode->i_lock); - error = nfs_wait_on_request(req); - nfs_release_request(req); - if (error < 0) { - if (new) { - radix_tree_preload_end(); - nfs_release_request(new); - } - return ERR_PTR(error); - } - continue; - } - spin_unlock(&inode->i_lock); - if (new) { - radix_tree_preload_end(); - nfs_release_request(new); - } + if (req == NULL) + goto out_unlock; + + rqend = req->wb_offset + req->wb_bytes; + /* + * Tell the caller to flush out the request if + * the offsets are non-contiguous. + * Note: nfs_flush_incompatible() will already + * have flushed out requests having wrong owners. + */ + if (!nfs_dirty_request(req) + || offset > rqend + || end < req->wb_offset) + goto out_flushme; + + if (nfs_set_page_tag_locked(req)) break; - } - if (new) { - nfs_lock_request_dontget(new); - nfs_inode_add_request(inode, new); - spin_unlock(&inode->i_lock); - radix_tree_preload_end(); - req = new; - goto out; - } + /* The request is locked, so wait and then retry */ spin_unlock(&inode->i_lock); - - new = nfs_create_request(ctx, inode, page, offset, bytes); - if (IS_ERR(new)) - return new; - if (radix_tree_preload(GFP_NOFS)) { - nfs_release_request(new); - return ERR_PTR(-ENOMEM); - } - } - - /* We have a request for our page. - * If the creds don't match, or the - * page addresses don't match, - * tell the caller to wait on the conflicting - * request. - */ - rqend = req->wb_offset + req->wb_bytes; - if (req->wb_context != ctx - || req->wb_page != page - || !nfs_dirty_request(req) - || offset > rqend || end < req->wb_offset) { - nfs_clear_page_tag_locked(req); - return ERR_PTR(-EBUSY); + error = nfs_wait_on_request(req); + nfs_release_request(req); + if (error != 0) + goto out_err; + spin_lock(&inode->i_lock); } /* Okay, the request matches. Update the region */ if (offset < req->wb_offset) { req->wb_offset = offset; req->wb_pgbase = offset; - req->wb_bytes = max(end, rqend) - req->wb_offset; - goto out; } - if (end > rqend) req->wb_bytes = end - req->wb_offset; + else + req->wb_bytes = rqend - req->wb_offset; +out_unlock: + spin_unlock(&inode->i_lock); + return req; +out_flushme: + spin_unlock(&inode->i_lock); + nfs_release_request(req); + error = nfs_wb_page(inode, page); +out_err: + return ERR_PTR(error); +} +/* + * Try to update an existing write request, or create one if there is none. + * + * Note: Should always be called with the Page Lock held to prevent races + * if we have to add a new request. Also assumes that the caller has + * already called nfs_flush_incompatible() if necessary. + */ +static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, + struct page *page, unsigned int offset, unsigned int bytes) +{ + struct inode *inode = page->mapping->host; + struct nfs_page *req; + int error; + + req = nfs_try_to_update_request(inode, page, offset, bytes); + if (req != NULL) + goto out; + req = nfs_create_request(ctx, inode, page, offset, bytes); + if (IS_ERR(req)) + goto out; + error = nfs_inode_add_request(inode, req); + if (error != 0) { + nfs_release_request(req); + req = ERR_PTR(error); + } out: return req; } +static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, + unsigned int offset, unsigned int count) +{ + struct nfs_page *req; + + req = nfs_setup_write_request(ctx, page, offset, count); + if (IS_ERR(req)) + return PTR_ERR(req); + /* Update file length */ + nfs_grow_file(page, offset, count); + nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); + nfs_clear_page_tag_locked(req); + return 0; +} + int nfs_flush_incompatible(struct file *file, struct page *page) { struct nfs_open_context *ctx = nfs_file_open_context(file); -- cgit v0.10.2 From e468bae97d243fe0e1515abaa1f7d0edf1476ad0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 13 Jun 2008 13:25:22 -0400 Subject: NFS: Allow redirtying of a completed unstable write. Currently, if an unstable write completes, we cannot redirty the page in order to reflect a new change in the page data until after we've sent a COMMIT request. This patch allows a page rewrite to proceed without the unnecessary COMMIT step, putting it immediately back onto the dirty page list, undoing the VM unstable write accounting, and removing the NFS_PAGE_TAG_COMMIT tag from the NFS radix tree. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 04f51e5..feca8c6 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -242,12 +242,9 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, return ret; spin_lock(&inode->i_lock); } - if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { - /* This request is marked for commit */ + if (test_bit(PG_CLEAN, &req->wb_flags)) { spin_unlock(&inode->i_lock); - nfs_clear_page_tag_locked(req); - nfs_pageio_complete(pgio); - return 0; + BUG(); } if (nfs_set_page_writeback(page) != 0) { spin_unlock(&inode->i_lock); @@ -391,19 +388,6 @@ nfs_mark_request_dirty(struct nfs_page *req) __set_page_dirty_nobuffers(req->wb_page); } -/* - * Check if a request is dirty - */ -static inline int -nfs_dirty_request(struct nfs_page *req) -{ - struct page *page = req->wb_page; - - if (page == NULL || test_bit(PG_NEED_COMMIT, &req->wb_flags)) - return 0; - return !PageWriteback(page); -} - #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) /* * Add a request to the inode's commit list. @@ -416,7 +400,7 @@ nfs_mark_request_commit(struct nfs_page *req) spin_lock(&inode->i_lock); nfsi->ncommit++; - set_bit(PG_NEED_COMMIT, &(req)->wb_flags); + set_bit(PG_CLEAN, &(req)->wb_flags); radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_COMMIT); @@ -426,6 +410,19 @@ nfs_mark_request_commit(struct nfs_page *req) __mark_inode_dirty(inode, I_DIRTY_DATASYNC); } +static int +nfs_clear_request_commit(struct nfs_page *req) +{ + struct page *page = req->wb_page; + + if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) { + dec_zone_page_state(page, NR_UNSTABLE_NFS); + dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); + return 1; + } + return 0; +} + static inline int nfs_write_need_commit(struct nfs_write_data *data) { @@ -435,7 +432,7 @@ int nfs_write_need_commit(struct nfs_write_data *data) static inline int nfs_reschedule_unstable_write(struct nfs_page *req) { - if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { + if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { nfs_mark_request_commit(req); return 1; } @@ -451,6 +448,12 @@ nfs_mark_request_commit(struct nfs_page *req) { } +static inline int +nfs_clear_request_commit(struct nfs_page *req) +{ + return 0; +} + static inline int nfs_write_need_commit(struct nfs_write_data *data) { @@ -508,11 +511,8 @@ static void nfs_cancel_commit_list(struct list_head *head) while(!list_empty(head)) { req = nfs_list_entry(head->next); - dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - dec_bdi_stat(req->wb_page->mapping->backing_dev_info, - BDI_RECLAIMABLE); nfs_list_remove_request(req); - clear_bit(PG_NEED_COMMIT, &(req)->wb_flags); + nfs_clear_request_commit(req); nfs_inode_remove_request(req); nfs_unlock_request(req); } @@ -584,8 +584,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, * Note: nfs_flush_incompatible() will already * have flushed out requests having wrong owners. */ - if (!nfs_dirty_request(req) - || offset > rqend + if (offset > rqend || end < req->wb_offset) goto out_flushme; @@ -601,6 +600,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, spin_lock(&inode->i_lock); } + if (nfs_clear_request_commit(req)) + radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_COMMIT); + /* Okay, the request matches. Update the region */ if (offset < req->wb_offset) { req->wb_offset = offset; @@ -682,8 +685,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) req = nfs_page_find_request(page); if (req == NULL) return 0; - do_flush = req->wb_page != page || req->wb_context != ctx - || !nfs_dirty_request(req); + do_flush = req->wb_page != page || req->wb_context != ctx; nfs_release_request(req); if (!do_flush) return 0; @@ -1288,10 +1290,7 @@ static void nfs_commit_release(void *calldata) while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); - clear_bit(PG_NEED_COMMIT, &(req)->wb_flags); - dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - dec_bdi_stat(req->wb_page->mapping->backing_dev_info, - BDI_RECLAIMABLE); + nfs_clear_request_commit(req); dprintk("NFS: commit (%s/%lld %d@%lld)", req->wb_context->path.dentry->d_inode->i_sb->s_id, @@ -1467,7 +1466,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) req = nfs_page_find_request(page); if (req == NULL) goto out; - if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { + if (test_bit(PG_CLEAN, &req->wb_flags)) { nfs_release_request(req); break; } diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index a1676e1..3c60685 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -27,9 +27,12 @@ /* * Valid flags for a dirty buffer */ -#define PG_BUSY 0 -#define PG_NEED_COMMIT 1 -#define PG_NEED_RESCHED 2 +enum { + PG_BUSY = 0, + PG_CLEAN, + PG_NEED_COMMIT, + PG_NEED_RESCHED, +}; struct nfs_inode; struct nfs_page { -- cgit v0.10.2 From cd100725620a8063fbc81bcf16d7c9e71a9583e0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 17 Jun 2008 16:12:00 -0400 Subject: NFS: Fix a dependency on CONFIG_NFS_V4 in nfs_remount Fix the 'nfs4_fs_type' undeclared error in nfs_remount when compiling sans NFSv4... Signed-off-by: Trond Myklebust Cc: Jeff Layton diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a1065c1..b880db1 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1428,6 +1428,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) struct nfs_parsed_mount_data *data; struct nfs_mount_data *options = (struct nfs_mount_data *)raw_data; struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; + u32 nfsvers = nfss->nfs_client->rpc_ops->version; /* * Userspace mount programs that send binary options generally send @@ -1435,8 +1436,8 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) * ones were explicitly specified. Fall back to legacy behavior and * just return success. */ - if ((sb->s_type == &nfs4_fs_type && options4->version == 1) || - (sb->s_type == &nfs_fs_type && options->version >= 1 && + if ((nfsvers == 4 && options4->version == 1) || + (nfsvers <= 3 && options->version >= 1 && options->version <= 6)) return 0; -- cgit v0.10.2 From dc04589827f7e1af12714af8bb00e3f3c4c48c62 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 23 Jun 2008 12:36:37 -0400 Subject: NFS: Use common device name parsing logic for NFSv4 and NFSv2/v3 To support passing a raw IPv6 address as a server hostname, we need to expand the logic that handles splitting the passed-in device name into a server hostname and export path Start by pulling device name parsing out of the mount option validation functions and into separate helper functions. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b880db1..c4ee8b3 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1196,6 +1196,67 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, } /* + * Split "dev_name" into "hostname:export_path". + * + * Note: caller frees hostname and export path, even on error. + */ +static int nfs_parse_devname(const char *dev_name, + char **hostname, size_t maxnamlen, + char **export_path, size_t maxpathlen) +{ + size_t len; + char *colon, *comma; + + colon = strchr(dev_name, ':'); + if (colon == NULL) + goto out_bad_devname; + + len = colon - dev_name; + if (len > maxnamlen) + goto out_hostname; + + /* N.B. caller will free nfs_server.hostname in all cases */ + *hostname = kstrndup(dev_name, len, GFP_KERNEL); + if (!*hostname) + goto out_nomem; + + /* kill possible hostname list: not supported */ + comma = strchr(*hostname, ','); + if (comma != NULL) { + if (comma == *hostname) + goto out_bad_devname; + *comma = '\0'; + } + + colon++; + len = strlen(colon); + if (len > maxpathlen) + goto out_path; + *export_path = kstrndup(colon, len, GFP_KERNEL); + if (!*export_path) + goto out_nomem; + + dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path); + return 0; + +out_bad_devname: + dfprintk(MOUNT, "NFS: device name not in host:path format\n"); + return -EINVAL; + +out_nomem: + dfprintk(MOUNT, "NFS: not enough memory to parse device name\n"); + return -ENOMEM; + +out_hostname: + dfprintk(MOUNT, "NFS: server hostname too long\n"); + return -ENAMETOOLONG; + +out_path: + dfprintk(MOUNT, "NFS: export pathname too long\n"); + return -ENAMETOOLONG; +} + +/* * Validate the NFS2/NFS3 mount data * - fills in the mount root filehandle * @@ -1323,8 +1384,6 @@ static int nfs_validate_mount_data(void *options, break; default: { - unsigned int len; - char *c; int status; if (nfs_parse_mount_options((char *)options, args) == 0) @@ -1334,21 +1393,17 @@ static int nfs_validate_mount_data(void *options, &args->nfs_server.address)) goto out_no_address; - c = strchr(dev_name, ':'); - if (c == NULL) - return -EINVAL; - len = c - dev_name; - /* N.B. caller will free nfs_server.hostname in all cases */ - args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL); - if (!args->nfs_server.hostname) - goto out_nomem; + status = nfs_parse_devname(dev_name, + &args->nfs_server.hostname, + PAGE_SIZE, + &args->nfs_server.export_path, + NFS_MAXPATHLEN); + if (!status) + status = nfs_try_mount(args, mntfh); - c++; - if (strlen(c) > NFS_MAXPATHLEN) - return -ENAMETOOLONG; - args->nfs_server.export_path = c; + kfree(args->nfs_server.export_path); + args->nfs_server.export_path = NULL; - status = nfs_try_mount(args, mntfh); if (status) return status; @@ -1958,7 +2013,7 @@ static int nfs4_validate_mount_data(void *options, break; default: { - unsigned int len; + int status; if (nfs_parse_mount_options((char *)options, args) == 0) return -EINVAL; @@ -1977,34 +2032,17 @@ static int nfs4_validate_mount_data(void *options, goto out_inval_auth; } - /* - * Split "dev_name" into "hostname:mntpath". - */ - c = strchr(dev_name, ':'); - if (c == NULL) - return -EINVAL; - /* while calculating len, pretend ':' is '\0' */ - len = c - dev_name; - if (len > NFS4_MAXNAMLEN) - return -ENAMETOOLONG; - /* N.B. caller will free nfs_server.hostname in all cases */ - args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL); - if (!args->nfs_server.hostname) - goto out_nomem; - - c++; /* step over the ':' */ - len = strlen(c); - if (len > NFS4_MAXPATHLEN) - return -ENAMETOOLONG; - args->nfs_server.export_path = kstrndup(c, len, GFP_KERNEL); - if (!args->nfs_server.export_path) - goto out_nomem; - - dprintk("NFS: MNTPATH: '%s'\n", args->nfs_server.export_path); - if (args->client_address == NULL) goto out_no_client_address; + status = nfs_parse_devname(dev_name, + &args->nfs_server.hostname, + NFS4_MAXNAMLEN, + &args->nfs_server.export_path, + NFS4_MAXPATHLEN); + if (status < 0) + return status; + break; } } @@ -2020,10 +2058,6 @@ out_inval_auth: data->auth_flavourlen); return -EINVAL; -out_nomem: - dfprintk(MOUNT, "NFS4: not enough memory to handle mount options\n"); - return -ENOMEM; - out_no_address: dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); return -EINVAL; -- cgit v0.10.2 From d1aa08257312f1439c1ab7c8a18e3856f9530f46 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 23 Jun 2008 12:36:45 -0400 Subject: NFS: Support raw IPv6 address hostnames during NFS mount operation Traditionally the mount command has looked for a ":" to separate the server's hostname from the export path in the mounted on device name, like this: mount server:/export /mounted/on/dir The server's hostname is "server" and the export path is "/export". You can also substitute a specific IPv4 network address for the server hostname, like this: mount 192.168.0.55:/export /mounted/on/dir Raw IPv6 addresses present a problem, however, because they look something like this: fe80::200:5aff:fe00:30b Note the use of colons. To get around the presence of colons, copy the Solaris convention used for mounting IPv6 servers by address: wrap a raw IPv6 address with square brackets. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index c4ee8b3..ea4abd2 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1195,14 +1195,9 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, return status; } -/* - * Split "dev_name" into "hostname:export_path". - * - * Note: caller frees hostname and export path, even on error. - */ -static int nfs_parse_devname(const char *dev_name, - char **hostname, size_t maxnamlen, - char **export_path, size_t maxpathlen) +static int nfs_parse_simple_hostname(const char *dev_name, + char **hostname, size_t maxnamlen, + char **export_path, size_t maxpathlen) { size_t len; char *colon, *comma; @@ -1257,6 +1252,85 @@ out_path: } /* + * Hostname has square brackets around it because it contains one or + * more colons. We look for the first closing square bracket, and a + * colon must follow it. + */ +static int nfs_parse_protected_hostname(const char *dev_name, + char **hostname, size_t maxnamlen, + char **export_path, size_t maxpathlen) +{ + size_t len; + char *start, *end; + + start = (char *)(dev_name + 1); + + end = strchr(start, ']'); + if (end == NULL) + goto out_bad_devname; + if (*(end + 1) != ':') + goto out_bad_devname; + + len = end - start; + if (len > maxnamlen) + goto out_hostname; + + /* N.B. caller will free nfs_server.hostname in all cases */ + *hostname = kstrndup(start, len, GFP_KERNEL); + if (*hostname == NULL) + goto out_nomem; + + end += 2; + len = strlen(end); + if (len > maxpathlen) + goto out_path; + *export_path = kstrndup(end, len, GFP_KERNEL); + if (!*export_path) + goto out_nomem; + + return 0; + +out_bad_devname: + dfprintk(MOUNT, "NFS: device name not in host:path format\n"); + return -EINVAL; + +out_nomem: + dfprintk(MOUNT, "NFS: not enough memory to parse device name\n"); + return -ENOMEM; + +out_hostname: + dfprintk(MOUNT, "NFS: server hostname too long\n"); + return -ENAMETOOLONG; + +out_path: + dfprintk(MOUNT, "NFS: export pathname too long\n"); + return -ENAMETOOLONG; +} + +/* + * Split "dev_name" into "hostname:export_path". + * + * The leftmost colon demarks the split between the server's hostname + * and the export path. If the hostname starts with a left square + * bracket, then it may contain colons. + * + * Note: caller frees hostname and export path, even on error. + */ +static int nfs_parse_devname(const char *dev_name, + char **hostname, size_t maxnamlen, + char **export_path, size_t maxpathlen) +{ + if (*dev_name == '[') + return nfs_parse_protected_hostname(dev_name, + hostname, maxnamlen, + export_path, maxpathlen); + + return nfs_parse_simple_hostname(dev_name, + hostname, maxnamlen, + export_path, maxpathlen); +} + +/* * Validate the NFS2/NFS3 mount data * - fills in the mount root filehandle * -- cgit v0.10.2 From ce3b7e1906ebbe96753fe090b36de6ffb8e0e0e7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 23 Jun 2008 12:36:53 -0400 Subject: NFS: Add string length argument to nfs_parse_server_address To make nfs_parse_server_address() more generally useful, allow it to accept input strings that are not terminated with '\0'. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ea4abd2..d27aa1d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -705,38 +705,76 @@ static int nfs_verify_server_address(struct sockaddr *addr) return 0; } -/* - * Parse string addresses passed in via a mount option, - * and construct a sockaddr based on the result. - * - * If address parsing fails, set the sockaddr's address - * family to AF_UNSPEC to force nfs_verify_server_address() - * to punt the mount. - */ -static void nfs_parse_server_address(char *value, - struct sockaddr *sap, - size_t *len) +static void nfs_parse_ipv4_address(char *string, size_t str_len, + struct sockaddr *sap, size_t *addr_len) { - if (strchr(value, ':')) { - struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap; - u8 *addr = (u8 *)&ap->sin6_addr.in6_u; + struct sockaddr_in *sin = (struct sockaddr_in *)sap; + u8 *addr = (u8 *)&sin->sin_addr.s_addr; - ap->sin6_family = AF_INET6; - *len = sizeof(*ap); - if (in6_pton(value, -1, addr, '\0', NULL)) + if (str_len <= INET_ADDRSTRLEN) { + dfprintk(MOUNT, "NFS: parsing IPv4 address %*s\n", + (int)str_len, string); + + sin->sin_family = AF_INET; + *addr_len = sizeof(*sin); + if (in4_pton(string, str_len, addr, '\0', NULL)) return; - } else { - struct sockaddr_in *ap = (struct sockaddr_in *)sap; - u8 *addr = (u8 *)&ap->sin_addr.s_addr; + } + + sap->sa_family = AF_UNSPEC; + *addr_len = 0; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static void nfs_parse_ipv6_address(char *string, size_t str_len, + struct sockaddr *sap, size_t *addr_len) +{ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + u8 *addr = (u8 *)&sin6->sin6_addr.in6_u; + + if (str_len <= INET6_ADDRSTRLEN) { + dfprintk(MOUNT, "NFS: parsing IPv6 address %*s\n", + (int)str_len, string); - ap->sin_family = AF_INET; - *len = sizeof(*ap); - if (in4_pton(value, -1, addr, '\0', NULL)) + sin6->sin6_family = AF_INET6; + *addr_len = sizeof(*sin6); + if (in6_pton(string, str_len, addr, '\0', NULL)) return; } sap->sa_family = AF_UNSPEC; - *len = 0; + *addr_len = 0; +} +#else +static void nfs_parse_ipv6_address(char *string, size_t str_len, + struct sockaddr *sap, size_t *addr_len) +{ + sap->sa_family = AF_UNSPEC; + *addr_len = 0; +} +#endif + +/* + * Construct a sockaddr based on the contents of a string that contains + * an IP address in presentation format. + * + * If there is a problem constructing the new sockaddr, set the address + * family to AF_UNSPEC. + */ +static void nfs_parse_ip_address(char *string, size_t str_len, + struct sockaddr *sap, size_t *addr_len) +{ + unsigned int i, colons; + + colons = 0; + for (i = 0; i < str_len; i++) + if (string[i] == ':') + colons++; + + if (colons >= 2) + nfs_parse_ipv6_address(string, str_len, sap, addr_len); + else + nfs_parse_ipv4_address(string, str_len, sap, addr_len); } /* @@ -1070,9 +1108,10 @@ static int nfs_parse_mount_options(char *raw, string = match_strdup(args); if (string == NULL) goto out_nomem; - nfs_parse_server_address(string, (struct sockaddr *) - &mnt->nfs_server.address, - &mnt->nfs_server.addrlen); + nfs_parse_ip_address(string, strlen(string), + (struct sockaddr *) + &mnt->nfs_server.address, + &mnt->nfs_server.addrlen); kfree(string); break; case Opt_clientaddr: @@ -1093,9 +1132,10 @@ static int nfs_parse_mount_options(char *raw, string = match_strdup(args); if (string == NULL) goto out_nomem; - nfs_parse_server_address(string, (struct sockaddr *) - &mnt->mount_server.address, - &mnt->mount_server.addrlen); + nfs_parse_ip_address(string, strlen(string), + (struct sockaddr *) + &mnt->mount_server.address, + &mnt->mount_server.addrlen); kfree(string); break; diff --git a/include/linux/inet.h b/include/linux/inet.h index 1354080..4cca05c 100644 --- a/include/linux/inet.h +++ b/include/linux/inet.h @@ -44,6 +44,13 @@ #include +/* + * These mimic similar macros defined in user-space for inet_ntop(3). + * See /usr/include/netinet/in.h . + */ +#define INET_ADDRSTRLEN (16) +#define INET6_ADDRSTRLEN (48) + extern __be32 in_aton(const char *str); extern int in4_pton(const char *src, int srclen, u8 *dst, int delim, const char **end); extern int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char **end); -- cgit v0.10.2 From d8e7748ab8322171ebfd78f6155ff35e7d57ac32 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 23 Jun 2008 12:37:01 -0400 Subject: NFS: handle interface identifiers in incoming IPv6 addresses Add support in the kernel NFS client's address parser for interface identifiers. IPv6 link-local addresses require an additional "interface identifier", which is a network device name or an integer that indexes the array of local network interfaces. They are suffixed to the address with a '%'. For example: fe80::215:c5ff:fe3b:e1b2%2 indicates an interface index of 2. Or fe80::215:c5ff:fe3b:e1b2%eth0 indicates that requests should be routed through the eth0 device. Without the interface ID, link-local addresses are not usable for NFS. Both the kernel NFS client mount option parser and the mount.nfs command can take either form. The mount.nfs command always passes the address through getnameinfo(3), which usually re-writes interface indices as device names. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d27aa1d..73a8e59 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -725,12 +726,48 @@ static void nfs_parse_ipv4_address(char *string, size_t str_len, *addr_len = 0; } +#define IPV6_SCOPE_DELIMITER '%' + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, + const char *delim, + struct sockaddr_in6 *sin6) +{ + char *p; + size_t len; + + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) + return ; + if (*delim != IPV6_SCOPE_DELIMITER) + return; + + len = (string + str_len) - delim - 1; + p = kstrndup(delim + 1, len, GFP_KERNEL); + if (p) { + unsigned long scope_id = 0; + struct net_device *dev; + + dev = dev_get_by_name(&init_net, p); + if (dev != NULL) { + scope_id = dev->ifindex; + dev_put(dev); + } else { + /* scope_id is set to zero on error */ + strict_strtoul(p, 10, &scope_id); + } + + kfree(p); + sin6->sin6_scope_id = scope_id; + dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id); + } +} + static void nfs_parse_ipv6_address(char *string, size_t str_len, struct sockaddr *sap, size_t *addr_len) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; u8 *addr = (u8 *)&sin6->sin6_addr.in6_u; + const char *delim; if (str_len <= INET6_ADDRSTRLEN) { dfprintk(MOUNT, "NFS: parsing IPv6 address %*s\n", @@ -738,8 +775,10 @@ static void nfs_parse_ipv6_address(char *string, size_t str_len, sin6->sin6_family = AF_INET6; *addr_len = sizeof(*sin6); - if (in6_pton(string, str_len, addr, '\0', NULL)) + if (in6_pton(string, str_len, addr, IPV6_SCOPE_DELIMITER, &delim)) { + nfs_parse_ipv6_scope_id(string, str_len, delim, sin6); return; + } } sap->sa_family = AF_UNSPEC; -- cgit v0.10.2 From 77e03677ac76d413fbb6d6caaffa1d64877a0c2a Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 24 Jun 2008 20:25:57 +0300 Subject: nfs: initialize timeout variable in nfs4_proc_setclientid_confirm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc (4.3.0) rightfully warns about this: /usr0/export/dev/bhalevy/git/linux-pnfs-bh-nfs41/fs/nfs/nfs4proc.c: In function nfs4_proc_setclientid_confirm: /usr0/export/dev/bhalevy/git/linux-pnfs-bh-nfs41/fs/nfs/nfs4proc.c:2936: warning: timeout may be used uninitialized in this function nfs4_delay that's passed a pointer to 'timeout' is looking at its value and sets it up to some value in the range: NFS4_POLL_RETRY_MIN..NFS4_POLL_RETRY_MAX if (*timeout <= 0) *timeout = NFS4_POLL_RETRY_MIN; if (*timeout > NFS4_POLL_RETRY_MAX) *timeout = NFS4_POLL_RETRY_MAX; Therefore it will end up set to some sane, though rather indeterministic, value. Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 10f01c0..4451287 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2924,7 +2924,7 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cre int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) { - long timeout; + long timeout = 0; int err; do { err = _nfs4_proc_setclientid_confirm(clp, cred); -- cgit v0.10.2 From ee84dfc45467fd8e5ce04fa2813d98e0aebe465c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 11 Jun 2008 10:03:10 -0400 Subject: nfs4: remove BKL from nfs_callback_up and nfs_callback_down The nfs_callback_mutex is sufficient protection. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index c1e7c83..9e713d2 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -105,7 +105,6 @@ int nfs_callback_up(void) struct svc_rqst *rqstp; int ret = 0; - lock_kernel(); mutex_lock(&nfs_callback_mutex); if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) goto out; @@ -149,7 +148,6 @@ out: if (serv) svc_destroy(serv); mutex_unlock(&nfs_callback_mutex); - unlock_kernel(); return ret; out_err: dprintk("Couldn't create callback socket or server thread; err = %d\n", @@ -163,13 +161,11 @@ out_err: */ void nfs_callback_down(void) { - lock_kernel(); mutex_lock(&nfs_callback_mutex); nfs_callback_info.users--; if (nfs_callback_info.users == 0 && nfs_callback_info.task != NULL) kthread_stop(nfs_callback_info.task); mutex_unlock(&nfs_callback_mutex); - unlock_kernel(); } static int nfs_callback_authenticate(struct svc_rqst *rqstp) -- cgit v0.10.2 From 5afc597c5f0bd184457e49b9a330fcb37b69db11 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 11 Jun 2008 10:03:11 -0400 Subject: nfs4: fix potential race with rapid nfs_callback_up/down cycle If the nfsv4 callback thread is rapidly brought up and down, it's possible that nfs_callback_svc might never get a chance to run. If this happens, the cleanup at thread exit might never occur, throwing the refcounting off and nfs_callback_info in an incorrect state. Move the clean functions into nfs_callback_down. Also change the nfs_callback_info struct to track the svc_rqst rather than svc_serv since we need to know that to call svc_exit_thread. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 9e713d2..f447f4b 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -27,7 +27,7 @@ struct nfs_callback_data { unsigned int users; - struct svc_serv *serv; + struct svc_rqst *rqst; struct task_struct *task; }; @@ -91,18 +91,15 @@ nfs_callback_svc(void *vrqstp) svc_process(rqstp); } unlock_kernel(); - nfs_callback_info.task = NULL; - svc_exit_thread(rqstp); return 0; } /* - * Bring up the server process if it is not already up. + * Bring up the callback thread if it is not already up. */ int nfs_callback_up(void) { struct svc_serv *serv = NULL; - struct svc_rqst *rqstp; int ret = 0; mutex_lock(&nfs_callback_mutex); @@ -120,22 +117,23 @@ int nfs_callback_up(void) nfs_callback_tcpport = ret; dprintk("Callback port = 0x%x\n", nfs_callback_tcpport); - rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]); - if (IS_ERR(rqstp)) { - ret = PTR_ERR(rqstp); + nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); + if (IS_ERR(nfs_callback_info.rqst)) { + ret = PTR_ERR(nfs_callback_info.rqst); + nfs_callback_info.rqst = NULL; goto out_err; } svc_sock_update_bufs(serv); - nfs_callback_info.serv = serv; - nfs_callback_info.task = kthread_run(nfs_callback_svc, rqstp, + nfs_callback_info.task = kthread_run(nfs_callback_svc, + nfs_callback_info.rqst, "nfsv4-svc"); if (IS_ERR(nfs_callback_info.task)) { ret = PTR_ERR(nfs_callback_info.task); - nfs_callback_info.serv = NULL; + svc_exit_thread(nfs_callback_info.rqst); + nfs_callback_info.rqst = NULL; nfs_callback_info.task = NULL; - svc_exit_thread(rqstp); goto out_err; } out: @@ -157,14 +155,18 @@ out_err: } /* - * Kill the server process if it is not already down. + * Kill the callback thread if it's no longer being used. */ void nfs_callback_down(void) { mutex_lock(&nfs_callback_mutex); nfs_callback_info.users--; - if (nfs_callback_info.users == 0 && nfs_callback_info.task != NULL) + if (nfs_callback_info.users == 0 && nfs_callback_info.task != NULL) { kthread_stop(nfs_callback_info.task); + svc_exit_thread(nfs_callback_info.rqst); + nfs_callback_info.rqst = NULL; + nfs_callback_info.task = NULL; + } mutex_unlock(&nfs_callback_mutex); } -- cgit v0.10.2 From 877fcf103982e52a59a1035378b4c0b8c63fe004 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 25 Jun 2008 17:24:23 -0400 Subject: SUNRPC: More useful debugging output for rpcb client Clean up dprintk's in rpcb client's XDR decoder functions. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index e6fb21b..cf2b916 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -438,7 +438,7 @@ static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p, unsigned short *portp) { *portp = (unsigned short) ntohl(*p++); - dprintk("RPC: rpcb_decode_getport result %u\n", + dprintk("RPC: rpcb_decode_getport result %u\n", *portp); return 0; } @@ -447,8 +447,8 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, unsigned int *boolp) { *boolp = (unsigned int) ntohl(*p++); - dprintk("RPC: rpcb_decode_set result %u\n", - *boolp); + dprintk("RPC: rpcb_decode_set: call %s\n", + (*boolp ? "succeeded" : "failed")); return 0; } -- cgit v0.10.2 From fc200e794d723bc88c39859e8f096b717532f9c9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 25 Jun 2008 17:24:31 -0400 Subject: SUNRPC: Document some naked integers in rpcbind client Clean up: Replace naked integers that represent rpcbind protocol versions. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index cf2b916..b23a719 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -32,6 +32,10 @@ #define RPCBIND_PROGRAM (100000u) #define RPCBIND_PORT (111u) +#define RPCBVERS_2 (2u) +#define RPCBVERS_3 (3u) +#define RPCBVERS_4 (4u) + enum { RPCBPROC_NULL, RPCBPROC_SET, @@ -82,7 +86,7 @@ static struct rpc_procinfo rpcb_procedures2[]; static struct rpc_procinfo rpcb_procedures3[]; struct rpcb_info { - int rpc_vers; + u32 rpc_vers; struct rpc_procinfo * rpc_proc; }; @@ -177,7 +181,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) prog, vers, prot, port); rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin, - sizeof(sin), XPRT_TRANSPORT_UDP, 2, 1); + sizeof(sin), XPRT_TRANSPORT_UDP, RPCBVERS_2, 1); if (IS_ERR(rpcb_clnt)) return PTR_ERR(rpcb_clnt); @@ -227,7 +231,7 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot) __func__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin, - sizeof(*sin), prot, 2, 0); + sizeof(*sin), prot, RPCBVERS_2, 0); if (IS_ERR(rpcb_clnt)) return PTR_ERR(rpcb_clnt); @@ -588,35 +592,54 @@ static struct rpc_procinfo rpcb_procedures4[] = { static struct rpcb_info rpcb_next_version[] = { #ifdef CONFIG_SUNRPC_BIND34 - { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, - { 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, + { + .rpc_vers = RPCBVERS_4, + .rpc_proc = &rpcb_procedures4[RPCBPROC_GETVERSADDR], + }, + { + .rpc_vers = RPCBVERS_3, + .rpc_proc = &rpcb_procedures3[RPCBPROC_GETADDR], + }, #endif - { 2, &rpcb_procedures2[RPCBPROC_GETPORT] }, - { 0, NULL }, + { + .rpc_vers = RPCBVERS_2, + .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT], + }, + { + .rpc_proc = NULL, + }, }; static struct rpcb_info rpcb_next_version6[] = { #ifdef CONFIG_SUNRPC_BIND34 - { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, - { 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, + { + .rpc_vers = RPCBVERS_4, + .rpc_proc = &rpcb_procedures4[RPCBPROC_GETVERSADDR], + }, + { + .rpc_vers = RPCBVERS_3, + .rpc_proc = &rpcb_procedures3[RPCBPROC_GETADDR], + }, #endif - { 0, NULL }, + { + .rpc_proc = NULL, + }, }; static struct rpc_version rpcb_version2 = { - .number = 2, + .number = RPCBVERS_2, .nrprocs = RPCB_HIGHPROC_2, .procs = rpcb_procedures2 }; static struct rpc_version rpcb_version3 = { - .number = 3, + .number = RPCBVERS_3, .nrprocs = RPCB_HIGHPROC_3, .procs = rpcb_procedures3 }; static struct rpc_version rpcb_version4 = { - .number = 4, + .number = RPCBVERS_4, .nrprocs = RPCB_HIGHPROC_4, .procs = rpcb_procedures4 }; -- cgit v0.10.2 From 6a774051573042cdeb57e81f77b36c25e5856739 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 25 Jun 2008 17:24:39 -0400 Subject: SUNRPC: Use rpcbind version 2 GETPORT Clean up: Change the version 2 procedure name to GETPORT. It's the same procedure number as GETADDR, but version 2 implementations usually refer to it as GETPORT. This also now matches the procedure name used in the version 2 procedure entry in the rpcb_next_version[] array, making it slightly less confusing. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index b23a719..a70cc1e 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -575,7 +575,7 @@ out_err: static struct rpc_procinfo rpcb_procedures2[] = { PROC(SET, mapping, set), PROC(UNSET, mapping, set), - PROC(GETADDR, mapping, getport), + PROC(GETPORT, mapping, getport), }; static struct rpc_procinfo rpcb_procedures3[] = { -- cgit v0.10.2 From 8842413aa4c3220ce9313791f99808fc149ca16d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 25 Jun 2008 17:24:47 -0400 Subject: SUNRPC: Use GETADDR for rpcbind version 4 queries Some rpcbind servers that do support rpcbind version 4 do not support the GETVERSADDR procedure. Use GETADDR for querying rpcbind servers via rpcbind version 4 instead of GETVERSADDR. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index a70cc1e..625ba72 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -587,6 +587,7 @@ static struct rpc_procinfo rpcb_procedures3[] = { static struct rpc_procinfo rpcb_procedures4[] = { PROC(SET, mapping, set), PROC(UNSET, mapping, set), + PROC(GETADDR, getaddr, getaddr), PROC(GETVERSADDR, getaddr, getaddr), }; @@ -594,7 +595,7 @@ static struct rpcb_info rpcb_next_version[] = { #ifdef CONFIG_SUNRPC_BIND34 { .rpc_vers = RPCBVERS_4, - .rpc_proc = &rpcb_procedures4[RPCBPROC_GETVERSADDR], + .rpc_proc = &rpcb_procedures4[RPCBPROC_GETADDR], }, { .rpc_vers = RPCBVERS_3, @@ -614,7 +615,7 @@ static struct rpcb_info rpcb_next_version6[] = { #ifdef CONFIG_SUNRPC_BIND34 { .rpc_vers = RPCBVERS_4, - .rpc_proc = &rpcb_procedures4[RPCBPROC_GETVERSADDR], + .rpc_proc = &rpcb_procedures4[RPCBPROC_GETADDR], }, { .rpc_vers = RPCBVERS_3, -- cgit v0.10.2 From 40fef8a649e5344bfb6a67a7cc3def3e0dad6448 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 25 Jun 2008 17:24:54 -0400 Subject: SUNRPC: Use only rpcbind v2 for AF_INET requests Some server vendors support the higher versions of rpcbind only for AF_INET6. The kernel doesn't need to use v3 or v4 for AF_INET anyway, so change the kernel's rpcbind client to query AF_INET servers over rpcbind v2 only. This has a few interesting benefits: 1. If the rpcbind request is going over TCP, and the server doesn't support rpcbind versions 3 or 4, the client reduces by two the number of ephemeral ports left in TIME_WAIT for each rpcbind request. This will help during NFS mount storms. 2. The rpcbind interaction with servers that don't support rpcbind versions 3 or 4 will use less network traffic. Also helpful during mount storms. 3. We can eliminate the kernel build option that controls whether the kernel's rpcbind client uses rpcbind version 3 and 4 for AF_INET servers. Less complicated kernel configuration... Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/Kconfig b/fs/Kconfig index 1c16de9..0ce72dc 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1799,27 +1799,6 @@ config SUNRPC_XPRT_RDMA If unsure, say N. -config SUNRPC_BIND34 - bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)" - depends on SUNRPC && EXPERIMENTAL - default n - help - RPC requests over IPv6 networks require support for larger - addresses when performing an RPC bind. Sun added support for - IPv6 addressing by creating two new versions of the rpcbind - protocol (RFC 1833). - - This option enables support in the kernel RPC client for - querying rpcbind servers via versions 3 and 4 of the rpcbind - protocol. The kernel automatically falls back to version 2 - if a remote rpcbind service does not support versions 3 or 4. - By themselves, these new versions do not provide support for - RPC over IPv6, but the new protocol versions are necessary to - support it. - - If unsure, say N to get traditional behavior (version 2 rpcbind - requests only). - config RPCSEC_GSS_KRB5 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" depends on SUNRPC && EXPERIMENTAL diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 625ba72..c62e446 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -592,16 +592,6 @@ static struct rpc_procinfo rpcb_procedures4[] = { }; static struct rpcb_info rpcb_next_version[] = { -#ifdef CONFIG_SUNRPC_BIND34 - { - .rpc_vers = RPCBVERS_4, - .rpc_proc = &rpcb_procedures4[RPCBPROC_GETADDR], - }, - { - .rpc_vers = RPCBVERS_3, - .rpc_proc = &rpcb_procedures3[RPCBPROC_GETADDR], - }, -#endif { .rpc_vers = RPCBVERS_2, .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT], @@ -612,7 +602,6 @@ static struct rpcb_info rpcb_next_version[] = { }; static struct rpcb_info rpcb_next_version6[] = { -#ifdef CONFIG_SUNRPC_BIND34 { .rpc_vers = RPCBVERS_4, .rpc_proc = &rpcb_procedures4[RPCBPROC_GETADDR], @@ -621,7 +610,6 @@ static struct rpcb_info rpcb_next_version6[] = { .rpc_vers = RPCBVERS_3, .rpc_proc = &rpcb_procedures3[RPCBPROC_GETADDR], }, -#endif { .rpc_proc = NULL, }, -- cgit v0.10.2 From 259875efed06d6936f54c9a264e868937f1bc217 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 2 Jul 2008 14:43:47 -0400 Subject: NFS: set transport defaults after mount option parsing is finished Move the UDP/TCP default timeo/retrans settings for text mounts to nfs_init_timeout_values(), which was were they were always being initialised (and sanity checked) for binary mounts. Document the default timeout values using appropriate #defines. Ensure that we initialise and sanity check the transport protocols that may have been specified by the user. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f2a092c..5ee23e7 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -431,14 +431,14 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, { to->to_initval = timeo * HZ / 10; to->to_retries = retrans; - if (!to->to_retries) - to->to_retries = 2; switch (proto) { case XPRT_TRANSPORT_TCP: case XPRT_TRANSPORT_RDMA: + if (to->to_retries == 0) + to->to_retries = NFS_DEF_TCP_RETRANS; if (to->to_initval == 0) - to->to_initval = 60 * HZ; + to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10; if (to->to_initval > NFS_MAX_TCP_TIMEOUT) to->to_initval = NFS_MAX_TCP_TIMEOUT; to->to_increment = to->to_initval; @@ -450,14 +450,17 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, to->to_exponential = 0; break; case XPRT_TRANSPORT_UDP: - default: + if (to->to_retries == 0) + to->to_retries = NFS_DEF_UDP_RETRANS; if (!to->to_initval) - to->to_initval = 11 * HZ / 10; + to->to_initval = NFS_DEF_UDP_TIMEO * HZ / 10; if (to->to_initval > NFS_MAX_UDP_TIMEOUT) to->to_initval = NFS_MAX_UDP_TIMEOUT; to->to_maxval = NFS_MAX_UDP_TIMEOUT; to->to_exponential = 1; break; + default: + BUG(); } } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 73a8e59..9c1a960 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -817,6 +817,43 @@ static void nfs_parse_ip_address(char *string, size_t str_len, } /* + * Sanity check the NFS transport protocol. + * + */ +static void nfs_validate_transport_protocol(struct nfs_parsed_mount_data *mnt) +{ + switch (mnt->nfs_server.protocol) { + case XPRT_TRANSPORT_UDP: + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_RDMA: + break; + default: + mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; + } +} + +/* + * For text based NFSv2/v3 mounts, the mount protocol transport default + * settings should depend upon the specified NFS transport. + */ +static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt) +{ + nfs_validate_transport_protocol(mnt); + + if (mnt->mount_server.protocol == XPRT_TRANSPORT_UDP || + mnt->mount_server.protocol == XPRT_TRANSPORT_TCP) + return; + switch (mnt->nfs_server.protocol) { + case XPRT_TRANSPORT_UDP: + mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; + break; + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_RDMA: + mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; + } +} + +/* * Error-check and convert a string of mount options from user space into * a data structure */ @@ -896,20 +933,14 @@ static int nfs_parse_mount_options(char *raw, case Opt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; - mnt->timeo = 7; - mnt->retrans = 5; break; case Opt_tcp: mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; - mnt->timeo = 600; - mnt->retrans = 2; break; case Opt_rdma: mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */ mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; - mnt->timeo = 600; - mnt->retrans = 2; break; case Opt_acl: mnt->flags &= ~NFS_MOUNT_NOACL; @@ -1103,21 +1134,15 @@ static int nfs_parse_mount_options(char *raw, case Opt_xprt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; - mnt->timeo = 7; - mnt->retrans = 5; break; case Opt_xprt_tcp: mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; - mnt->timeo = 600; - mnt->retrans = 2; break; case Opt_xprt_rdma: /* vector side protocols to TCP */ mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; - mnt->timeo = 600; - mnt->retrans = 2; break; default: goto out_unrec_xprt; @@ -1438,14 +1463,11 @@ static int nfs_validate_mount_data(void *options, args->flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP); args->rsize = NFS_MAX_FILE_IO_SIZE; args->wsize = NFS_MAX_FILE_IO_SIZE; - args->timeo = 600; - args->retrans = 2; args->acregmin = 3; args->acregmax = 60; args->acdirmin = 30; args->acdirmax = 60; args->mount_server.port = 0; /* autobind unless user sets port */ - args->mount_server.protocol = XPRT_TRANSPORT_UDP; args->nfs_server.port = 0; /* autobind unless user sets port */ args->nfs_server.protocol = XPRT_TRANSPORT_TCP; @@ -1546,6 +1568,8 @@ static int nfs_validate_mount_data(void *options, &args->nfs_server.address)) goto out_no_address; + nfs_set_mount_transport_protocol(args); + status = nfs_parse_devname(dev_name, &args->nfs_server.hostname, PAGE_SIZE, @@ -2095,14 +2119,11 @@ static int nfs4_validate_mount_data(void *options, args->rsize = NFS_MAX_FILE_IO_SIZE; args->wsize = NFS_MAX_FILE_IO_SIZE; - args->timeo = 600; - args->retrans = 2; args->acregmin = 3; args->acregmax = 60; args->acdirmin = 30; args->acdirmax = 60; args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */ - args->nfs_server.protocol = XPRT_TRANSPORT_TCP; switch (data->version) { case 1: @@ -2163,6 +2184,7 @@ static int nfs4_validate_mount_data(void *options, args->acdirmin = data->acdirmin; args->acdirmax = data->acdirmax; args->nfs_server.protocol = data->proto; + nfs_validate_transport_protocol(args); break; default: { @@ -2175,6 +2197,8 @@ static int nfs4_validate_mount_data(void *options, &args->nfs_server.address)) return -EINVAL; + nfs_validate_transport_protocol(args); + switch (args->auth_flavor_len) { case 0: args->auth_flavors[0] = RPC_AUTH_UNIX; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 27d6a8d..830d9cc 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -12,6 +12,11 @@ #include /* Default timeout values */ +#define NFS_DEF_UDP_TIMEO (11) +#define NFS_DEF_UDP_RETRANS (3) +#define NFS_DEF_TCP_TIMEO (600) +#define NFS_DEF_TCP_RETRANS (2) + #define NFS_MAX_UDP_TIMEOUT (60*HZ) #define NFS_MAX_TCP_TIMEOUT (600*HZ) -- cgit v0.10.2 From ed596a8adb7964cf9d2f7682f9cf2c37322a775d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 26 Jun 2008 17:47:05 -0400 Subject: NFS: Move the nfs_set_port() call out of nfs_parse_mount_options() The remount path does not need to set the port in the server address. Since it's not really a part of option parsing, move the nfs_set_port() call to nfs_parse_mount_options()'s callers. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 9c1a960..de424d2 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1214,9 +1214,6 @@ static int nfs_parse_mount_options(char *raw, } } - nfs_set_port((struct sockaddr *)&mnt->nfs_server.address, - mnt->nfs_server.port); - return 1; out_nomem: @@ -1568,6 +1565,9 @@ static int nfs_validate_mount_data(void *options, &args->nfs_server.address)) goto out_no_address; + nfs_set_port((struct sockaddr *)&args->nfs_server.address, + args->nfs_server.port); + nfs_set_mount_transport_protocol(args); status = nfs_parse_devname(dev_name, @@ -2197,6 +2197,9 @@ static int nfs4_validate_mount_data(void *options, &args->nfs_server.address)) return -EINVAL; + nfs_set_port((struct sockaddr *)&args->nfs_server.address, + args->nfs_server.port); + nfs_validate_transport_protocol(args); switch (args->auth_flavor_len) { -- cgit v0.10.2 From 0e0cab744b17a70ef0f08d818d66935feade7cad Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 26 Jun 2008 17:47:12 -0400 Subject: NFS: use documenting macro constants for initializing ac{reg, dir}{min, max} Clean up. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 15afe3a..46763d1c 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -295,10 +295,10 @@ static int __init root_nfs_name(char *name) nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ nfs_data.rsize = NFS_DEF_FILE_IO_SIZE; nfs_data.wsize = NFS_DEF_FILE_IO_SIZE; - nfs_data.acregmin = 3; - nfs_data.acregmax = 60; - nfs_data.acdirmin = 30; - nfs_data.acdirmax = 60; + nfs_data.acregmin = NFS_DEF_ACREGMIN; + nfs_data.acregmax = NFS_DEF_ACREGMAX; + nfs_data.acdirmin = NFS_DEF_ACDIRMIN; + nfs_data.acdirmax = NFS_DEF_ACDIRMAX; strcpy(buf, NFS_ROOT); /* Process options received from the remote server */ diff --git a/fs/nfs/super.c b/fs/nfs/super.c index de424d2..ebed63e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -518,13 +518,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, if (nfss->bsize != 0) seq_printf(m, ",bsize=%u", nfss->bsize); seq_printf(m, ",namlen=%u", nfss->namelen); - if (nfss->acregmin != 3*HZ || showdefaults) + if (nfss->acregmin != NFS_DEF_ACREGMIN*HZ || showdefaults) seq_printf(m, ",acregmin=%u", nfss->acregmin/HZ); - if (nfss->acregmax != 60*HZ || showdefaults) + if (nfss->acregmax != NFS_DEF_ACREGMAX*HZ || showdefaults) seq_printf(m, ",acregmax=%u", nfss->acregmax/HZ); - if (nfss->acdirmin != 30*HZ || showdefaults) + if (nfss->acdirmin != NFS_DEF_ACDIRMIN*HZ || showdefaults) seq_printf(m, ",acdirmin=%u", nfss->acdirmin/HZ); - if (nfss->acdirmax != 60*HZ || showdefaults) + if (nfss->acdirmax != NFS_DEF_ACDIRMAX*HZ || showdefaults) seq_printf(m, ",acdirmax=%u", nfss->acdirmax/HZ); for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { if (nfss->flags & nfs_infop->flag) @@ -1460,10 +1460,10 @@ static int nfs_validate_mount_data(void *options, args->flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP); args->rsize = NFS_MAX_FILE_IO_SIZE; args->wsize = NFS_MAX_FILE_IO_SIZE; - args->acregmin = 3; - args->acregmax = 60; - args->acdirmin = 30; - args->acdirmax = 60; + args->acregmin = NFS_DEF_ACREGMIN; + args->acregmax = NFS_DEF_ACREGMAX; + args->acdirmin = NFS_DEF_ACDIRMIN; + args->acdirmax = NFS_DEF_ACDIRMAX; args->mount_server.port = 0; /* autobind unless user sets port */ args->nfs_server.port = 0; /* autobind unless user sets port */ args->nfs_server.protocol = XPRT_TRANSPORT_TCP; @@ -2119,10 +2119,10 @@ static int nfs4_validate_mount_data(void *options, args->rsize = NFS_MAX_FILE_IO_SIZE; args->wsize = NFS_MAX_FILE_IO_SIZE; - args->acregmin = 3; - args->acregmax = 60; - args->acdirmin = 30; - args->acdirmax = 60; + args->acregmin = NFS_DEF_ACREGMIN; + args->acregmax = NFS_DEF_ACREGMAX; + args->acdirmin = NFS_DEF_ACDIRMIN; + args->acdirmax = NFS_DEF_ACDIRMAX; args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */ switch (data->version) { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 830d9cc..29d2619 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -20,6 +20,11 @@ #define NFS_MAX_UDP_TIMEOUT (60*HZ) #define NFS_MAX_TCP_TIMEOUT (600*HZ) +#define NFS_DEF_ACREGMIN (3) +#define NFS_DEF_ACREGMAX (60) +#define NFS_DEF_ACDIRMIN (30) +#define NFS_DEF_ACDIRMAX (60) + /* * When flushing a cluster of dirty pages, there can be different * strategies: -- cgit v0.10.2 From 01060c896e3e1ef53dcb914301c186932cd31b81 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 24 Jun 2008 16:33:38 -0400 Subject: NFS: Refactor logic for parsing NFS security flavor mount options Clean up: Refactor the NFS mount option parsing function to extract the security flavor parsing logic into a separate function. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ebed63e..6eb145e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -854,6 +854,82 @@ static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt) } /* + * Parse the value of the 'sec=' option. + * + * The flags setting is for v2/v3. The flavor_len setting is for v4. + * v2/v3 also need to know the difference between NULL and UNIX. + */ +static int nfs_parse_security_flavors(char *value, + struct nfs_parsed_mount_data *mnt) +{ + substring_t args[MAX_OPT_ARGS]; + + dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value); + + switch (match_token(value, nfs_secflavor_tokens, args)) { + case Opt_sec_none: + mnt->flags &= ~NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 0; + mnt->auth_flavors[0] = RPC_AUTH_NULL; + break; + case Opt_sec_sys: + mnt->flags &= ~NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 0; + mnt->auth_flavors[0] = RPC_AUTH_UNIX; + break; + case Opt_sec_krb5: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; + break; + case Opt_sec_krb5i: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; + break; + case Opt_sec_krb5p: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; + break; + case Opt_sec_lkey: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; + break; + case Opt_sec_lkeyi: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; + break; + case Opt_sec_lkeyp: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; + break; + case Opt_sec_spkm: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; + break; + case Opt_sec_spkmi: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; + break; + case Opt_sec_spkmp: + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; + break; + default: + return 0; + } + + return 1; +} + +/* * Error-check and convert a string of mount options from user space into * a data structure */ @@ -1054,73 +1130,10 @@ static int nfs_parse_mount_options(char *raw, string = match_strdup(args); if (string == NULL) goto out_nomem; - token = match_token(string, nfs_secflavor_tokens, args); + rc = nfs_parse_security_flavors(string, mnt); kfree(string); - - /* - * The flags setting is for v2/v3. The flavor_len - * setting is for v4. v2/v3 also need to know the - * difference between NULL and UNIX. - */ - switch (token) { - case Opt_sec_none: - mnt->flags &= ~NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 0; - mnt->auth_flavors[0] = RPC_AUTH_NULL; - break; - case Opt_sec_sys: - mnt->flags &= ~NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 0; - mnt->auth_flavors[0] = RPC_AUTH_UNIX; - break; - case Opt_sec_krb5: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; - mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; - break; - case Opt_sec_krb5i: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; - mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; - break; - case Opt_sec_krb5p: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; - mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; - break; - case Opt_sec_lkey: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; - mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; - break; - case Opt_sec_lkeyi: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; - mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; - break; - case Opt_sec_lkeyp: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; - mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; - break; - case Opt_sec_spkm: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; - mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; - break; - case Opt_sec_spkmi: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; - mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; - break; - case Opt_sec_spkmp: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; - mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; - break; - default: + if (!rc) goto out_unrec_sec; - } break; case Opt_proto: string = match_strdup(args); -- cgit v0.10.2 From dd07c94750cb1ee4449fb0db06623e1865b3e26e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 24 Jun 2008 16:33:46 -0400 Subject: NFS: Set security flavor default for NFSv2/3 mounts like other defaults Set the default security flavor when we set the other mount option default values. After this change, only the legacy user-space mount path needs to set the NFS_MOUNT_SECFLAVOUR flag. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 6eb145e..2f7e7f2 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -856,8 +856,7 @@ static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt) /* * Parse the value of the 'sec=' option. * - * The flags setting is for v2/v3. The flavor_len setting is for v4. - * v2/v3 also need to know the difference between NULL and UNIX. + * The flavor_len setting is for v4 mounts. */ static int nfs_parse_security_flavors(char *value, struct nfs_parsed_mount_data *mnt) @@ -868,57 +867,46 @@ static int nfs_parse_security_flavors(char *value, switch (match_token(value, nfs_secflavor_tokens, args)) { case Opt_sec_none: - mnt->flags &= ~NFS_MOUNT_SECFLAVOUR; mnt->auth_flavor_len = 0; mnt->auth_flavors[0] = RPC_AUTH_NULL; break; case Opt_sec_sys: - mnt->flags &= ~NFS_MOUNT_SECFLAVOUR; mnt->auth_flavor_len = 0; mnt->auth_flavors[0] = RPC_AUTH_UNIX; break; case Opt_sec_krb5: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; break; case Opt_sec_krb5i: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; break; case Opt_sec_krb5p: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; break; case Opt_sec_lkey: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; break; case Opt_sec_lkeyi: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; break; case Opt_sec_lkeyp: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; break; case Opt_sec_spkm: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; break; case Opt_sec_spkmi: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; break; case Opt_sec_spkmp: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; break; @@ -1480,6 +1468,7 @@ static int nfs_validate_mount_data(void *options, args->mount_server.port = 0; /* autobind unless user sets port */ args->nfs_server.port = 0; /* autobind unless user sets port */ args->nfs_server.protocol = XPRT_TRANSPORT_TCP; + args->auth_flavors[0] = RPC_AUTH_UNIX; switch (data->version) { case 1: @@ -1537,7 +1526,9 @@ static int nfs_validate_mount_data(void *options, args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); args->namlen = data->namlen; args->bsize = data->bsize; - args->auth_flavors[0] = data->pseudoflavor; + + if (data->flags & NFS_MOUNT_SECFLAVOUR) + args->auth_flavors[0] = data->pseudoflavor; if (!args->nfs_server.hostname) goto out_nomem; @@ -1601,9 +1592,6 @@ static int nfs_validate_mount_data(void *options, } } - if (!(args->flags & NFS_MOUNT_SECFLAVOUR)) - args->auth_flavors[0] = RPC_AUTH_UNIX; - #ifndef CONFIG_NFS_V3 if (args->flags & NFS_MOUNT_VER3) goto out_v3_not_compiled; -- cgit v0.10.2 From 6738b2512bdf93ffbefe108b38a9165d5a718c3f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 24 Jun 2008 16:33:54 -0400 Subject: NFS4: Set security flavor default for NFSv4 mounts like other defaults Set the default security flavor when we set the other mount option default values for NFSv4. This cleans up the NFSv4 mount option parsing path to look like the NFSv2/v3 one. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2f7e7f2..4bbdbf6 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2125,6 +2125,8 @@ static int nfs4_validate_mount_data(void *options, args->acdirmin = NFS_DEF_ACDIRMIN; args->acdirmax = NFS_DEF_ACDIRMAX; args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */ + args->auth_flavors[0] = RPC_AUTH_UNIX; + args->auth_flavor_len = 0; switch (data->version) { case 1: @@ -2140,18 +2142,13 @@ static int nfs4_validate_mount_data(void *options, &args->nfs_server.address)) goto out_no_address; - switch (data->auth_flavourlen) { - case 0: - args->auth_flavors[0] = RPC_AUTH_UNIX; - break; - case 1: + if (data->auth_flavourlen) { + if (data->auth_flavourlen > 1) + goto out_inval_auth; if (copy_from_user(&args->auth_flavors[0], data->auth_flavours, sizeof(args->auth_flavors[0]))) return -EFAULT; - break; - default: - goto out_inval_auth; } c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); @@ -2203,15 +2200,8 @@ static int nfs4_validate_mount_data(void *options, nfs_validate_transport_protocol(args); - switch (args->auth_flavor_len) { - case 0: - args->auth_flavors[0] = RPC_AUTH_UNIX; - break; - case 1: - break; - default: + if (args->auth_flavor_len > 1) goto out_inval_auth; - } if (args->client_address == NULL) goto out_no_client_address; -- cgit v0.10.2 From f45663ce5fb30f76a3414ab3ac69f4dd320e760a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 24 Jun 2008 19:28:02 -0400 Subject: NFS: Allow either strict or sloppy mount option parsing The kernel's NFS client mount option parser currently doesn't allow unrecognized or incorrect mount options. This prevents misspellings or incorrectly specified mount options from possibly causing silent data corruption. However, NFS mount options are not standardized, so different operating systems can use differently spelled mount options to support similar features, or can support mount options which no other operating system supports. "Sloppy" mount option parsing, which allows the parser to ignore any option it doesn't recognize, is needed to support automounters that often use maps that are shared between heterogenous operating systems. The legacy mount command ignores the validity of the values of mount options entirely, except for the "sec=" and "proto=" options. If an incorrect value is specified, the out-of-range value is passed to the kernel; if a value is specified that contains non-numeric characters, it appears as though the legacy mount command sets that option to zero (probably incorrect behavior in general). In any case, this sets a precedent which we will partially follow for the kernel mount option parser: + if "sloppy" is not set, the parser will be strict about both unrecognized options (same as legacy) and invalid option values (stricter than legacy) + if "sloppy" is set, the parser will ignore unrecognized options and invalid option values (same as legacy) An "invalid" option value in this case means that either the type (integer, short, or string) or sign (for integer values) of the specified value is incorrect. This patch does two things: it changes the NFS client's mount option parsing loop so that it parses the whole string instead of failing at the first unrecognized option or invalid option value. An unrecognized option or an invalid option value cause the option to be skipped. Then, the patch adds a "sloppy" mount option that allows the parsing to succeed anyway if there were any problems during parsing. When parsing a set of options is complete, if there are errors and "sloppy" was specified, return success anyway. Otherwise, only return success if there are no errors. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 4bbdbf6..47cf83e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -92,8 +92,8 @@ enum { Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, Opt_addr, Opt_mountaddr, Opt_clientaddr, - /* Mount options that are ignored */ - Opt_userspace, Opt_deprecated, + /* Special mount options */ + Opt_userspace, Opt_deprecated, Opt_sloppy, Opt_err }; @@ -103,6 +103,8 @@ static match_table_t nfs_mount_option_tokens = { { Opt_userspace, "fg" }, { Opt_userspace, "retry=%s" }, + { Opt_sloppy, "sloppy" }, + { Opt_soft, "soft" }, { Opt_hard, "hard" }, { Opt_deprecated, "intr" }, @@ -917,15 +919,22 @@ static int nfs_parse_security_flavors(char *value, return 1; } +static void nfs_parse_invalid_value(const char *option) +{ + dfprintk(MOUNT, "NFS: bad value specified for %s option\n", option); +} + /* * Error-check and convert a string of mount options from user space into - * a data structure + * a data structure. The whole mount string is processed; bad options are + * skipped as they are encountered. If there were no errors, return 1; + * otherwise return 0 (zero). */ static int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) { char *p, *string, *secdata; - int rc; + int rc, sloppy = 0, errors = 0; if (!raw) { dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); @@ -958,6 +967,10 @@ static int nfs_parse_mount_options(char *raw, token = match_token(p, nfs_mount_option_tokens, args); switch (token) { + + /* + * boolean options: foo/nofoo + */ case Opt_soft: mnt->flags |= NFS_MOUNT_SOFT; break; @@ -1025,103 +1038,145 @@ static int nfs_parse_mount_options(char *raw, mnt->flags |= NFS_MOUNT_UNSHARED; break; + /* + * options that take numeric values + */ case Opt_port: - if (match_int(args, &option)) - return 0; - if (option < 0 || option > 65535) - return 0; - mnt->nfs_server.port = option; + if (match_int(args, &option) || + option < 0 || option > USHORT_MAX) { + errors++; + nfs_parse_invalid_value("port"); + } else + mnt->nfs_server.port = option; break; case Opt_rsize: - if (match_int(args, &mnt->rsize)) - return 0; + if (match_int(args, &option) || option < 0) { + errors++; + nfs_parse_invalid_value("rsize"); + } else + mnt->rsize = option; break; case Opt_wsize: - if (match_int(args, &mnt->wsize)) - return 0; + if (match_int(args, &option) || option < 0) { + errors++; + nfs_parse_invalid_value("wsize"); + } else + mnt->wsize = option; break; case Opt_bsize: - if (match_int(args, &option)) - return 0; - if (option < 0) - return 0; - mnt->bsize = option; + if (match_int(args, &option) || option < 0) { + errors++; + nfs_parse_invalid_value("bsize"); + } else + mnt->bsize = option; break; case Opt_timeo: - if (match_int(args, &mnt->timeo)) - return 0; + if (match_int(args, &option) || option <= 0) { + errors++; + nfs_parse_invalid_value("timeo"); + } else + mnt->timeo = option; break; case Opt_retrans: - if (match_int(args, &mnt->retrans)) - return 0; + if (match_int(args, &option) || option <= 0) { + errors++; + nfs_parse_invalid_value("retrans"); + } else + mnt->retrans = option; break; case Opt_acregmin: - if (match_int(args, &mnt->acregmin)) - return 0; + if (match_int(args, &option) || option < 0) { + errors++; + nfs_parse_invalid_value("acregmin"); + } else + mnt->acregmin = option; break; case Opt_acregmax: - if (match_int(args, &mnt->acregmax)) - return 0; + if (match_int(args, &option) || option < 0) { + errors++; + nfs_parse_invalid_value("acregmax"); + } else + mnt->acregmax = option; break; case Opt_acdirmin: - if (match_int(args, &mnt->acdirmin)) - return 0; + if (match_int(args, &option) || option < 0) { + errors++; + nfs_parse_invalid_value("acdirmin"); + } else + mnt->acdirmin = option; break; case Opt_acdirmax: - if (match_int(args, &mnt->acdirmax)) - return 0; + if (match_int(args, &option) || option < 0) { + errors++; + nfs_parse_invalid_value("acdirmax"); + } else + mnt->acdirmax = option; break; case Opt_actimeo: - if (match_int(args, &option)) - return 0; - if (option < 0) - return 0; - mnt->acregmin = - mnt->acregmax = - mnt->acdirmin = - mnt->acdirmax = option; + if (match_int(args, &option) || option < 0) { + errors++; + nfs_parse_invalid_value("actimeo"); + } else + mnt->acregmin = mnt->acregmax = + mnt->acdirmin = mnt->acdirmax = option; break; case Opt_namelen: - if (match_int(args, &mnt->namlen)) - return 0; + if (match_int(args, &option) || option < 0) { + errors++; + nfs_parse_invalid_value("namlen"); + } else + mnt->namlen = option; break; case Opt_mountport: - if (match_int(args, &option)) - return 0; - if (option < 0 || option > 65535) - return 0; - mnt->mount_server.port = option; + if (match_int(args, &option) || + option < 0 || option > USHORT_MAX) { + errors++; + nfs_parse_invalid_value("mountport"); + } else + mnt->mount_server.port = option; break; case Opt_mountvers: - if (match_int(args, &option)) - return 0; - if (option < 0) - return 0; - mnt->mount_server.version = option; + if (match_int(args, &option) || + option < NFS_MNT_VERSION || + option > NFS_MNT3_VERSION) { + errors++; + nfs_parse_invalid_value("mountvers"); + } else + mnt->mount_server.version = option; break; case Opt_nfsvers: - if (match_int(args, &option)) - return 0; + if (match_int(args, &option)) { + errors++; + nfs_parse_invalid_value("nfsvers"); + break; + } switch (option) { - case 2: + case NFS2_VERSION: mnt->flags &= ~NFS_MOUNT_VER3; break; - case 3: + case NFS3_VERSION: mnt->flags |= NFS_MOUNT_VER3; break; default: - goto out_unrec_vers; + errors++; + nfs_parse_invalid_value("nfsvers"); } break; + /* + * options that take text values + */ case Opt_sec: string = match_strdup(args); if (string == NULL) goto out_nomem; rc = nfs_parse_security_flavors(string, mnt); kfree(string); - if (!rc) - goto out_unrec_sec; + if (!rc) { + errors++; + dfprintk(MOUNT, "NFS: unrecognized " + "security flavor\n"); + } break; case Opt_proto: string = match_strdup(args); @@ -1146,7 +1201,9 @@ static int nfs_parse_mount_options(char *raw, mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; break; default: - goto out_unrec_xprt; + errors++; + dfprintk(MOUNT, "NFS: unrecognized " + "transport protocol\n"); } break; case Opt_mountproto: @@ -1166,7 +1223,9 @@ static int nfs_parse_mount_options(char *raw, break; case Opt_xprt_rdma: /* not used for side protocols */ default: - goto out_unrec_xprt; + errors++; + dfprintk(MOUNT, "NFS: unrecognized " + "transport protocol\n"); } break; case Opt_addr: @@ -1204,6 +1263,13 @@ static int nfs_parse_mount_options(char *raw, kfree(string); break; + /* + * Special options + */ + case Opt_sloppy: + sloppy = 1; + dfprintk(MOUNT, "NFS: relaxing parsing rules\n"); + break; case Opt_userspace: case Opt_deprecated: dfprintk(MOUNT, "NFS: ignoring mount option " @@ -1211,7 +1277,9 @@ static int nfs_parse_mount_options(char *raw, break; default: - goto out_unknown; + errors++; + dfprintk(MOUNT, "NFS: unrecognized mount option " + "'%s'\n", p); } } @@ -1224,21 +1292,6 @@ out_security_failure: free_secdata(secdata); printk(KERN_INFO "NFS: security options invalid: %d\n", rc); return 0; -out_unrec_vers: - printk(KERN_INFO "NFS: unrecognized NFS version number\n"); - return 0; - -out_unrec_xprt: - printk(KERN_INFO "NFS: unrecognized transport protocol\n"); - return 0; - -out_unrec_sec: - printk(KERN_INFO "NFS: unrecognized security flavor\n"); - return 0; - -out_unknown: - printk(KERN_INFO "NFS: unknown mount option: %s\n", p); - return 0; } /* -- cgit v0.10.2 From 381ba74af55e58bca4c01553835a360a9f6fbb07 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 7 Jul 2008 12:18:53 -0400 Subject: SUNRPC: Ensure our task is notified when an rpcbind call is done If another task is busy in rpcb_getport_async number, it is more efficient to have it wake us up when it has finished instead of arbitrarily sleeping for 5 seconds. Also ensure that rpcb_wake_rpcbind_waiters() is called regardless of whether or not rpcb_getport_done() gets called. Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 09631f6..76739e9 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -942,11 +942,9 @@ call_bind_status(struct rpc_task *task) } switch (task->tk_status) { - case -EAGAIN: - dprintk("RPC: %5u rpcbind waiting for another request " - "to finish\n", task->tk_pid); - /* avoid busy-waiting here -- could be a network outage. */ - rpc_delay(task, 5*HZ); + case -ENOMEM: + dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid); + rpc_delay(task, HZ >> 2); goto retry_timeout; case -EACCES: dprintk("RPC: %5u remote rpcbind: RPC program/version " diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index c62e446..24e93e0 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -68,6 +68,7 @@ enum { #define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) static void rpcb_getport_done(struct rpc_task *, void *); +static void rpcb_map_release(void *data); static struct rpc_program rpcb_program; struct rpcbind_args { @@ -80,6 +81,8 @@ struct rpcbind_args { const char * r_netid; const char * r_addr; const char * r_owner; + + int r_status; }; static struct rpc_procinfo rpcb_procedures2[]; @@ -93,14 +96,6 @@ struct rpcb_info { static struct rpcb_info rpcb_next_version[]; static struct rpcb_info rpcb_next_version6[]; -static void rpcb_map_release(void *data) -{ - struct rpcbind_args *map = data; - - xprt_put(map->r_xprt); - kfree(map); -} - static const struct rpc_call_ops rpcb_getport_ops = { .rpc_call_done = rpcb_getport_done, .rpc_release = rpcb_map_release, @@ -112,6 +107,15 @@ static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status) rpc_wake_up_status(&xprt->binding, status); } +static void rpcb_map_release(void *data) +{ + struct rpcbind_args *map = data; + + rpcb_wake_rpcbind_waiters(map->r_xprt, map->r_status); + xprt_put(map->r_xprt); + kfree(map); +} + static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, size_t salen, int proto, u32 version, int privileged) @@ -293,17 +297,16 @@ void rpcb_getport_async(struct rpc_task *task) /* Autobind on cloned rpc clients is discouraged */ BUG_ON(clnt->cl_parent != clnt); + /* Put self on the wait queue to ensure we get notified if + * some other task is already attempting to bind the port */ + rpc_sleep_on(&xprt->binding, task, NULL); + if (xprt_test_and_set_binding(xprt)) { - status = -EAGAIN; /* tell caller to check again */ dprintk("RPC: %5u %s: waiting for another binder\n", task->tk_pid, __func__); - goto bailout_nowake; + return; } - /* Put self on queue before sending rpcbind request, in case - * rpcb_getport_done completes before we return from rpc_run_task */ - rpc_sleep_on(&xprt->binding, task, NULL); - /* Someone else may have bound if we slept */ if (xprt_bound(xprt)) { status = 0; @@ -365,15 +368,15 @@ void rpcb_getport_async(struct rpc_task *task) map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ + map->r_status = -EIO; child = rpcb_call_async(rpcb_clnt, map, proc); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { - status = -EIO; /* rpcb_map_release() has freed the arguments */ dprintk("RPC: %5u %s: rpc_run_task failed\n", task->tk_pid, __func__); - goto bailout_nofree; + return; } rpc_put_task(child); @@ -382,7 +385,6 @@ void rpcb_getport_async(struct rpc_task *task) bailout_nofree: rpcb_wake_rpcbind_waiters(xprt, status); -bailout_nowake: task->tk_status = status; } EXPORT_SYMBOL_GPL(rpcb_getport_async); @@ -421,7 +423,7 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) dprintk("RPC: %5u rpcb_getport_done(status %d, port %u)\n", child->tk_pid, status, map->r_port); - rpcb_wake_rpcbind_waiters(xprt, status); + map->r_status = status; } static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, -- cgit v0.10.2 From ae8547b0a9e5d718ce272ddc48f91703a0f52a0b Mon Sep 17 00:00:00 2001 From: Hans Reiser Date: Wed, 7 May 2008 15:48:57 +0300 Subject: VFS: move inode_lock into sync_sb_inodes This patch makes 'sync_sb_inodes()' lock 'inode_lock', rather than expect that the caller will do this. This change was previously done by Hans Reiser and sat in the -mm tree. Signed-off-by: Artem Bityutskiy diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ae45f77..16519fe 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -424,8 +424,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so * that it can be located for waiting on in __writeback_single_inode(). * - * Called under inode_lock. - * * If `bdi' is non-zero then we're being asked to writeback a specific queue. * This function assumes that the blockdev superblock's inodes are backed by * a variety of queues, so all inodes are searched. For other superblocks, @@ -446,6 +444,7 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) { const unsigned long start = jiffies; /* livelock avoidance */ + spin_lock(&inode_lock); if (!wbc->for_kupdate || list_empty(&sb->s_io)) queue_io(sb, wbc->older_than_this); @@ -524,6 +523,7 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) if (!list_empty(&sb->s_more_io)) wbc->more_io = 1; } + spin_unlock(&inode_lock); return; /* Leave any unwritten inodes on s_io */ } @@ -565,11 +565,8 @@ restart: * be unmounted by the time it is released. */ if (down_read_trylock(&sb->s_umount)) { - if (sb->s_root) { - spin_lock(&inode_lock); + if (sb->s_root) sync_sb_inodes(sb, wbc); - spin_unlock(&inode_lock); - } up_read(&sb->s_umount); } spin_lock(&sb_lock); @@ -607,9 +604,7 @@ void sync_inodes_sb(struct super_block *sb, int wait) (inodes_stat.nr_inodes - inodes_stat.nr_unused) + nr_dirty + nr_unstable; wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */ - spin_lock(&inode_lock); sync_sb_inodes(sb, &wbc); - spin_unlock(&inode_lock); } /* -- cgit v0.10.2 From 4ee6afd34409d296782a5b667d7991b1050e910a Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 7 May 2008 21:01:30 +0300 Subject: VFS: export sync_sb_inodes This patch exports the 'sync_sb_inodes()' which is needed for UBIFS because it has to force write-back from time to time. Namely, the UBIFS budgeting subsystem forces write-back when its pessimistic callculations show that there is no free space on the media. Signed-off-by: Artem Bityutskiy diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 16519fe..25adfc3 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -439,8 +439,8 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * on the writer throttling path, and we get decent balancing between many * throttled threads: we don't want them all piling up on inode_sync_wait. */ -static void -sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) +void generic_sync_sb_inodes(struct super_block *sb, + struct writeback_control *wbc) { const unsigned long start = jiffies; /* livelock avoidance */ @@ -526,6 +526,13 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) spin_unlock(&inode_lock); return; /* Leave any unwritten inodes on s_io */ } +EXPORT_SYMBOL_GPL(generic_sync_sb_inodes); + +static void sync_sb_inodes(struct super_block *sb, + struct writeback_control *wbc) +{ + generic_sync_sb_inodes(sb, wbc); +} /* * Start writeback of dirty pagecache data against all unlocked inodes. diff --git a/include/linux/fs.h b/include/linux/fs.h index d8e2762..f9d2aab 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1729,6 +1729,8 @@ static inline void invalidate_remote_inode(struct inode *inode) extern int invalidate_inode_pages2(struct address_space *mapping); extern int invalidate_inode_pages2_range(struct address_space *mapping, pgoff_t start, pgoff_t end); +extern void generic_sync_sb_inodes(struct super_block *sb, + struct writeback_control *wbc); extern int write_inode_now(struct inode *, int); extern int filemap_fdatawrite(struct address_space *); extern int filemap_flush(struct address_space *); -- cgit v0.10.2 From 2d62f488585405bc9108c47cb06957397cfd1059 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 31 Jan 2008 17:25:00 +0200 Subject: do_mounts: allow UBI root device name Similarly to MTD devices, allow UBI devices. Signed-off-by: Adrian Hunter diff --git a/init/do_mounts.c b/init/do_mounts.c index 660c1e5..a1de1bf 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -372,7 +372,8 @@ void __init prepare_namespace(void) if (saved_root_name[0]) { root_device_name = saved_root_name; - if (!strncmp(root_device_name, "mtd", 3)) { + if (!strncmp(root_device_name, "mtd", 3) || + !strncmp(root_device_name, "ubi", 3)) { mount_block_root(root_device_name, root_mountflags); goto out; } -- cgit v0.10.2 From ce7231e92dac381f6e4f9cfdfdf9e0ea055223ad Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Tue, 13 May 2008 13:45:14 -0700 Subject: [PATCH 1/2] ocfs2: Add CONFIG_OCFS2_FS_STATS config option This patch adds config option CONFIG_OCFS2_FS_STATS to allow building the fs with instrumentation enabled. An upcoming patch will provide support to instrument cluster locking, which is a crucial overhead in a cluster file system. This config option allows users to avoid the cpu and memory overhead that is involved in gathering such statistics. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh diff --git a/fs/Kconfig b/fs/Kconfig index 2694648..9c07a4f 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -470,6 +470,14 @@ config OCFS2_FS_USERSPACE_CLUSTER It is safe to say Y, as the clustering method is run-time selectable. +config OCFS2_FS_STATS + bool "OCFS2 statistics" + depends on OCFS2_FS + default y + help + This option allows some fs statistics to be captured. Enabling + this option may increase the memory consumption. + config OCFS2_DEBUG_MASKLOG bool "OCFS2 logging support" depends on OCFS2_FS -- cgit v0.10.2 From 8ddb7b004dfa1832a750e199df8bff4b75b73565 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Tue, 13 May 2008 13:45:15 -0700 Subject: [PATCH 2/2] ocfs2: Instrument fs cluster locks This patch adds code to track the number of times the fs takes various cluster locks as well as the times associated with it. The information is made available to users via debugfs. This patch was originally written by Jan Kara . Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 80e20d9..80537b7 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -31,6 +31,7 @@ #include #include #include +#include #define MLOG_MASK_PREFIX ML_DLM_GLUE #include @@ -59,6 +60,9 @@ struct ocfs2_mask_waiter { struct completion mw_complete; unsigned long mw_mask; unsigned long mw_goal; +#ifdef CONFIG_OCFS2_FS_STATS + unsigned long long mw_lock_start; +#endif }; static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); @@ -366,6 +370,75 @@ static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) spin_unlock(&ocfs2_dlm_tracking_lock); } +#ifdef CONFIG_OCFS2_FS_STATS +static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) +{ + res->l_lock_num_prmode = 0; + res->l_lock_num_prmode_failed = 0; + res->l_lock_total_prmode = 0; + res->l_lock_max_prmode = 0; + res->l_lock_num_exmode = 0; + res->l_lock_num_exmode_failed = 0; + res->l_lock_total_exmode = 0; + res->l_lock_max_exmode = 0; + res->l_lock_refresh = 0; +} + +static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, + struct ocfs2_mask_waiter *mw, int ret) +{ + unsigned long long *num, *sum; + unsigned int *max, *failed; + struct timespec ts = current_kernel_time(); + unsigned long long time = timespec_to_ns(&ts) - mw->mw_lock_start; + + if (level == LKM_PRMODE) { + num = &res->l_lock_num_prmode; + sum = &res->l_lock_total_prmode; + max = &res->l_lock_max_prmode; + failed = &res->l_lock_num_prmode_failed; + } else if (level == LKM_EXMODE) { + num = &res->l_lock_num_exmode; + sum = &res->l_lock_total_exmode; + max = &res->l_lock_max_exmode; + failed = &res->l_lock_num_exmode_failed; + } else + return; + + (*num)++; + (*sum) += time; + if (time > *max) + *max = time; + if (ret) + (*failed)++; +} + +static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) +{ + lockres->l_lock_refresh++; +} + +static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) +{ + struct timespec ts = current_kernel_time(); + mw->mw_lock_start = timespec_to_ns(&ts); +} +#else +static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) +{ +} +static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, + int level, struct ocfs2_mask_waiter *mw, int ret) +{ +} +static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) +{ +} +static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) +{ +} +#endif + static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, struct ocfs2_lock_res *res, enum ocfs2_lock_type type, @@ -385,6 +458,8 @@ static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, res->l_flags = OCFS2_LOCK_INITIALIZED; ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); + + ocfs2_init_lock_stats(res); } void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) @@ -1048,6 +1123,7 @@ static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw) { INIT_LIST_HEAD(&mw->mw_item); init_completion(&mw->mw_complete); + ocfs2_init_start_time(mw); } static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) @@ -1254,6 +1330,7 @@ out: goto again; mlog_errno(ret); } + ocfs2_update_lock_stats(lockres, level, &mw, ret); mlog_exit(ret); return ret; @@ -1983,6 +2060,7 @@ static int ocfs2_inode_lock_update(struct inode *inode, le32_to_cpu(fe->i_flags)); ocfs2_refresh_inode(inode, fe); + ocfs2_track_lock_refresh(lockres); } status = 0; @@ -2267,6 +2345,7 @@ int ocfs2_super_lock(struct ocfs2_super *osb, if (status < 0) mlog_errno(status); + ocfs2_track_lock_refresh(lockres); } bail: mlog_exit(status); @@ -2461,7 +2540,7 @@ static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) } /* So that debugfs.ocfs2 can determine which format is being used */ -#define OCFS2_DLM_DEBUG_STR_VERSION 1 +#define OCFS2_DLM_DEBUG_STR_VERSION 2 static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) { int i; @@ -2502,6 +2581,47 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) for(i = 0; i < DLM_LVB_LEN; i++) seq_printf(m, "0x%x\t", lvb[i]); +#ifdef CONFIG_OCFS2_FS_STATS +# define lock_num_prmode(_l) (_l)->l_lock_num_prmode +# define lock_num_exmode(_l) (_l)->l_lock_num_exmode +# define lock_num_prmode_failed(_l) (_l)->l_lock_num_prmode_failed +# define lock_num_exmode_failed(_l) (_l)->l_lock_num_exmode_failed +# define lock_total_prmode(_l) (_l)->l_lock_total_prmode +# define lock_total_exmode(_l) (_l)->l_lock_total_exmode +# define lock_max_prmode(_l) (_l)->l_lock_max_prmode +# define lock_max_exmode(_l) (_l)->l_lock_max_exmode +# define lock_refresh(_l) (_l)->l_lock_refresh +#else +# define lock_num_prmode(_l) (0) +# define lock_num_exmode(_l) (0) +# define lock_num_prmode_failed(_l) (0) +# define lock_num_exmode_failed(_l) (0) +# define lock_total_prmode(_l) (0) +# define lock_total_exmode(_l) (0) +# define lock_max_prmode(_l) (0) +# define lock_max_exmode(_l) (0) +# define lock_refresh(_l) (0) +#endif + /* The following seq_print was added in version 2 of this output */ + seq_printf(m, "%llu\t" + "%llu\t" + "%u\t" + "%u\t" + "%llu\t" + "%llu\t" + "%u\t" + "%u\t" + "%u\t", + lock_num_prmode(lockres), + lock_num_exmode(lockres), + lock_num_prmode_failed(lockres), + lock_num_exmode_failed(lockres), + lock_total_prmode(lockres), + lock_total_exmode(lockres), + lock_max_prmode(lockres), + lock_max_exmode(lockres), + lock_refresh(lockres)); + /* End the line */ seq_printf(m, "\n"); return 0; diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 3169237..1cb814b 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -132,6 +132,18 @@ struct ocfs2_lock_res { wait_queue_head_t l_event; struct list_head l_debug_list; + +#ifdef CONFIG_OCFS2_FS_STATS + unsigned long long l_lock_num_prmode; /* PR acquires */ + unsigned long long l_lock_num_exmode; /* EX acquires */ + unsigned int l_lock_num_prmode_failed; /* Failed PR gets */ + unsigned int l_lock_num_exmode_failed; /* Failed EX gets */ + unsigned long long l_lock_total_prmode; /* Tot wait for PR */ + unsigned long long l_lock_total_exmode; /* Tot wait for EX */ + unsigned int l_lock_max_prmode; /* Max wait for PR */ + unsigned int l_lock_max_exmode; /* Max wait for EX */ + unsigned int l_lock_refresh; /* Disk refreshes */ +#endif }; struct ocfs2_dlm_debug { -- cgit v0.10.2 From dd25e55ea133b14678cfaa9e205b082b24b26dbc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 28 May 2008 14:41:00 -0700 Subject: ocfs2: fix printk format warnings with OCFS2_FS_STATS=n Fix printk format warnings when OCFS2_FS_STATS=n: linux-next-20080528/fs/ocfs2/dlmglue.c: In function 'ocfs2_dlm_seq_show': linux-next-20080528/fs/ocfs2/dlmglue.c:2623: warning: format '%llu' expects type 'long long unsigned int', but argument 3 has type 'int' linux-next-20080528/fs/ocfs2/dlmglue.c:2623: warning: format '%llu' expects type 'long long unsigned int', but argument 4 has type 'int' linux-next-20080528/fs/ocfs2/dlmglue.c:2623: warning: format '%llu' expects type 'long long unsigned int', but argument 7 has type 'int' linux-next-20080528/fs/ocfs2/dlmglue.c:2623: warning: format '%llu' expects type 'long long unsigned int', but argument 8 has type 'int' Signed-off-by: Randy Dunlap Signed-off-by: Mark Fasheh diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 80537b7..eae3d64 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2592,12 +2592,12 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) # define lock_max_exmode(_l) (_l)->l_lock_max_exmode # define lock_refresh(_l) (_l)->l_lock_refresh #else -# define lock_num_prmode(_l) (0) -# define lock_num_exmode(_l) (0) +# define lock_num_prmode(_l) (0ULL) +# define lock_num_exmode(_l) (0ULL) # define lock_num_prmode_failed(_l) (0) # define lock_num_exmode_failed(_l) (0) -# define lock_total_prmode(_l) (0) -# define lock_total_exmode(_l) (0) +# define lock_total_prmode(_l) (0ULL) +# define lock_total_exmode(_l) (0ULL) # define lock_max_prmode(_l) (0) # define lock_max_exmode(_l) (0) # define lock_refresh(_l) (0) -- cgit v0.10.2 From 7600c72b75bab374ad39b2a4799a0728579a8e2f Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 9 Jun 2008 16:34:23 -0700 Subject: ocfs2: use simple_read_from_buffer() Signed-off-by: Akinobu Mita Acked-by: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Mark Fasheh diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index c021280..24e0b19 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -549,26 +549,17 @@ static ssize_t ocfs2_control_read(struct file *file, size_t count, loff_t *ppos) { - char *proto_string = OCFS2_CONTROL_PROTO; - size_t to_write = 0; - - if (*ppos >= OCFS2_CONTROL_PROTO_LEN) - return 0; - - to_write = OCFS2_CONTROL_PROTO_LEN - *ppos; - if (to_write > count) - to_write = count; - if (copy_to_user(buf, proto_string + *ppos, to_write)) - return -EFAULT; + ssize_t ret; - *ppos += to_write; + ret = simple_read_from_buffer(buf, count, ppos, + OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN); /* Have we read the whole protocol list? */ - if (*ppos >= OCFS2_CONTROL_PROTO_LEN) + if (ret > 0 && *ppos >= OCFS2_CONTROL_PROTO_LEN) ocfs2_control_set_handshake_state(file, OCFS2_CONTROL_HANDSHAKE_READ); - return to_write; + return ret; } static int ocfs2_control_release(struct inode *inode, struct file *file) -- cgit v0.10.2 From 56753bd3b9220f6f2477eb1cf97f40c24e0a4c91 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 9 Jun 2008 11:24:41 -0700 Subject: ocfs2: Silence an error message in ocfs2_file_aio_read() This patch silences an EINVAL error message in ocfs2_file_aio_read() that is always due to a user error. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 57e0d30..e8514e8 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2202,7 +2202,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); if (ret == -EINVAL) - mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n"); + mlog(0, "generic_file_aio_read returned -EINVAL\n"); /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); -- cgit v0.10.2 From 01af482037d32c215aab208a0b110ffe6fd782c0 Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Tue, 10 Jun 2008 14:24:48 +0800 Subject: ocfs2: Handle error during journal load This patch ensures the mount fails if the fs is unable to load the journal. Signed-off-by: Wengang Wang Acked-by: Sunil Mushran Signed-off-by: Mark Fasheh diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index df63ba2..ccecfe5 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1703,7 +1703,11 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) local = ocfs2_mount_local(osb); /* will play back anything left in the journal. */ - ocfs2_journal_load(osb->journal, local); + status = ocfs2_journal_load(osb->journal, local); + if (status < 0) { + mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status); + goto finally; + } if (dirty) { /* recover my local alloc if we didn't unmount cleanly. */ -- cgit v0.10.2 From 461c6a30eca6f25add1dadb9fd8a1d8e89a6e627 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 19 May 2008 16:23:37 -0700 Subject: ocfs2/net: Silence build warnings on sparc64 suseconds_t is type long on most arches except sparc64 where it is type int. This patch silences the following warnings that are generated when building on it. netdebug.c: In function 'nst_seq_show': netdebug.c:152: warning: format '%lu' expects type 'long unsigned int', but argument 13 has type 'suseconds_t' netdebug.c:152: warning: format '%lu' expects type 'long unsigned int', but argument 15 has type 'suseconds_t' netdebug.c:152: warning: format '%lu' expects type 'long unsigned int', but argument 17 has type 'suseconds_t' netdebug.c: In function 'sc_seq_show': netdebug.c:332: warning: format '%lu' expects type 'long unsigned int', but argument 19 has type 'suseconds_t' netdebug.c:332: warning: format '%lu' expects type 'long unsigned int', but argument 21 has type 'suseconds_t' netdebug.c:332: warning: format '%lu' expects type 'long unsigned int', but argument 23 has type 'suseconds_t' netdebug.c:332: warning: format '%lu' expects type 'long unsigned int', but argument 25 has type 'suseconds_t' netdebug.c:332: warning: format '%lu' expects type 'long unsigned int', but argument 27 has type 'suseconds_t' netdebug.c:332: warning: format '%lu' expects type 'long unsigned int', but argument 29 has type 'suseconds_t' Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index 7bf3c0e..d8bfa0e 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c @@ -146,8 +146,10 @@ static int nst_seq_show(struct seq_file *seq, void *v) nst->st_task->comm, nst->st_node, nst->st_sc, nst->st_id, nst->st_msg_type, nst->st_msg_key, - nst->st_sock_time.tv_sec, nst->st_sock_time.tv_usec, - nst->st_send_time.tv_sec, nst->st_send_time.tv_usec, + nst->st_sock_time.tv_sec, + (unsigned long)nst->st_sock_time.tv_usec, + nst->st_send_time.tv_sec, + (unsigned long)nst->st_send_time.tv_usec, nst->st_status_time.tv_sec, nst->st_status_time.tv_usec); } @@ -274,7 +276,7 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos) return sc; /* unused, just needs to be null when done */ } -#define TV_SEC_USEC(TV) TV.tv_sec, TV.tv_usec +#define TV_SEC_USEC(TV) TV.tv_sec, (unsigned long)TV.tv_usec static int sc_seq_show(struct seq_file *seq, void *v) { -- cgit v0.10.2 From e407e39783a7206d20b3e9961aedf272de966e31 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 12 Jun 2008 22:35:39 -0700 Subject: ocfs2: Fix CONFIG_OCFS2_DEBUG_FS #ifdefs A couple places use OCFS2_DEBUG_FS where they really mean CONFIG_OCFS2_DEBUG_FS. Reported-by: Robert P. J. Day Signed-off-by: Joel Becker diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 9698338..a8c19cb 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -329,7 +329,7 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks) mlog(0, "Trying to extend transaction by %d blocks\n", nblocks); -#ifdef OCFS2_DEBUG_FS +#ifdef CONFIG_OCFS2_DEBUG_FS status = 1; #else status = journal_extend(handle, nblocks); diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index be774bd..28e492e 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -498,7 +498,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; -#ifdef OCFS2_DEBUG_FS +#ifdef CONFIG_OCFS2_DEBUG_FS if (le32_to_cpu(alloc->id1.bitmap1.i_used) != ocfs2_local_alloc_count_bits(alloc)) { ocfs2_error(osb->sb, "local alloc inode %llu says it has " -- cgit v0.10.2 From fe9f387740ac7cb3b7c2fffa76807e997e6c6292 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 12 Jun 2008 22:39:18 -0700 Subject: ocfs2: Don't snprintf() without a format. Some system files are per-slot. Their names include the slot number. ocfs2_sprintf_system_inode_name() uses the system inode definitions to fill in the slot number with snprintf(). For global system files, there is no node number, and the name was printed as a format with no arguments. -Wformat-nonliteral and -Wformat-security don't like this. Instead, use a static "%s" format and the name as the argument. Signed-off-by: Joel Becker diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 52c4266..3f19451 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -901,7 +901,7 @@ static inline int ocfs2_sprintf_system_inode_name(char *buf, int len, * list has a copy per slot. */ if (type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE) - chars = snprintf(buf, len, + chars = snprintf(buf, len, "%s", ocfs2_system_inodes[type].si_name); else chars = snprintf(buf, len, -- cgit v0.10.2 From 6f61076406251626be39651d114fac412b1e0c39 Mon Sep 17 00:00:00 2001 From: Louis Rilling Date: Mon, 16 Jun 2008 19:00:58 +0200 Subject: configfs: Introduce configfs_dirent_lock This patch introduces configfs_dirent_lock spinlock to protect configfs_dirent traversals against linkage mutations (add/del/move). This will allow configfs_detach_prep() to avoid locking i_mutexes. Locking rules for configfs_dirent linkage mutations are the same plus the requirement of taking configfs_dirent_lock. For configfs_dirent walking, one can either take appropriate i_mutex as before, or take configfs_dirent_lock. The spinlock could actually be a mutex, but the critical sections are either O(1) or should not be too long (default groups walking in last patch). ChangeLog: - Clarify the comment on configfs_dirent_lock usage - Move sd->s_element init before linking the new dirent - In lseek(), do not release configfs_dirent_lock before the dirent is relinked. Signed-off-by: Louis Rilling Signed-off-by: Joel Becker diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index cca9860..5a33b58 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -26,6 +26,7 @@ #include #include +#include struct configfs_dirent { atomic_t s_count; @@ -49,6 +50,8 @@ struct configfs_dirent { #define CONFIGFS_USET_DROPPING 0x0100 #define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR) +extern spinlock_t configfs_dirent_lock; + extern struct vfsmount * configfs_mount; extern struct kmem_cache *configfs_dir_cachep; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index a48dc7d..2619f48 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -35,6 +35,14 @@ #include "configfs_internal.h" DECLARE_RWSEM(configfs_rename_sem); +/* + * Protects mutations of configfs_dirent linkage together with proper i_mutex + * Mutators of configfs_dirent linkage must *both* have the proper inode locked + * and configfs_dirent_lock locked, in that order. + * This allows one to safely traverse configfs_dirent trees without having to + * lock inodes. + */ +DEFINE_SPINLOCK(configfs_dirent_lock); static void configfs_d_iput(struct dentry * dentry, struct inode * inode) @@ -79,8 +87,10 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare atomic_set(&sd->s_count, 1); INIT_LIST_HEAD(&sd->s_links); INIT_LIST_HEAD(&sd->s_children); - list_add(&sd->s_sibling, &parent_sd->s_children); sd->s_element = element; + spin_lock(&configfs_dirent_lock); + list_add(&sd->s_sibling, &parent_sd->s_children); + spin_unlock(&configfs_dirent_lock); return sd; } @@ -173,7 +183,9 @@ static int create_dir(struct config_item * k, struct dentry * p, } else { struct configfs_dirent *sd = d->d_fsdata; if (sd) { + spin_lock(&configfs_dirent_lock); list_del_init(&sd->s_sibling); + spin_unlock(&configfs_dirent_lock); configfs_put(sd); } } @@ -224,7 +236,9 @@ int configfs_create_link(struct configfs_symlink *sl, else { struct configfs_dirent *sd = dentry->d_fsdata; if (sd) { + spin_lock(&configfs_dirent_lock); list_del_init(&sd->s_sibling); + spin_unlock(&configfs_dirent_lock); configfs_put(sd); } } @@ -238,7 +252,9 @@ static void remove_dir(struct dentry * d) struct configfs_dirent * sd; sd = d->d_fsdata; + spin_lock(&configfs_dirent_lock); list_del_init(&sd->s_sibling); + spin_unlock(&configfs_dirent_lock); configfs_put(sd); if (d->d_inode) simple_rmdir(parent->d_inode,d); @@ -410,7 +426,9 @@ static void detach_attrs(struct config_item * item) list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) { if (!sd->s_element || !(sd->s_type & CONFIGFS_NOT_PINNED)) continue; + spin_lock(&configfs_dirent_lock); list_del_init(&sd->s_sibling); + spin_unlock(&configfs_dirent_lock); configfs_drop_dentry(sd, dentry); configfs_put(sd); } @@ -1268,7 +1286,9 @@ static int configfs_dir_close(struct inode *inode, struct file *file) struct configfs_dirent * cursor = file->private_data; mutex_lock(&dentry->d_inode->i_mutex); + spin_lock(&configfs_dirent_lock); list_del_init(&cursor->s_sibling); + spin_unlock(&configfs_dirent_lock); mutex_unlock(&dentry->d_inode->i_mutex); release_configfs_dirent(cursor); @@ -1308,7 +1328,9 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir /* fallthrough */ default: if (filp->f_pos == 2) { + spin_lock(&configfs_dirent_lock); list_move(q, &parent_sd->s_children); + spin_unlock(&configfs_dirent_lock); } for (p=q->next; p!= &parent_sd->s_children; p=p->next) { struct configfs_dirent *next; @@ -1331,7 +1353,9 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir dt_type(next)) < 0) return 0; + spin_lock(&configfs_dirent_lock); list_move(q, p); + spin_unlock(&configfs_dirent_lock); p = q; filp->f_pos++; } @@ -1362,6 +1386,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin) struct list_head *p; loff_t n = file->f_pos - 2; + spin_lock(&configfs_dirent_lock); list_del(&cursor->s_sibling); p = sd->s_children.next; while (n && p != &sd->s_children) { @@ -1373,6 +1398,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin) p = p->next; } list_add_tail(&cursor->s_sibling, p); + spin_unlock(&configfs_dirent_lock); } } mutex_unlock(&dentry->d_inode->i_mutex); diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index b9a1d81..4803ccc 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -247,7 +247,9 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name) if (!sd->s_element) continue; if (!strcmp(configfs_get_name(sd), name)) { + spin_lock(&configfs_dirent_lock); list_del_init(&sd->s_sibling); + spin_unlock(&configfs_dirent_lock); configfs_drop_dentry(sd, dir); configfs_put(sd); break; diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index 2a731ef..676c84c 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -169,7 +169,9 @@ int configfs_unlink(struct inode *dir, struct dentry *dentry) parent_item = configfs_get_config_item(dentry->d_parent); type = parent_item->ci_type; + spin_lock(&configfs_dirent_lock); list_del_init(&sd->s_sibling); + spin_unlock(&configfs_dirent_lock); configfs_drop_dentry(sd, dentry->d_parent); dput(dentry); configfs_put(sd); -- cgit v0.10.2 From 5301a77da2da1e4c22573e0e8d394a653b8ad9f9 Mon Sep 17 00:00:00 2001 From: Louis Rilling Date: Mon, 16 Jun 2008 19:00:59 +0200 Subject: configfs: Protect configfs_dirent s_links list mutations Symlinks to a config_item are listed under its configfs_dirent s_links, but the list mutations are not protected by any common lock. This patch uses the configfs_dirent_lock spinlock to add the necessary protection. Note: we should also protect the list_empty() test in configfs_detach_prep() but 1/ the lock should not be released immediately because nothing would prevent the list from being filled after a successful list_empty() test, making the problem tricky, 2/ this will be solved by the rmdir() vs rename() deadlock bugfix. Signed-off-by: Louis Rilling Signed-off-by: Joel Becker diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 2619f48..a08e5c2 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -37,10 +37,11 @@ DECLARE_RWSEM(configfs_rename_sem); /* * Protects mutations of configfs_dirent linkage together with proper i_mutex + * Also protects mutations of symlinks linkage to target configfs_dirent * Mutators of configfs_dirent linkage must *both* have the proper inode locked * and configfs_dirent_lock locked, in that order. - * This allows one to safely traverse configfs_dirent trees without having to - * lock inodes. + * This allows one to safely traverse configfs_dirent trees and symlinks without + * having to lock inodes. */ DEFINE_SPINLOCK(configfs_dirent_lock); diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index 676c84c..faeb441 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -77,12 +77,15 @@ static int create_link(struct config_item *parent_item, sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL); if (sl) { sl->sl_target = config_item_get(item); - /* FIXME: needs a lock, I'd bet */ + spin_lock(&configfs_dirent_lock); list_add(&sl->sl_list, &target_sd->s_links); + spin_unlock(&configfs_dirent_lock); ret = configfs_create_link(sl, parent_item->ci_dentry, dentry); if (ret) { + spin_lock(&configfs_dirent_lock); list_del_init(&sl->sl_list); + spin_unlock(&configfs_dirent_lock); config_item_put(item); kfree(sl); } @@ -186,8 +189,9 @@ int configfs_unlink(struct inode *dir, struct dentry *dentry) type->ct_item_ops->drop_link(parent_item, sl->sl_target); - /* FIXME: Needs lock */ + spin_lock(&configfs_dirent_lock); list_del_init(&sl->sl_list); + spin_unlock(&configfs_dirent_lock); /* Put reference from create_link() */ config_item_put(sl->sl_target); -- cgit v0.10.2 From 107ed40bd070df5e4a0a012042c45c40963dc574 Mon Sep 17 00:00:00 2001 From: Louis Rilling Date: Mon, 16 Jun 2008 19:01:00 +0200 Subject: configfs: Make configfs_new_dirent() return error code instead of NULL This patch makes configfs_new_dirent return negative error code instead of NULL, which will be useful in the next patch to differentiate ENOMEM from ENOENT. Signed-off-by: Louis Rilling Signed-off-by: Joel Becker diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index a08e5c2..918a332 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "configfs_internal.h" @@ -83,7 +84,7 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare sd = kmem_cache_zalloc(configfs_dir_cachep, GFP_KERNEL); if (!sd) - return NULL; + return ERR_PTR(-ENOMEM); atomic_set(&sd->s_count, 1); INIT_LIST_HEAD(&sd->s_links); @@ -129,8 +130,8 @@ int configfs_make_dirent(struct configfs_dirent * parent_sd, struct configfs_dirent * sd; sd = configfs_new_dirent(parent_sd, element); - if (!sd) - return -ENOMEM; + if (IS_ERR(sd)) + return PTR_ERR(sd); sd->s_mode = mode; sd->s_type = type; @@ -1277,7 +1278,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file) file->private_data = configfs_new_dirent(parent_sd, NULL); mutex_unlock(&dentry->d_inode->i_mutex); - return file->private_data ? 0 : -ENOMEM; + return IS_ERR(file->private_data) ? PTR_ERR(file->private_data) : 0; } -- cgit v0.10.2 From b3e76af87441fc36eef3516d73ab2314e7b2d911 Mon Sep 17 00:00:00 2001 From: Louis Rilling Date: Mon, 16 Jun 2008 19:01:01 +0200 Subject: configfs: Fix deadlock with racing rmdir() and rename() This patch fixes the deadlock between racing sys_rename() and configfs_rmdir(). The idea is to avoid locking i_mutexes of default groups in configfs_detach_prep(), and rely instead on the new configfs_dirent_lock to protect against configfs_dirent's linkage mutations. To ensure that an mkdir() racing with rmdir() will not create new items in a to-be-removed default group, we make configfs_new_dirent() check for the CONFIGFS_USET_DROPPING flag right before linking the new dirent, and return error if the flag is set. This makes racing mkdir()/symlink()/dir_open() fail in places where errors could already happen, resp. in (attach_item()|attach_group())/create_link()/new_dirent(). configfs_depend() remains safe since it locks all the path from configfs root, and is thus mutually exclusive with rmdir(). An advantage of this is that now detach_groups() unconditionnaly takes the default groups i_mutex, which makes it more consistent with populate_groups(). Signed-off-by: Louis Rilling Signed-off-by: Joel Becker diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 918a332..d5b5985 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -43,6 +43,10 @@ DECLARE_RWSEM(configfs_rename_sem); * and configfs_dirent_lock locked, in that order. * This allows one to safely traverse configfs_dirent trees and symlinks without * having to lock inodes. + * + * Protects setting of CONFIGFS_USET_DROPPING: checking the flag + * unlocked is not reliable unless in detach_groups() called from + * rmdir()/unregister() and from configfs_attach_group() */ DEFINE_SPINLOCK(configfs_dirent_lock); @@ -91,6 +95,11 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare INIT_LIST_HEAD(&sd->s_children); sd->s_element = element; spin_lock(&configfs_dirent_lock); + if (parent_sd->s_type & CONFIGFS_USET_DROPPING) { + spin_unlock(&configfs_dirent_lock); + kmem_cache_free(configfs_dir_cachep, sd); + return ERR_PTR(-ENOENT); + } list_add(&sd->s_sibling, &parent_sd->s_children); spin_unlock(&configfs_dirent_lock); @@ -349,11 +358,11 @@ static struct dentry * configfs_lookup(struct inode *dir, /* * Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are - * attributes and are removed by rmdir(). We recurse, taking i_mutex - * on all children that are candidates for default detach. If the - * result is clean, then configfs_detach_group() will handle dropping - * i_mutex. If there is an error, the caller will clean up the i_mutex - * holders via configfs_detach_rollback(). + * attributes and are removed by rmdir(). We recurse, setting + * CONFIGFS_USET_DROPPING on all children that are candidates for + * default detach. + * If there is an error, the caller will reset the flags via + * configfs_detach_rollback(). */ static int configfs_detach_prep(struct dentry *dentry) { @@ -370,8 +379,7 @@ static int configfs_detach_prep(struct dentry *dentry) if (sd->s_type & CONFIGFS_NOT_PINNED) continue; if (sd->s_type & CONFIGFS_USET_DEFAULT) { - mutex_lock(&sd->s_dentry->d_inode->i_mutex); - /* Mark that we've taken i_mutex */ + /* Mark that we're trying to drop the group */ sd->s_type |= CONFIGFS_USET_DROPPING; /* @@ -392,7 +400,7 @@ out: } /* - * Walk the tree, dropping i_mutex wherever CONFIGFS_USET_DROPPING is + * Walk the tree, resetting CONFIGFS_USET_DROPPING wherever it was * set. */ static void configfs_detach_rollback(struct dentry *dentry) @@ -403,11 +411,7 @@ static void configfs_detach_rollback(struct dentry *dentry) list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { if (sd->s_type & CONFIGFS_USET_DEFAULT) { configfs_detach_rollback(sd->s_dentry); - - if (sd->s_type & CONFIGFS_USET_DROPPING) { - sd->s_type &= ~CONFIGFS_USET_DROPPING; - mutex_unlock(&sd->s_dentry->d_inode->i_mutex); - } + sd->s_type &= ~CONFIGFS_USET_DROPPING; } } } @@ -486,16 +490,12 @@ static void detach_groups(struct config_group *group) child = sd->s_dentry; + mutex_lock(&child->d_inode->i_mutex); + configfs_detach_group(sd->s_element); child->d_inode->i_flags |= S_DEAD; - /* - * From rmdir/unregister, a configfs_detach_prep() pass - * has taken our i_mutex for us. Drop it. - * From mkdir/register cleanup, there is no sem held. - */ - if (sd->s_type & CONFIGFS_USET_DROPPING) - mutex_unlock(&child->d_inode->i_mutex); + mutex_unlock(&child->d_inode->i_mutex); d_delete(child); dput(child); @@ -1181,12 +1181,15 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) return -EINVAL; } + spin_lock(&configfs_dirent_lock); ret = configfs_detach_prep(dentry); if (ret) { configfs_detach_rollback(dentry); + spin_unlock(&configfs_dirent_lock); config_item_put(parent_item); return ret; } + spin_unlock(&configfs_dirent_lock); /* Get a working ref for the duration of this function */ item = configfs_get_config_item(dentry); @@ -1476,9 +1479,11 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, I_MUTEX_PARENT); mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); + spin_lock(&configfs_dirent_lock); if (configfs_detach_prep(dentry)) { printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n"); } + spin_unlock(&configfs_dirent_lock); configfs_detach_group(&group->cg_item); dentry->d_inode->i_flags |= S_DEAD; mutex_unlock(&dentry->d_inode->i_mutex); -- cgit v0.10.2 From 6d8344baee99402de58b5fa5dfea197242955c15 Mon Sep 17 00:00:00 2001 From: Louis Rilling Date: Mon, 16 Jun 2008 19:01:02 +0200 Subject: configfs: Fix failing mkdir() making racing rmdir() fail When fixing the rename() vs rmdir() deadlock, we stopped locking default groups' inodes in configfs_detach_prep(), letting racing mkdir() in default groups proceed concurrently. This enables races like below happen, which leads to a failing mkdir() making rmdir() fail, despite the group to remove having no user-created directory under it in the end. process A: process B: /* PWD=A/B */ mkdir("C") make_item("C") attach_group("C") rmdir("A") detach_prep("A") detach_prep("B") error because of "C" return -ENOTEMPTY attach_group("C/D") error (eg -ENOMEM) return -ENOMEM This patch prevents such scenarii by making rmdir() wait as long as detach_prep() fails because a racing mkdir() is in the middle of attach_group(). To achieve this, mkdir() sets a flag CONFIGFS_USET_IN_MKDIR in parent's configfs_dirent before calling attach_group(), and clears the flag once attach_group() is done. detach_prep() fails with -EAGAIN whenever the flag is hit and returns the guilty inode's mutex so that rmdir() can wait on it. Signed-off-by: Louis Rilling Signed-off-by: Joel Becker diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index 5a33b58..da015c1 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -48,6 +48,7 @@ struct configfs_dirent { #define CONFIGFS_USET_DIR 0x0040 #define CONFIGFS_USET_DEFAULT 0x0080 #define CONFIGFS_USET_DROPPING 0x0100 +#define CONFIGFS_USET_IN_MKDIR 0x0200 #define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR) extern spinlock_t configfs_dirent_lock; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index d5b5985..614e382 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -364,7 +364,7 @@ static struct dentry * configfs_lookup(struct inode *dir, * If there is an error, the caller will reset the flags via * configfs_detach_rollback(). */ -static int configfs_detach_prep(struct dentry *dentry) +static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex) { struct configfs_dirent *parent_sd = dentry->d_fsdata; struct configfs_dirent *sd; @@ -379,6 +379,12 @@ static int configfs_detach_prep(struct dentry *dentry) if (sd->s_type & CONFIGFS_NOT_PINNED) continue; if (sd->s_type & CONFIGFS_USET_DEFAULT) { + /* Abort if racing with mkdir() */ + if (sd->s_type & CONFIGFS_USET_IN_MKDIR) { + if (wait_mutex) + *wait_mutex = &sd->s_dentry->d_inode->i_mutex; + return -EAGAIN; + } /* Mark that we're trying to drop the group */ sd->s_type |= CONFIGFS_USET_DROPPING; @@ -386,7 +392,7 @@ static int configfs_detach_prep(struct dentry *dentry) * Yup, recursive. If there's a problem, blame * deep nesting of default_groups */ - ret = configfs_detach_prep(sd->s_dentry); + ret = configfs_detach_prep(sd->s_dentry, wait_mutex); if (!ret) continue; } else @@ -1113,11 +1119,26 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) */ module_got = 1; + /* + * Make racing rmdir() fail if it did not tag parent with + * CONFIGFS_USET_DROPPING + * Note: if CONFIGFS_USET_DROPPING is already set, attach_group() will + * fail and let rmdir() terminate correctly + */ + spin_lock(&configfs_dirent_lock); + /* This will make configfs_detach_prep() fail */ + sd->s_type |= CONFIGFS_USET_IN_MKDIR; + spin_unlock(&configfs_dirent_lock); + if (group) ret = configfs_attach_group(parent_item, item, dentry); else ret = configfs_attach_item(parent_item, item, dentry); + spin_lock(&configfs_dirent_lock); + sd->s_type &= ~CONFIGFS_USET_IN_MKDIR; + spin_unlock(&configfs_dirent_lock); + out_unlink: if (ret) { /* Tear down everything we built up */ @@ -1182,13 +1203,25 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) } spin_lock(&configfs_dirent_lock); - ret = configfs_detach_prep(dentry); - if (ret) { - configfs_detach_rollback(dentry); - spin_unlock(&configfs_dirent_lock); - config_item_put(parent_item); - return ret; - } + do { + struct mutex *wait_mutex; + + ret = configfs_detach_prep(dentry, &wait_mutex); + if (ret) { + configfs_detach_rollback(dentry); + spin_unlock(&configfs_dirent_lock); + if (ret != -EAGAIN) { + config_item_put(parent_item); + return ret; + } + + /* Wait until the racing operation terminates */ + mutex_lock(wait_mutex); + mutex_unlock(wait_mutex); + + spin_lock(&configfs_dirent_lock); + } + } while (ret == -EAGAIN); spin_unlock(&configfs_dirent_lock); /* Get a working ref for the duration of this function */ @@ -1480,7 +1513,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) I_MUTEX_PARENT); mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); spin_lock(&configfs_dirent_lock); - if (configfs_detach_prep(dentry)) { + if (configfs_detach_prep(dentry, NULL)) { printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n"); } spin_unlock(&configfs_dirent_lock); -- cgit v0.10.2 From 11c3b79218390a139f2d474ee1e983a672d5839a Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 12 Jun 2008 14:00:18 -0700 Subject: configfs: Allow ->make_item() and ->make_group() to return detailed errors. The configfs operations ->make_item() and ->make_group() currently return a new item/group. A return of NULL signifies an error. Because of this, -ENOMEM is the only return code bubbled up the stack. Multiple folks have requested the ability to return specific error codes when these operations fail. This patch adds that ability by changing the ->make_item/group() ops to return an int. Also updated are the in-kernel users of configfs. Signed-off-by: Joel Becker diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt index 44c97e6..15838d7 100644 --- a/Documentation/filesystems/configfs/configfs.txt +++ b/Documentation/filesystems/configfs/configfs.txt @@ -233,10 +233,12 @@ accomplished via the group operations specified on the group's config_item_type. struct configfs_group_operations { - struct config_item *(*make_item)(struct config_group *group, - const char *name); - struct config_group *(*make_group)(struct config_group *group, - const char *name); + int (*make_item)(struct config_group *group, + const char *name, + struct config_item **new_item); + int (*make_group)(struct config_group *group, + const char *name, + struct config_group **new_group); int (*commit_item)(struct config_item *item); void (*disconnect_notify)(struct config_group *group, struct config_item *item); diff --git a/Documentation/filesystems/configfs/configfs_example.c b/Documentation/filesystems/configfs/configfs_example.c index 25151fd..0b422ac 100644 --- a/Documentation/filesystems/configfs/configfs_example.c +++ b/Documentation/filesystems/configfs/configfs_example.c @@ -273,13 +273,13 @@ static inline struct simple_children *to_simple_children(struct config_item *ite return item ? container_of(to_config_group(item), struct simple_children, group) : NULL; } -static struct config_item *simple_children_make_item(struct config_group *group, const char *name) +static int simple_children_make_item(struct config_group *group, const char *name, struct config_item **new_item) { struct simple_child *simple_child; simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL); if (!simple_child) - return NULL; + return -ENOMEM; config_item_init_type_name(&simple_child->item, name, @@ -287,7 +287,8 @@ static struct config_item *simple_children_make_item(struct config_group *group, simple_child->storeme = 0; - return &simple_child->item; + *new_item = &simple_child->item; + return 0; } static struct configfs_attribute simple_children_attr_description = { @@ -359,20 +360,21 @@ static struct configfs_subsystem simple_children_subsys = { * children of its own. */ -static struct config_group *group_children_make_group(struct config_group *group, const char *name) +static int group_children_make_group(struct config_group *group, const char *name, struct config_group **new_group) { struct simple_children *simple_children; simple_children = kzalloc(sizeof(struct simple_children), GFP_KERNEL); if (!simple_children) - return NULL; + return -ENOMEM; config_group_init_type_name(&simple_children->group, name, &simple_children_type); - return &simple_children->group; + *new_group = &simple_children->group; + return 0; } static struct configfs_attribute group_children_attr_description = { diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 665341e..387a133 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -585,8 +585,9 @@ static struct config_item_type netconsole_target_type = { * Group operations and type for netconsole_subsys. */ -static struct config_item *make_netconsole_target(struct config_group *group, - const char *name) +static int make_netconsole_target(struct config_group *group, + const char *name, + struct config_item **new_item) { unsigned long flags; struct netconsole_target *nt; @@ -598,7 +599,7 @@ static struct config_item *make_netconsole_target(struct config_group *group, nt = kzalloc(sizeof(*nt), GFP_KERNEL); if (!nt) { printk(KERN_ERR "netconsole: failed to allocate memory\n"); - return NULL; + return -ENOMEM; } nt->np.name = "netconsole"; @@ -615,7 +616,8 @@ static struct config_item *make_netconsole_target(struct config_group *group, list_add(&nt->list, &target_list); spin_unlock_irqrestore(&target_list_lock, flags); - return &nt->item; + *new_item = &nt->item; + return 0; } static void drop_netconsole_target(struct config_group *group, diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 614e382..0e64312 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1073,25 +1073,24 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) group = NULL; item = NULL; if (type->ct_group_ops->make_group) { - group = type->ct_group_ops->make_group(to_config_group(parent_item), name); - if (group) { + ret = type->ct_group_ops->make_group(to_config_group(parent_item), name, &group); + if (!ret) { link_group(to_config_group(parent_item), group); item = &group->cg_item; } } else { - item = type->ct_group_ops->make_item(to_config_group(parent_item), name); - if (item) + ret = type->ct_group_ops->make_item(to_config_group(parent_item), name, &item); + if (!ret) link_obj(parent_item, item); } mutex_unlock(&subsys->su_mutex); kfree(name); - if (!item) { + if (ret) { /* - * If item == NULL, then link_obj() was never called. + * If ret != 0, then link_obj() was never called. * There are no extra references to clean up. */ - ret = -ENOMEM; goto out_put; } diff --git a/fs/dlm/config.c b/fs/dlm/config.c index eac23bd..492d8ca 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -41,16 +41,20 @@ struct comm; struct nodes; struct node; -static struct config_group *make_cluster(struct config_group *, const char *); +static int make_cluster(struct config_group *, const char *, + struct config_group **); static void drop_cluster(struct config_group *, struct config_item *); static void release_cluster(struct config_item *); -static struct config_group *make_space(struct config_group *, const char *); +static int make_space(struct config_group *, const char *, + struct config_group **); static void drop_space(struct config_group *, struct config_item *); static void release_space(struct config_item *); -static struct config_item *make_comm(struct config_group *, const char *); +static int make_comm(struct config_group *, const char *, + struct config_item **); static void drop_comm(struct config_group *, struct config_item *); static void release_comm(struct config_item *); -static struct config_item *make_node(struct config_group *, const char *); +static int make_node(struct config_group *, const char *, + struct config_item **); static void drop_node(struct config_group *, struct config_item *); static void release_node(struct config_item *); @@ -392,8 +396,8 @@ static struct node *to_node(struct config_item *i) return i ? container_of(i, struct node, item) : NULL; } -static struct config_group *make_cluster(struct config_group *g, - const char *name) +static int make_cluster(struct config_group *g, const char *name, + struct config_group **new_g) { struct cluster *cl = NULL; struct spaces *sps = NULL; @@ -431,14 +435,15 @@ static struct config_group *make_cluster(struct config_group *g, space_list = &sps->ss_group; comm_list = &cms->cs_group; - return &cl->group; + *new_g = &cl->group; + return 0; fail: kfree(cl); kfree(gps); kfree(sps); kfree(cms); - return NULL; + return -ENOMEM; } static void drop_cluster(struct config_group *g, struct config_item *i) @@ -466,7 +471,8 @@ static void release_cluster(struct config_item *i) kfree(cl); } -static struct config_group *make_space(struct config_group *g, const char *name) +static int make_space(struct config_group *g, const char *name, + struct config_group **new_g) { struct space *sp = NULL; struct nodes *nds = NULL; @@ -489,13 +495,14 @@ static struct config_group *make_space(struct config_group *g, const char *name) INIT_LIST_HEAD(&sp->members); mutex_init(&sp->members_lock); sp->members_count = 0; - return &sp->group; + *new_g = &sp->group; + return 0; fail: kfree(sp); kfree(gps); kfree(nds); - return NULL; + return -ENOMEM; } static void drop_space(struct config_group *g, struct config_item *i) @@ -522,19 +529,21 @@ static void release_space(struct config_item *i) kfree(sp); } -static struct config_item *make_comm(struct config_group *g, const char *name) +static int make_comm(struct config_group *g, const char *name, + struct config_item **new_i) { struct comm *cm; cm = kzalloc(sizeof(struct comm), GFP_KERNEL); if (!cm) - return NULL; + return -ENOMEM; config_item_init_type_name(&cm->item, name, &comm_type); cm->nodeid = -1; cm->local = 0; cm->addr_count = 0; - return &cm->item; + *new_i = &cm->item; + return 0; } static void drop_comm(struct config_group *g, struct config_item *i) @@ -554,14 +563,15 @@ static void release_comm(struct config_item *i) kfree(cm); } -static struct config_item *make_node(struct config_group *g, const char *name) +static int make_node(struct config_group *g, const char *name, + struct config_item **new_i) { struct space *sp = to_space(g->cg_item.ci_parent); struct node *nd; nd = kzalloc(sizeof(struct node), GFP_KERNEL); if (!nd) - return NULL; + return -ENOMEM; config_item_init_type_name(&nd->item, name, &node_type); nd->nodeid = -1; @@ -573,7 +583,8 @@ static struct config_item *make_node(struct config_group *g, const char *name) sp->members_count++; mutex_unlock(&sp->members_lock); - return &nd->item; + *new_i = &nd->item; + return 0; } static void drop_node(struct config_group *g, struct config_item *i) diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index f02ccb3..443d108 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1489,25 +1489,28 @@ static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group : NULL; } -static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group, - const char *name) +static int o2hb_heartbeat_group_make_item(struct config_group *group, + const char *name, + struct config_item **new_item) { struct o2hb_region *reg = NULL; - struct config_item *ret = NULL; + int ret = 0; reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL); - if (reg == NULL) - goto out; /* ENOMEM */ + if (reg == NULL) { + ret = -ENOMEM; + goto out; + } config_item_init_type_name(®->hr_item, name, &o2hb_region_type); - ret = ®->hr_item; + *new_item = ®->hr_item; spin_lock(&o2hb_live_lock); list_add_tail(®->hr_all_item, &o2hb_all_regions); spin_unlock(&o2hb_live_lock); out: - if (ret == NULL) + if (ret) kfree(reg); return ret; diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index cfdb08b..b364b70 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -644,27 +644,32 @@ out: return ret; } -static struct config_item *o2nm_node_group_make_item(struct config_group *group, - const char *name) +static int o2nm_node_group_make_item(struct config_group *group, + const char *name, + struct config_item **new_item) { struct o2nm_node *node = NULL; - struct config_item *ret = NULL; + int ret = 0; - if (strlen(name) > O2NM_MAX_NAME_LEN) - goto out; /* ENAMETOOLONG */ + if (strlen(name) > O2NM_MAX_NAME_LEN) { + ret = -ENAMETOOLONG; + goto out; + } node = kzalloc(sizeof(struct o2nm_node), GFP_KERNEL); - if (node == NULL) - goto out; /* ENOMEM */ + if (node == NULL) { + ret = -ENOMEM; + goto out; + } strcpy(node->nd_name, name); /* use item.ci_namebuf instead? */ config_item_init_type_name(&node->nd_item, name, &o2nm_node_type); spin_lock_init(&node->nd_lock); - ret = &node->nd_item; + *new_item = &node->nd_item; out: - if (ret == NULL) + if (ret) kfree(node); return ret; @@ -751,25 +756,31 @@ static struct o2nm_cluster_group *to_o2nm_cluster_group(struct config_group *gro } #endif -static struct config_group *o2nm_cluster_group_make_group(struct config_group *group, - const char *name) +static int o2nm_cluster_group_make_group(struct config_group *group, + const char *name, + struct config_group **new_group) { struct o2nm_cluster *cluster = NULL; struct o2nm_node_group *ns = NULL; - struct config_group *o2hb_group = NULL, *ret = NULL; + struct config_group *o2hb_group = NULL; void *defs = NULL; + int ret = 0; /* this runs under the parent dir's i_mutex; there can be only * one caller in here at a time */ - if (o2nm_single_cluster) - goto out; /* ENOSPC */ + if (o2nm_single_cluster) { + ret = -ENOSPC; + goto out; + } cluster = kzalloc(sizeof(struct o2nm_cluster), GFP_KERNEL); ns = kzalloc(sizeof(struct o2nm_node_group), GFP_KERNEL); defs = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL); o2hb_group = o2hb_alloc_hb_set(); - if (cluster == NULL || ns == NULL || o2hb_group == NULL || defs == NULL) + if (cluster == NULL || ns == NULL || o2hb_group == NULL || defs == NULL) { + ret = -ENOMEM; goto out; + } config_group_init_type_name(&cluster->cl_group, name, &o2nm_cluster_type); @@ -786,11 +797,11 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; - ret = &cluster->cl_group; + *new_group = &cluster->cl_group; o2nm_single_cluster = cluster; out: - if (ret == NULL) { + if (ret) { kfree(cluster); kfree(ns); o2hb_free_hb_set(o2hb_group); diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 3ae65b1..0488f93 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -165,8 +165,8 @@ struct configfs_item_operations { }; struct configfs_group_operations { - struct config_item *(*make_item)(struct config_group *group, const char *name); - struct config_group *(*make_group)(struct config_group *group, const char *name); + int (*make_item)(struct config_group *group, const char *name, struct config_item **new_item); + int (*make_group)(struct config_group *group, const char *name, struct config_group **new_group); int (*commit_item)(struct config_item *item); void (*disconnect_notify)(struct config_group *group, struct config_item *item); void (*drop_item)(struct config_group *group, struct config_item *item); -- cgit v0.10.2 From e75206517504461778c283b942440ef312e437d5 Mon Sep 17 00:00:00 2001 From: Louis Rilling Date: Thu, 12 Jun 2008 17:26:47 +0200 Subject: configfs: call drop_link() to cleanup after create_link() failure When allow_link() succeeds but create_link() fails, the subsystem is not informed of the failure. This patch fixes this by calling drop_link() on create_link() failures. Signed-off-by: Louis Rilling Signed-off-by: Joel Becker diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index faeb441..0004d18 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -140,8 +140,12 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna goto out_put; ret = type->ct_item_ops->allow_link(parent_item, target_item); - if (!ret) + if (!ret) { ret = create_link(parent_item, target_item, dentry); + if (ret && type->ct_item_ops->drop_link) + type->ct_item_ops->drop_link(parent_item, + target_item); + } config_item_put(target_item); path_put(&nd.path); -- cgit v0.10.2 From beef3129b3afb74817acff72fda4a9d951e3973e Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 11 Jul 2008 15:21:17 -0600 Subject: x86/PCI: Fix PCI config space for domains > 0 John Keller reports that PCI config space access is broken on machines with more than one domain. conf1 accesses only work for domain 0, so make sure we check the domain number in the raw routines before trying conf1. Reported-by: John Keller Signed-off-by: Matthew Wilcox Signed-off-by: Jesse Barnes diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index d19fd07..86aff81 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -32,7 +32,7 @@ struct pci_raw_ops *raw_pci_ext_ops; int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 *val) { - if (reg < 256 && raw_pci_ops) + if (domain == 0 && reg < 256 && raw_pci_ops) return raw_pci_ops->read(domain, bus, devfn, reg, len, val); if (raw_pci_ext_ops) return raw_pci_ext_ops->read(domain, bus, devfn, reg, len, val); @@ -42,7 +42,7 @@ int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 val) { - if (reg < 256 && raw_pci_ops) + if (domain == 0 && reg < 256 && raw_pci_ops) return raw_pci_ops->write(domain, bus, devfn, reg, len, val); if (raw_pci_ext_ops) return raw_pci_ext_ops->write(domain, bus, devfn, reg, len, val); -- cgit v0.10.2 From c157dfa3e4aea5775389f2f4d53c040bc8813af1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 13 Jul 2008 22:45:06 +0200 Subject: PCI PM: Fix pci_prepare_to_sleep The recently introduced pci_prepare_to_sleep() needs the following fix, because there are systems which are not power manageable by ACPI (ie. ACPI doesn't provide methods to put the device into low power states and back), but require ACPI hooks to be executed for wake-up to work. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e632a58..ace5181 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1152,7 +1152,6 @@ int pci_prepare_to_sleep(struct pci_dev *dev) break; default: target_state = state; - pci_enable_wake(dev, target_state, true); } } else if (device_may_wakeup(&dev->dev)) { /* @@ -1167,10 +1166,11 @@ int pci_prepare_to_sleep(struct pci_dev *dev) while (target_state && !(dev->pme_support & (1 << target_state))) target_state--; - pci_pme_active(dev, true); } } + pci_enable_wake(dev, target_state, true); + error = pci_set_power_state(dev, target_state); if (error) -- cgit v0.10.2 From c300bd2fb583afb6d68804afd38bc90b31310d95 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 10 Jul 2008 02:16:44 +0200 Subject: PCI: include linux/pm_wakeup.h for device_set_wakeup_capable drivers/pci/pci.c needs pm_wakeup.h since it uses device_set_wakup_capable(). The latter also needs to be stubbed out for !CONFIG_PM. Signed-off-by: Stephen Rothwell Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index ace5181..44a46c9 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -17,6 +17,7 @@ #include #include #include +#include #include /* isa_dma_bridge_buggy */ #include "pci.h" diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 3af0c8d..0aae777 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -63,6 +63,8 @@ static inline void device_init_wakeup(struct device *dev, int val) dev->power.can_wakeup = !!val; } +static inline void device_set_wakeup_capable(struct device *dev, int val) { } + static inline int device_can_wakeup(struct device *dev) { return dev->power.can_wakeup; -- cgit v0.10.2 From 9527056630b68c94b94b94cd58c6cbb65e611fd1 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 14 Jul 2008 17:58:44 +0300 Subject: MAINTAINERS: add UBIFS section Signed-off-by: Artem Bityutskiy diff --git a/MAINTAINERS b/MAINTAINERS index 56a2f67..3882f9c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2307,6 +2307,16 @@ L: linux-mtd@lists.infradead.org W: http://www.linux-mtd.infradead.org/doc/jffs2.html S: Maintained +UBI FILE SYSTEM (UBIFS) +P: Artem Bityutskiy +M: dedekind@infradead.org +P: Adrian Hunter +M: ext-adrian.hunter@nokia.com +L: linux-mtd@lists.infradead.org +T: git git://git.infradead.org/~dedekind/ubifs-2.6.git +W: http://www.linux-mtd.infradead.org/doc/ubifs.html +S: Maintained + JFS FILESYSTEM P: Dave Kleikamp M: shaggy@austin.ibm.com -- cgit v0.10.2 From e56a99d5a42dcb91e622ae7a0289d8fb2ddabffb Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 14 Jul 2008 19:08:34 +0300 Subject: UBIFS: add brief documentation Signed-off-by: Artem Bityutskiy Signed-off-by: Adrian Hunter diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt new file mode 100644 index 0000000..540e9e7 --- /dev/null +++ b/Documentation/filesystems/ubifs.txt @@ -0,0 +1,164 @@ +Introduction +============= + +UBIFS file-system stands for UBI File System. UBI stands for "Unsorted +Block Images". UBIFS is a flash file system, which means it is designed +to work with flash devices. It is important to understand, that UBIFS +is completely different to any traditional file-system in Linux, like +Ext2, XFS, JFS, etc. UBIFS represents a separate class of file-systems +which work with MTD devices, not block devices. The other Linux +file-system of this class is JFFS2. + +To make it more clear, here is a small comparison of MTD devices and +block devices. + +1 MTD devices represent flash devices and they consist of eraseblocks of + rather large size, typically about 128KiB. Block devices consist of + small blocks, typically 512 bytes. +2 MTD devices support 3 main operations - read from some offset within an + eraseblock, write to some offset within an eraseblock, and erase a whole + eraseblock. Block devices support 2 main operations - read a whole + block and write a whole block. +3 The whole eraseblock has to be erased before it becomes possible to + re-write its contents. Blocks may be just re-written. +4 Eraseblocks become worn out after some number of erase cycles - + typically 100K-1G for SLC NAND and NOR flashes, and 1K-10K for MLC + NAND flashes. Blocks do not have the wear-out property. +5 Eraseblocks may become bad (only on NAND flashes) and software should + deal with this. Blocks on hard drives typically do not become bad, + because hardware has mechanisms to substitute bad blocks, at least in + modern LBA disks. + +It should be quite obvious why UBIFS is very different to traditional +file-systems. + +UBIFS works on top of UBI. UBI is a separate software layer which may be +found in drivers/mtd/ubi. UBI is basically a volume management and +wear-leveling layer. It provides so called UBI volumes which is a higher +level abstraction than a MTD device. The programming model of UBI devices +is very similar to MTD devices - they still consist of large eraseblocks, +they have read/write/erase operations, but UBI devices are devoid of +limitations like wear and bad blocks (items 4 and 5 in the above list). + +In a sense, UBIFS is a next generation of JFFS2 file-system, but it is +very different and incompatible to JFFS2. The following are the main +differences. + +* JFFS2 works on top of MTD devices, UBIFS depends on UBI and works on + top of UBI volumes. +* JFFS2 does not have on-media index and has to build it while mounting, + which requires full media scan. UBIFS maintains the FS indexing + information on the flash media and does not require full media scan, + so it mounts many times faster than JFFS2. +* JFFS2 is a write-through file-system, while UBIFS supports write-back, + which makes UBIFS much faster on writes. + +Similarly to JFFS2, UBIFS supports on-the-flight compression which makes +it possible to fit quite a lot of data to the flash. + +Similarly to JFFS2, UBIFS is tolerant of unclean reboots and power-cuts. +It does not need stuff like ckfs.ext2. UBIFS automatically replays its +journal and recovers from crashes, ensuring that the on-flash data +structures are consistent. + +UBIFS scales logarithmically (most of the data structures it uses are +trees), so the mount time and memory consumption do not linearly depend +on the flash size, like in case of JFFS2. This is because UBIFS +maintains the FS index on the flash media. However, UBIFS depends on +UBI, which scales linearly. So overall UBI/UBIFS stack scales linearly. +Nevertheless, UBI/UBIFS scales considerably better than JFFS2. + +The authors of UBIFS believe, that it is possible to develop UBI2 which +would scale logarithmically as well. UBI2 would support the same API as UBI, +but it would be binary incompatible to UBI. So UBIFS would not need to be +changed to use UBI2 + + +Mount options +============= + +(*) == default. + +norm_unmount (*) commit on unmount; the journal is committed + when the file-system is unmounted so that the + next mount does not have to replay the journal + and it becomes very fast; +fast_unmount do not commit on unmount; this option makes + unmount faster, but the next mount slower + because of the need to replay the journal. + + +Quick usage instructions +======================== + +The UBI volume to mount is specified using "ubiX_Y" or "ubiX:NAME" syntax, +where "X" is UBI device number, "Y" is UBI volume number, and "NAME" is +UBI volume name. + +Mount volume 0 on UBI device 0 to /mnt/ubifs: +$ mount -t ubifs ubi0_0 /mnt/ubifs + +Mount "rootfs" volume of UBI device 0 to /mnt/ubifs ("rootfs" is volume +name): +$ mount -t ubifs ubi0:rootfs /mnt/ubifs + +The following is an example of the kernel boot arguments to attach mtd0 +to UBI and mount volume "rootfs": +ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs + + +Module Parameters for Debugging +=============================== + +When UBIFS has been compiled with debugging enabled, there are 3 module +parameters that are available to control aspects of testing and debugging. +The parameters are unsigned integers where each bit controls an option. +The parameters are: + +debug_msgs Selects which debug messages to display, as follows: + + Message Type Flag value + + General messages 1 + Journal messages 2 + Mount messages 4 + Commit messages 8 + LEB search messages 16 + Budgeting messages 32 + Garbage collection messages 64 + Tree Node Cache (TNC) messages 128 + LEB properties (lprops) messages 256 + Input/output messages 512 + Log messages 1024 + Scan messages 2048 + Recovery messages 4096 + +debug_chks Selects extra checks that UBIFS can do while running: + + Check Flag value + + General checks 1 + Check Tree Node Cache (TNC) 2 + Check indexing tree size 4 + Check orphan area 8 + Check old indexing tree 16 + Check LEB properties (lprops) 32 + Check leaf nodes and inodes 64 + +debug_tsts Selects a mode of testing, as follows: + + Test mode Flag value + + Force in-the-gaps method 2 + Failure mode for recovery testing 4 + +For example, set debug_msgs to 5 to display General messages and Mount +messages. + + +References +========== + +UBIFS documentation and FAQ/HOWTO at the MTD web site: +http://www.linux-mtd.infradead.org/doc/ubifs.html +http://www.linux-mtd.infradead.org/faq/ubifs.html -- cgit v0.10.2 From c9b74c5b8fb807187f6b1db09012828fcd2d7e73 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Fri, 18 Apr 2008 20:41:49 +0200 Subject: sdhci: don't check block count for progress The specification is insufficiently strict when it comes to how the hardware should update the block count register, making it useless for checking transfer progress. Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index b413aa6..9b06c60 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -567,7 +567,6 @@ static void sdhci_set_transfer_mode(struct sdhci_host *host, static void sdhci_finish_data(struct sdhci_host *host) { struct mmc_data *data; - u16 blocks; BUG_ON(!host->data); @@ -580,20 +579,16 @@ static void sdhci_finish_data(struct sdhci_host *host) } /* - * Controller doesn't count down when in single block mode. + * The specification states that the block count register must + * be updated, but it does not specify at what point in the + * data flow. That makes the register entirely useless to read + * back so we have to assume that nothing made it to the card + * in the event of an error. */ - if (data->blocks == 1) - blocks = (data->error == 0) ? 0 : 1; + if (data->error) + data->bytes_xfered = 0; else - blocks = readw(host->ioaddr + SDHCI_BLOCK_COUNT); - data->bytes_xfered = data->blksz * (data->blocks - blocks); - - if (!data->error && blocks) { - printk(KERN_ERR "%s: Controller signalled completion even " - "though there were blocks left.\n", - mmc_hostname(host->mmc)); - data->error = -EIO; - } + data->bytes_xfered = data->blksz * data->blocks; if (data->stop) { /* @@ -1466,7 +1461,7 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot) mmc->ops = &sdhci_ops; mmc->f_min = host->max_clk / 256; mmc->f_max = host->max_clk; - mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE | MMC_CAP_SDIO_IRQ; + mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_SDIO_IRQ; if (caps & SDHCI_CAN_DO_HISPD) mmc->caps |= MMC_CAP_SD_HIGHSPEED; -- cgit v0.10.2 From b8c86fc5d8deaa5a6dc49c2c1ed144e6838bf0f3 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Tue, 18 Mar 2008 17:35:49 +0100 Subject: sdhci: move pci stuff to separate module The SDHCI interface is not PCI specific, yet the Linux driver was intimitely connected to the PCI bus. This patch properly separates the PCI specific portion from the bus independent code. This patch is based on work by Ben Dooks but he did not have time to complete it. Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index dead617..f9cbcb8 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -26,18 +26,31 @@ config MMC_PXA config MMC_SDHCI tristate "Secure Digital Host Controller Interface support" - depends on PCI + depends on HAS_DMA help - This select the generic Secure Digital Host Controller Interface. + This selects the generic Secure Digital Host Controller Interface. It is used by manufacturers such as Texas Instruments(R), Ricoh(R) and Toshiba(R). Most controllers found in laptops are of this type. + + If you have a controller with this interface, say Y or M here. You + also need to enable an appropriate bus interface. + + If unsure, say N. + +config MMC_SDHCI_PCI + tristate "SDHCI support on PCI bus" + depends on MMC_SDHCI && PCI + help + This selects the PCI Secure Digital Host Controller Interface. + Most controllers found today are PCI devices. + If you have a controller with this interface, say Y or M here. If unsure, say N. config MMC_RICOH_MMC tristate "Ricoh MMC Controller Disabler (EXPERIMENTAL)" - depends on PCI && EXPERIMENTAL && MMC_SDHCI + depends on MMC_SDHCI_PCI help This selects the disabler for the Ricoh MMC Controller. This proprietary controller is unnecessary because the SDHCI driver diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile index 3877c87..3027250 100644 --- a/drivers/mmc/host/Makefile +++ b/drivers/mmc/host/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_MMC_ARMMMCI) += mmci.o obj-$(CONFIG_MMC_PXA) += pxamci.o obj-$(CONFIG_MMC_IMX) += imxmmc.o obj-$(CONFIG_MMC_SDHCI) += sdhci.o +obj-$(CONFIG_MMC_SDHCI_PCI) += sdhci-pci.o obj-$(CONFIG_MMC_RICOH_MMC) += ricoh_mmc.o obj-$(CONFIG_MMC_WBSD) += wbsd.o obj-$(CONFIG_MMC_AU1X) += au1xmmc.o diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c new file mode 100644 index 0000000..2a3dd6c --- /dev/null +++ b/drivers/mmc/host/sdhci-pci.c @@ -0,0 +1,486 @@ +/* linux/drivers/mmc/host/sdhci-pci.c - SDHCI on PCI bus interface + * + * Copyright (C) 2005-2008 Pierre Ossman, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * Thanks to the following companies for their support: + * + * - JMicron (hardware and technical support) + */ + +#include +#include +#include +#include + +#include + +#include +#include + +#include "sdhci.h" + +/* + * PCI registers + */ + +#define PCI_SDHCI_IFPIO 0x00 +#define PCI_SDHCI_IFDMA 0x01 +#define PCI_SDHCI_IFVENDOR 0x02 + +#define PCI_SLOT_INFO 0x40 /* 8 bits */ +#define PCI_SLOT_INFO_SLOTS(x) ((x >> 4) & 7) +#define PCI_SLOT_INFO_FIRST_BAR_MASK 0x07 + +#define MAX_SLOTS 8 + +static const struct pci_device_id pci_ids[] __devinitdata = { + { + .vendor = PCI_VENDOR_ID_RICOH, + .device = PCI_DEVICE_ID_RICOH_R5C822, + .subvendor = PCI_VENDOR_ID_IBM, + .subdevice = PCI_ANY_ID, + .driver_data = SDHCI_QUIRK_CLOCK_BEFORE_RESET | + SDHCI_QUIRK_FORCE_DMA, + }, + + { + .vendor = PCI_VENDOR_ID_RICOH, + .device = PCI_DEVICE_ID_RICOH_R5C822, + .subvendor = PCI_VENDOR_ID_SAMSUNG, + .subdevice = PCI_ANY_ID, + .driver_data = SDHCI_QUIRK_FORCE_DMA | + SDHCI_QUIRK_NO_CARD_NO_RESET, + }, + + { + .vendor = PCI_VENDOR_ID_RICOH, + .device = PCI_DEVICE_ID_RICOH_R5C822, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = SDHCI_QUIRK_FORCE_DMA, + }, + + { + .vendor = PCI_VENDOR_ID_TI, + .device = PCI_DEVICE_ID_TI_XX21_XX11_SD, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = SDHCI_QUIRK_FORCE_DMA, + }, + + { + .vendor = PCI_VENDOR_ID_ENE, + .device = PCI_DEVICE_ID_ENE_CB712_SD, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE | + SDHCI_QUIRK_BROKEN_DMA, + }, + + { + .vendor = PCI_VENDOR_ID_ENE, + .device = PCI_DEVICE_ID_ENE_CB712_SD_2, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE | + SDHCI_QUIRK_BROKEN_DMA, + }, + + { + .vendor = PCI_VENDOR_ID_ENE, + .device = PCI_DEVICE_ID_ENE_CB714_SD, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE | + SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS | + SDHCI_QUIRK_BROKEN_DMA, + }, + + { + .vendor = PCI_VENDOR_ID_ENE, + .device = PCI_DEVICE_ID_ENE_CB714_SD_2, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE | + SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS | + SDHCI_QUIRK_BROKEN_DMA, + }, + + { + .vendor = PCI_VENDOR_ID_MARVELL, + .device = PCI_DEVICE_ID_MARVELL_CAFE_SD, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER | + SDHCI_QUIRK_INCR_TIMEOUT_CONTROL, + }, + + { + .vendor = PCI_VENDOR_ID_JMICRON, + .device = PCI_DEVICE_ID_JMICRON_JMB38X_SD, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = SDHCI_QUIRK_32BIT_DMA_ADDR | + SDHCI_QUIRK_32BIT_DMA_SIZE | + SDHCI_QUIRK_RESET_AFTER_REQUEST, + }, + + { /* Generic SD host controller */ + PCI_DEVICE_CLASS((PCI_CLASS_SYSTEM_SDHCI << 8), 0xFFFF00) + }, + + { /* end: all zeroes */ }, +}; + +MODULE_DEVICE_TABLE(pci, pci_ids); + +struct sdhci_pci_chip; + +struct sdhci_pci_slot { + struct sdhci_pci_chip *chip; + struct sdhci_host *host; + + int pci_bar; +}; + +struct sdhci_pci_chip { + struct pci_dev *pdev; + unsigned int quirks; + + int num_slots; /* Slots on controller */ + struct sdhci_pci_slot *slots[MAX_SLOTS]; /* Pointers to host slots */ +}; + +/*****************************************************************************\ + * * + * SDHCI core callbacks * + * * +\*****************************************************************************/ + +static int sdhci_pci_enable_dma(struct sdhci_host *host) +{ + struct sdhci_pci_slot *slot; + struct pci_dev *pdev; + int ret; + + slot = sdhci_priv(host); + pdev = slot->chip->pdev; + + if (((pdev->class & 0xFFFF00) == (PCI_CLASS_SYSTEM_SDHCI << 8)) && + ((pdev->class & 0x0000FF) != PCI_SDHCI_IFDMA) && + (host->flags & SDHCI_USE_DMA)) { + dev_warn(&pdev->dev, "Will use DMA mode even though HW " + "doesn't fully claim to support it.\n"); + } + + ret = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + if (ret) + return ret; + + pci_set_master(pdev); + + return 0; +} + +static struct sdhci_ops sdhci_pci_ops = { + .enable_dma = sdhci_pci_enable_dma, +}; + +/*****************************************************************************\ + * * + * Suspend/resume * + * * +\*****************************************************************************/ + +#ifdef CONFIG_PM + +static int sdhci_pci_suspend (struct pci_dev *pdev, pm_message_t state) +{ + struct sdhci_pci_chip *chip; + struct sdhci_pci_slot *slot; + int i, ret; + + chip = pci_get_drvdata(pdev); + if (!chip) + return 0; + + for (i = 0;i < chip->num_slots;i++) { + slot = chip->slots[i]; + if (!slot) + continue; + + ret = sdhci_suspend_host(slot->host, state); + + if (ret) { + for (i--;i >= 0;i--) + sdhci_resume_host(chip->slots[i]->host); + return ret; + } + } + + pci_save_state(pdev); + pci_enable_wake(pdev, pci_choose_state(pdev, state), 0); + pci_disable_device(pdev); + pci_set_power_state(pdev, pci_choose_state(pdev, state)); + + return 0; +} + +static int sdhci_pci_resume (struct pci_dev *pdev) +{ + struct sdhci_pci_chip *chip; + struct sdhci_pci_slot *slot; + int i, ret; + + chip = pci_get_drvdata(pdev); + if (!chip) + return 0; + + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + ret = pci_enable_device(pdev); + if (ret) + return ret; + + for (i = 0;i < chip->num_slots;i++) { + slot = chip->slots[i]; + if (!slot) + continue; + + ret = sdhci_resume_host(slot->host); + if (ret) + return ret; + } + + return 0; +} + +#else /* CONFIG_PM */ + +#define sdhci_pci_suspend NULL +#define sdhci_pci_resume NULL + +#endif /* CONFIG_PM */ + +/*****************************************************************************\ + * * + * Device probing/removal * + * * +\*****************************************************************************/ + +static struct sdhci_pci_slot * __devinit sdhci_pci_probe_slot( + struct pci_dev *pdev, struct sdhci_pci_chip *chip, int bar) +{ + struct sdhci_pci_slot *slot; + struct sdhci_host *host; + + resource_size_t addr; + + int ret; + + if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) { + dev_err(&pdev->dev, "BAR %d is not iomem. Aborting.\n", bar); + return ERR_PTR(-ENODEV); + } + + if (pci_resource_len(pdev, bar) != 0x100) { + dev_err(&pdev->dev, "Invalid iomem size. You may " + "experience problems.\n"); + } + + if ((pdev->class & 0x0000FF) == PCI_SDHCI_IFVENDOR) { + dev_err(&pdev->dev, "Vendor specific interface. Aborting.\n"); + return ERR_PTR(-ENODEV); + } + + if ((pdev->class & 0x0000FF) > PCI_SDHCI_IFVENDOR) { + dev_err(&pdev->dev, "Unknown interface. Aborting.\n"); + return ERR_PTR(-ENODEV); + } + + host = sdhci_alloc_host(&pdev->dev, sizeof(struct sdhci_pci_slot)); + if (IS_ERR(host)) { + ret = PTR_ERR(host); + goto unmap; + } + + slot = sdhci_priv(host); + + slot->chip = chip; + slot->host = host; + slot->pci_bar = bar; + + host->hw_name = "PCI"; + host->ops = &sdhci_pci_ops; + host->quirks = chip->quirks; + + host->irq = pdev->irq; + + ret = pci_request_region(pdev, bar, mmc_hostname(host->mmc)); + if (ret) { + dev_err(&pdev->dev, "cannot request region\n"); + return ERR_PTR(ret); + } + + addr = pci_resource_start(pdev, bar); + host->ioaddr = ioremap_nocache(addr, pci_resource_len(pdev, bar)); + if (!host->ioaddr) { + dev_err(&pdev->dev, "failed to remap registers\n"); + goto release; + } + + ret = sdhci_add_host(host); + if (ret) + goto unmap; + + return slot; + +unmap: + iounmap(host->ioaddr); + +release: + pci_release_region(pdev, bar); + sdhci_free_host(host); + + return ERR_PTR(ret); +} + +static void sdhci_pci_remove_slot(struct sdhci_pci_slot *slot) +{ + sdhci_remove_host(slot->host); + pci_release_region(slot->chip->pdev, slot->pci_bar); + sdhci_free_host(slot->host); +} + +static int __devinit sdhci_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct sdhci_pci_chip *chip; + struct sdhci_pci_slot *slot; + + u8 slots, rev, first_bar; + int ret, i; + + BUG_ON(pdev == NULL); + BUG_ON(ent == NULL); + + pci_read_config_byte(pdev, PCI_CLASS_REVISION, &rev); + + dev_info(&pdev->dev, "SDHCI controller found [%04x:%04x] (rev %x)\n", + (int)pdev->vendor, (int)pdev->device, (int)rev); + + ret = pci_read_config_byte(pdev, PCI_SLOT_INFO, &slots); + if (ret) + return ret; + + slots = PCI_SLOT_INFO_SLOTS(slots) + 1; + dev_dbg(&pdev->dev, "found %d slot(s)\n", slots); + if (slots == 0) + return -ENODEV; + + BUG_ON(slots > MAX_SLOTS); + + ret = pci_read_config_byte(pdev, PCI_SLOT_INFO, &first_bar); + if (ret) + return ret; + + first_bar &= PCI_SLOT_INFO_FIRST_BAR_MASK; + + if (first_bar > 5) { + dev_err(&pdev->dev, "Invalid first BAR. Aborting.\n"); + return -ENODEV; + } + + ret = pci_enable_device(pdev); + if (ret) + return ret; + + chip = kzalloc(sizeof(struct sdhci_pci_chip), GFP_KERNEL); + if (!chip) { + ret = -ENOMEM; + goto err; + } + + chip->pdev = pdev; + chip->quirks = ent->driver_data; + chip->num_slots = slots; + + pci_set_drvdata(pdev, chip); + + for (i = 0;i < slots;i++) { + slot = sdhci_pci_probe_slot(pdev, chip, first_bar + i); + if (IS_ERR(slot)) { + for (i--;i >= 0;i--) + sdhci_pci_remove_slot(chip->slots[i]); + ret = PTR_ERR(slot); + goto free; + } + + chip->slots[i] = slot; + } + + return 0; + +free: + pci_set_drvdata(pdev, NULL); + kfree(chip); + +err: + pci_disable_device(pdev); + return ret; +} + +static void __devexit sdhci_pci_remove(struct pci_dev *pdev) +{ + int i; + struct sdhci_pci_chip *chip; + + chip = pci_get_drvdata(pdev); + + if (chip) { + for (i = 0;i < chip->num_slots; i++) + sdhci_pci_remove_slot(chip->slots[i]); + + pci_set_drvdata(pdev, NULL); + kfree(chip); + } + + pci_disable_device(pdev); +} + +static struct pci_driver sdhci_driver = { + .name = "sdhci-pci", + .id_table = pci_ids, + .probe = sdhci_pci_probe, + .remove = __devexit_p(sdhci_pci_remove), + .suspend = sdhci_pci_suspend, + .resume = sdhci_pci_resume, +}; + +/*****************************************************************************\ + * * + * Driver init/exit * + * * +\*****************************************************************************/ + +static int __init sdhci_drv_init(void) +{ + return pci_register_driver(&sdhci_driver); +} + +static void __exit sdhci_drv_exit(void) +{ + pci_unregister_driver(&sdhci_driver); +} + +module_init(sdhci_drv_init); +module_exit(sdhci_drv_exit); + +MODULE_AUTHOR("Pierre Ossman "); +MODULE_DESCRIPTION("Secure Digital Host Controller Interface PCI driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 9b06c60..8ce01d4 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include @@ -32,135 +32,6 @@ static unsigned int debug_quirks = 0; -/* - * Different quirks to handle when the hardware deviates from a strict - * interpretation of the SDHCI specification. - */ - -/* Controller doesn't honor resets unless we touch the clock register */ -#define SDHCI_QUIRK_CLOCK_BEFORE_RESET (1<<0) -/* Controller has bad caps bits, but really supports DMA */ -#define SDHCI_QUIRK_FORCE_DMA (1<<1) -/* Controller doesn't like to be reset when there is no card inserted. */ -#define SDHCI_QUIRK_NO_CARD_NO_RESET (1<<2) -/* Controller doesn't like clearing the power reg before a change */ -#define SDHCI_QUIRK_SINGLE_POWER_WRITE (1<<3) -/* Controller has flaky internal state so reset it on each ios change */ -#define SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS (1<<4) -/* Controller has an unusable DMA engine */ -#define SDHCI_QUIRK_BROKEN_DMA (1<<5) -/* Controller can only DMA from 32-bit aligned addresses */ -#define SDHCI_QUIRK_32BIT_DMA_ADDR (1<<6) -/* Controller can only DMA chunk sizes that are a multiple of 32 bits */ -#define SDHCI_QUIRK_32BIT_DMA_SIZE (1<<7) -/* Controller needs to be reset after each request to stay stable */ -#define SDHCI_QUIRK_RESET_AFTER_REQUEST (1<<8) -/* Controller needs voltage and power writes to happen separately */ -#define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER (1<<9) -/* Controller has an off-by-one issue with timeout value */ -#define SDHCI_QUIRK_INCR_TIMEOUT_CONTROL (1<<10) - -static const struct pci_device_id pci_ids[] __devinitdata = { - { - .vendor = PCI_VENDOR_ID_RICOH, - .device = PCI_DEVICE_ID_RICOH_R5C822, - .subvendor = PCI_VENDOR_ID_IBM, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_CLOCK_BEFORE_RESET | - SDHCI_QUIRK_FORCE_DMA, - }, - - { - .vendor = PCI_VENDOR_ID_RICOH, - .device = PCI_DEVICE_ID_RICOH_R5C822, - .subvendor = PCI_VENDOR_ID_SAMSUNG, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_FORCE_DMA | - SDHCI_QUIRK_NO_CARD_NO_RESET, - }, - - { - .vendor = PCI_VENDOR_ID_RICOH, - .device = PCI_DEVICE_ID_RICOH_R5C822, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_FORCE_DMA, - }, - - { - .vendor = PCI_VENDOR_ID_TI, - .device = PCI_DEVICE_ID_TI_XX21_XX11_SD, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_FORCE_DMA, - }, - - { - .vendor = PCI_VENDOR_ID_ENE, - .device = PCI_DEVICE_ID_ENE_CB712_SD, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE | - SDHCI_QUIRK_BROKEN_DMA, - }, - - { - .vendor = PCI_VENDOR_ID_ENE, - .device = PCI_DEVICE_ID_ENE_CB712_SD_2, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE | - SDHCI_QUIRK_BROKEN_DMA, - }, - - { - .vendor = PCI_VENDOR_ID_ENE, - .device = PCI_DEVICE_ID_ENE_CB714_SD, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE | - SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS | - SDHCI_QUIRK_BROKEN_DMA, - }, - - { - .vendor = PCI_VENDOR_ID_ENE, - .device = PCI_DEVICE_ID_ENE_CB714_SD_2, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE | - SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS | - SDHCI_QUIRK_BROKEN_DMA, - }, - - { - .vendor = PCI_VENDOR_ID_MARVELL, - .device = PCI_DEVICE_ID_MARVELL_CAFE_SD, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER | - SDHCI_QUIRK_INCR_TIMEOUT_CONTROL, - }, - - { - .vendor = PCI_VENDOR_ID_JMICRON, - .device = PCI_DEVICE_ID_JMICRON_JMB38X_SD, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_32BIT_DMA_ADDR | - SDHCI_QUIRK_32BIT_DMA_SIZE | - SDHCI_QUIRK_RESET_AFTER_REQUEST, - }, - - { /* Generic SD host controller */ - PCI_DEVICE_CLASS((PCI_CLASS_SYSTEM_SDHCI << 8), 0xFFFF00) - }, - - { /* end: all zeroes */ }, -}; - -MODULE_DEVICE_TABLE(pci, pci_ids); - static void sdhci_prepare_data(struct sdhci_host *, struct mmc_data *); static void sdhci_finish_data(struct sdhci_host *); @@ -215,7 +86,7 @@ static void sdhci_reset(struct sdhci_host *host, u8 mask) { unsigned long timeout; - if (host->chip->quirks & SDHCI_QUIRK_NO_CARD_NO_RESET) { + if (host->quirks & SDHCI_QUIRK_NO_CARD_NO_RESET) { if (!(readl(host->ioaddr + SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT)) return; @@ -488,7 +359,7 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) * Compensate for an off-by-one error in the CaFe hardware; otherwise, * a too-small count gives us interrupt timeouts. */ - if ((host->chip->quirks & SDHCI_QUIRK_INCR_TIMEOUT_CONTROL)) + if ((host->quirks & SDHCI_QUIRK_INCR_TIMEOUT_CONTROL)) count++; if (count >= 0xF) { @@ -503,7 +374,7 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) host->flags |= SDHCI_REQ_USE_DMA; if (unlikely((host->flags & SDHCI_REQ_USE_DMA) && - (host->chip->quirks & SDHCI_QUIRK_32BIT_DMA_SIZE) && + (host->quirks & SDHCI_QUIRK_32BIT_DMA_SIZE) && ((data->blksz * data->blocks) & 0x3))) { DBG("Reverting to PIO because of transfer size (%d)\n", data->blksz * data->blocks); @@ -515,7 +386,7 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) * translation to device address space. */ if (unlikely((host->flags & SDHCI_REQ_USE_DMA) && - (host->chip->quirks & SDHCI_QUIRK_32BIT_DMA_ADDR) && + (host->quirks & SDHCI_QUIRK_32BIT_DMA_ADDR) && (data->sg->offset & 0x3))) { DBG("Reverting to PIO because of bad alignment\n"); host->flags &= ~SDHCI_REQ_USE_DMA; @@ -524,11 +395,13 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) if (host->flags & SDHCI_REQ_USE_DMA) { int count; - count = pci_map_sg(host->chip->pdev, data->sg, data->sg_len, - (data->flags & MMC_DATA_READ)?PCI_DMA_FROMDEVICE:PCI_DMA_TODEVICE); - BUG_ON(count != 1); + count = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + (data->flags & MMC_DATA_READ) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); + WARN_ON(count != 1); - writel(sg_dma_address(data->sg), host->ioaddr + SDHCI_DMA_ADDRESS); + writel(sg_dma_address(data->sg), + host->ioaddr + SDHCI_DMA_ADDRESS); } else { host->cur_sg = data->sg; host->num_sg = data->sg_len; @@ -574,8 +447,9 @@ static void sdhci_finish_data(struct sdhci_host *host) host->data = NULL; if (host->flags & SDHCI_REQ_USE_DMA) { - pci_unmap_sg(host->chip->pdev, data->sg, data->sg_len, - (data->flags & MMC_DATA_READ)?PCI_DMA_FROMDEVICE:PCI_DMA_TODEVICE); + dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + (data->flags & MMC_DATA_READ) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); } /* @@ -770,7 +644,7 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned short power) * Spec says that we should clear the power reg before setting * a new value. Some controllers don't seem to like this though. */ - if (!(host->chip->quirks & SDHCI_QUIRK_SINGLE_POWER_WRITE)) + if (!(host->quirks & SDHCI_QUIRK_SINGLE_POWER_WRITE)) writeb(0, host->ioaddr + SDHCI_POWER_CONTROL); pwr = SDHCI_POWER_ON; @@ -795,7 +669,7 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned short power) * At least the CaFe chip gets confused if we set the voltage * and set turn on power at the same time, so set the voltage first. */ - if ((host->chip->quirks & SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER)) + if ((host->quirks & SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER)) writeb(pwr & ~SDHCI_POWER_ON, host->ioaddr + SDHCI_POWER_CONTROL); @@ -883,7 +757,7 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) * signalling timeout and CRC errors even on CMD0. Resetting * it on each ios seems to solve the problem. */ - if(host->chip->quirks & SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS) + if(host->quirks & SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS) sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA); mmiowb(); @@ -994,10 +868,10 @@ static void sdhci_tasklet_finish(unsigned long param) if (mrq->cmd->error || (mrq->data && (mrq->data->error || (mrq->data->stop && mrq->data->stop->error))) || - (host->chip->quirks & SDHCI_QUIRK_RESET_AFTER_REQUEST)) { + (host->quirks & SDHCI_QUIRK_RESET_AFTER_REQUEST)) { /* Some controllers need this kick or reset won't work here */ - if (host->chip->quirks & SDHCI_QUIRK_CLOCK_BEFORE_RESET) { + if (host->quirks & SDHCI_QUIRK_CLOCK_BEFORE_RESET) { unsigned int clock; /* This is to force an update */ @@ -1229,165 +1103,90 @@ out: #ifdef CONFIG_PM -static int sdhci_suspend (struct pci_dev *pdev, pm_message_t state) +int sdhci_suspend_host(struct sdhci_host *host, pm_message_t state) { - struct sdhci_chip *chip; - int i, ret; - - chip = pci_get_drvdata(pdev); - if (!chip) - return 0; - - DBG("Suspending...\n"); - - for (i = 0;i < chip->num_slots;i++) { - if (!chip->hosts[i]) - continue; - ret = mmc_suspend_host(chip->hosts[i]->mmc, state); - if (ret) { - for (i--;i >= 0;i--) - mmc_resume_host(chip->hosts[i]->mmc); - return ret; - } - } - - pci_save_state(pdev); - pci_enable_wake(pdev, pci_choose_state(pdev, state), 0); + int ret; - for (i = 0;i < chip->num_slots;i++) { - if (!chip->hosts[i]) - continue; - free_irq(chip->hosts[i]->irq, chip->hosts[i]); - } + ret = mmc_suspend_host(host->mmc, state); + if (ret) + return ret; - pci_disable_device(pdev); - pci_set_power_state(pdev, pci_choose_state(pdev, state)); + free_irq(host->irq, host); return 0; } -static int sdhci_resume (struct pci_dev *pdev) -{ - struct sdhci_chip *chip; - int i, ret; +EXPORT_SYMBOL_GPL(sdhci_suspend_host); - chip = pci_get_drvdata(pdev); - if (!chip) - return 0; +int sdhci_resume_host(struct sdhci_host *host) +{ + int ret; - DBG("Resuming...\n"); + if (host->flags & SDHCI_USE_DMA) { + if (host->ops->enable_dma) + host->ops->enable_dma(host); + } - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - ret = pci_enable_device(pdev); + ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED, + mmc_hostname(host->mmc), host); if (ret) return ret; - for (i = 0;i < chip->num_slots;i++) { - if (!chip->hosts[i]) - continue; - if (chip->hosts[i]->flags & SDHCI_USE_DMA) - pci_set_master(pdev); - ret = request_irq(chip->hosts[i]->irq, sdhci_irq, - IRQF_SHARED, mmc_hostname(chip->hosts[i]->mmc), - chip->hosts[i]); - if (ret) - return ret; - sdhci_init(chip->hosts[i]); - mmiowb(); - ret = mmc_resume_host(chip->hosts[i]->mmc); - if (ret) - return ret; - } + sdhci_init(host); + mmiowb(); + + ret = mmc_resume_host(host->mmc); + if (ret) + return ret; return 0; } -#else /* CONFIG_PM */ - -#define sdhci_suspend NULL -#define sdhci_resume NULL +EXPORT_SYMBOL_GPL(sdhci_resume_host); #endif /* CONFIG_PM */ /*****************************************************************************\ * * - * Device probing/removal * + * Device allocation/registration * * * \*****************************************************************************/ -static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot) +struct sdhci_host *sdhci_alloc_host(struct device *dev, + size_t priv_size) { - int ret; - unsigned int version; - struct sdhci_chip *chip; struct mmc_host *mmc; struct sdhci_host *host; - u8 first_bar; - unsigned int caps; - - chip = pci_get_drvdata(pdev); - BUG_ON(!chip); - - ret = pci_read_config_byte(pdev, PCI_SLOT_INFO, &first_bar); - if (ret) - return ret; - - first_bar &= PCI_SLOT_INFO_FIRST_BAR_MASK; + WARN_ON(dev == NULL); - if (first_bar > 5) { - printk(KERN_ERR DRIVER_NAME ": Invalid first BAR. Aborting.\n"); - return -ENODEV; - } - - if (!(pci_resource_flags(pdev, first_bar + slot) & IORESOURCE_MEM)) { - printk(KERN_ERR DRIVER_NAME ": BAR is not iomem. Aborting.\n"); - return -ENODEV; - } - - if (pci_resource_len(pdev, first_bar + slot) != 0x100) { - printk(KERN_ERR DRIVER_NAME ": Invalid iomem size. " - "You may experience problems.\n"); - } - - if ((pdev->class & 0x0000FF) == PCI_SDHCI_IFVENDOR) { - printk(KERN_ERR DRIVER_NAME ": Vendor specific interface. Aborting.\n"); - return -ENODEV; - } - - if ((pdev->class & 0x0000FF) > PCI_SDHCI_IFVENDOR) { - printk(KERN_ERR DRIVER_NAME ": Unknown interface. Aborting.\n"); - return -ENODEV; - } - - mmc = mmc_alloc_host(sizeof(struct sdhci_host), &pdev->dev); + mmc = mmc_alloc_host(sizeof(struct sdhci_host) + priv_size, dev); if (!mmc) - return -ENOMEM; + return ERR_PTR(-ENOMEM); host = mmc_priv(mmc); host->mmc = mmc; - host->chip = chip; - chip->hosts[slot] = host; + return host; +} - host->bar = first_bar + slot; +EXPORT_SYMBOL_GPL(sdhci_alloc_host); - host->addr = pci_resource_start(pdev, host->bar); - host->irq = pdev->irq; +int sdhci_add_host(struct sdhci_host *host) +{ + struct mmc_host *mmc; + unsigned int caps; + unsigned int version; + int ret; - DBG("slot %d at 0x%08lx, irq %d\n", slot, host->addr, host->irq); + WARN_ON(host == NULL); + if (host == NULL) + return -EINVAL; - ret = pci_request_region(pdev, host->bar, mmc_hostname(mmc)); - if (ret) - goto free; + mmc = host->mmc; - host->ioaddr = ioremap_nocache(host->addr, - pci_resource_len(pdev, host->bar)); - if (!host->ioaddr) { - ret = -ENOMEM; - goto release; - } + if (debug_quirks) + host->quirks = debug_quirks; sdhci_reset(host, SDHCI_RESET_ALL); @@ -1401,46 +1200,40 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot) caps = readl(host->ioaddr + SDHCI_CAPABILITIES); - if (chip->quirks & SDHCI_QUIRK_FORCE_DMA) + if (host->quirks & SDHCI_QUIRK_FORCE_DMA) host->flags |= SDHCI_USE_DMA; else if (!(caps & SDHCI_CAN_DO_DMA)) DBG("Controller doesn't have DMA capability\n"); else host->flags |= SDHCI_USE_DMA; - if ((chip->quirks & SDHCI_QUIRK_BROKEN_DMA) && + if ((host->quirks & SDHCI_QUIRK_BROKEN_DMA) && (host->flags & SDHCI_USE_DMA)) { DBG("Disabling DMA as it is marked broken\n"); host->flags &= ~SDHCI_USE_DMA; } - if (((pdev->class & 0x0000FF) != PCI_SDHCI_IFDMA) && - (host->flags & SDHCI_USE_DMA)) { - printk(KERN_WARNING "%s: Will use DMA " - "mode even though HW doesn't fully " - "claim to support it.\n", mmc_hostname(mmc)); - } - if (host->flags & SDHCI_USE_DMA) { - if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) { - printk(KERN_WARNING "%s: No suitable DMA available. " - "Falling back to PIO.\n", mmc_hostname(mmc)); - host->flags &= ~SDHCI_USE_DMA; + if (host->ops->enable_dma) { + if (host->ops->enable_dma(host)) { + printk(KERN_WARNING "%s: No suitable DMA " + "available. Falling back to PIO.\n", + mmc_hostname(mmc)); + host->flags &= ~SDHCI_USE_DMA; + } } } - if (host->flags & SDHCI_USE_DMA) - pci_set_master(pdev); - else /* XXX: Hack to get MMC layer to avoid highmem */ - pdev->dma_mask = 0; + /* XXX: Hack to get MMC layer to avoid highmem */ + if (!(host->flags & SDHCI_USE_DMA)) + mmc_dev(host->mmc)->dma_mask = 0; host->max_clk = (caps & SDHCI_CLOCK_BASE_MASK) >> SDHCI_CLOCK_BASE_SHIFT; if (host->max_clk == 0) { printk(KERN_ERR "%s: Hardware doesn't specify base clock " "frequency.\n", mmc_hostname(mmc)); - ret = -ENODEV; - goto unmap; + return -ENODEV; } host->max_clk *= 1000000; @@ -1449,8 +1242,7 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot) if (host->timeout_clk == 0) { printk(KERN_ERR "%s: Hardware doesn't specify timeout clock " "frequency.\n", mmc_hostname(mmc)); - ret = -ENODEV; - goto unmap; + return -ENODEV; } if (caps & SDHCI_TIMEOUT_CLK_UNIT) host->timeout_clk *= 1000; @@ -1477,8 +1269,7 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot) if (mmc->ocr_avail == 0) { printk(KERN_ERR "%s: Hardware doesn't report any " "support voltages.\n", mmc_hostname(mmc)); - ret = -ENODEV; - goto unmap; + return -ENODEV; } spin_lock_init(&host->lock); @@ -1548,7 +1339,7 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot) host->led.default_trigger = mmc_hostname(mmc); host->led.brightness_set = sdhci_led_control; - ret = led_classdev_register(&pdev->dev, &host->led); + ret = led_classdev_register(mmc_dev(mmc), &host->led); if (ret) goto reset; #endif @@ -1557,8 +1348,8 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot) mmc_add_host(mmc); - printk(KERN_INFO "%s: SDHCI at 0x%08lx irq %d %s\n", - mmc_hostname(mmc), host->addr, host->irq, + printk(KERN_INFO "%s: SDHCI controller on %s [%s] using %s\n", + mmc_hostname(mmc), host->hw_name, mmc_dev(mmc)->bus_id, (host->flags & SDHCI_USE_DMA)?"DMA":"PIO"); return 0; @@ -1571,29 +1362,15 @@ reset: untasklet: tasklet_kill(&host->card_tasklet); tasklet_kill(&host->finish_tasklet); -unmap: - iounmap(host->ioaddr); -release: - pci_release_region(pdev, host->bar); -free: - mmc_free_host(mmc); return ret; } -static void sdhci_remove_slot(struct pci_dev *pdev, int slot) -{ - struct sdhci_chip *chip; - struct mmc_host *mmc; - struct sdhci_host *host; - - chip = pci_get_drvdata(pdev); - host = chip->hosts[slot]; - mmc = host->mmc; - - chip->hosts[slot] = NULL; +EXPORT_SYMBOL_GPL(sdhci_add_host); - mmc_remove_host(mmc); +void sdhci_remove_host(struct sdhci_host *host) +{ + mmc_remove_host(host->mmc); #ifdef CONFIG_LEDS_CLASS led_classdev_unregister(&host->led); @@ -1607,107 +1384,16 @@ static void sdhci_remove_slot(struct pci_dev *pdev, int slot) tasklet_kill(&host->card_tasklet); tasklet_kill(&host->finish_tasklet); - - iounmap(host->ioaddr); - - pci_release_region(pdev, host->bar); - - mmc_free_host(mmc); } -static int __devinit sdhci_probe(struct pci_dev *pdev, - const struct pci_device_id *ent) -{ - int ret, i; - u8 slots, rev; - struct sdhci_chip *chip; - - BUG_ON(pdev == NULL); - BUG_ON(ent == NULL); - - pci_read_config_byte(pdev, PCI_CLASS_REVISION, &rev); - - printk(KERN_INFO DRIVER_NAME - ": SDHCI controller found at %s [%04x:%04x] (rev %x)\n", - pci_name(pdev), (int)pdev->vendor, (int)pdev->device, - (int)rev); - - ret = pci_read_config_byte(pdev, PCI_SLOT_INFO, &slots); - if (ret) - return ret; - - slots = PCI_SLOT_INFO_SLOTS(slots) + 1; - DBG("found %d slot(s)\n", slots); - if (slots == 0) - return -ENODEV; +EXPORT_SYMBOL_GPL(sdhci_remove_host); - ret = pci_enable_device(pdev); - if (ret) - return ret; - - chip = kzalloc(sizeof(struct sdhci_chip) + - sizeof(struct sdhci_host*) * slots, GFP_KERNEL); - if (!chip) { - ret = -ENOMEM; - goto err; - } - - chip->pdev = pdev; - chip->quirks = ent->driver_data; - - if (debug_quirks) - chip->quirks = debug_quirks; - - chip->num_slots = slots; - pci_set_drvdata(pdev, chip); - - for (i = 0;i < slots;i++) { - ret = sdhci_probe_slot(pdev, i); - if (ret) { - for (i--;i >= 0;i--) - sdhci_remove_slot(pdev, i); - goto free; - } - } - - return 0; - -free: - pci_set_drvdata(pdev, NULL); - kfree(chip); - -err: - pci_disable_device(pdev); - return ret; -} - -static void __devexit sdhci_remove(struct pci_dev *pdev) +void sdhci_free_host(struct sdhci_host *host) { - int i; - struct sdhci_chip *chip; - - chip = pci_get_drvdata(pdev); - - if (chip) { - for (i = 0;i < chip->num_slots;i++) - sdhci_remove_slot(pdev, i); - - pci_set_drvdata(pdev, NULL); - - kfree(chip); - } - - pci_disable_device(pdev); + mmc_free_host(host->mmc); } -static struct pci_driver sdhci_driver = { - .name = DRIVER_NAME, - .id_table = pci_ids, - .probe = sdhci_probe, - .remove = __devexit_p(sdhci_remove), - .suspend = sdhci_suspend, - .resume = sdhci_resume, -}; +EXPORT_SYMBOL_GPL(sdhci_free_host); /*****************************************************************************\ * * @@ -1721,14 +1407,11 @@ static int __init sdhci_drv_init(void) ": Secure Digital Host Controller Interface driver\n"); printk(KERN_INFO DRIVER_NAME ": Copyright(c) Pierre Ossman\n"); - return pci_register_driver(&sdhci_driver); + return 0; } static void __exit sdhci_drv_exit(void) { - DBG("Exiting\n"); - - pci_unregister_driver(&sdhci_driver); } module_init(sdhci_drv_init); @@ -1737,7 +1420,7 @@ module_exit(sdhci_drv_exit); module_param(debug_quirks, uint, 0444); MODULE_AUTHOR("Pierre Ossman "); -MODULE_DESCRIPTION("Secure Digital Host Controller Interface driver"); +MODULE_DESCRIPTION("Secure Digital Host Controller Interface core driver"); MODULE_LICENSE("GPL"); MODULE_PARM_DESC(debug_quirks, "Force certain quirks."); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 299118d..22fc258 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -10,18 +10,6 @@ */ /* - * PCI registers - */ - -#define PCI_SDHCI_IFPIO 0x00 -#define PCI_SDHCI_IFDMA 0x01 -#define PCI_SDHCI_IFVENDOR 0x02 - -#define PCI_SLOT_INFO 0x40 /* 8 bits */ -#define PCI_SLOT_INFO_SLOTS(x) ((x >> 4) & 7) -#define PCI_SLOT_INFO_FIRST_BAR_MASK 0x07 - -/* * Controller registers */ @@ -162,10 +150,43 @@ #define SDHCI_SPEC_VER_MASK 0x00FF #define SDHCI_SPEC_VER_SHIFT 0 -struct sdhci_chip; +struct sdhci_ops; struct sdhci_host { - struct sdhci_chip *chip; + /* Data set by hardware interface driver */ + const char *hw_name; /* Hardware bus name */ + + unsigned int quirks; /* Deviations from spec. */ + +/* Controller doesn't honor resets unless we touch the clock register */ +#define SDHCI_QUIRK_CLOCK_BEFORE_RESET (1<<0) +/* Controller has bad caps bits, but really supports DMA */ +#define SDHCI_QUIRK_FORCE_DMA (1<<1) +/* Controller doesn't like to be reset when there is no card inserted. */ +#define SDHCI_QUIRK_NO_CARD_NO_RESET (1<<2) +/* Controller doesn't like clearing the power reg before a change */ +#define SDHCI_QUIRK_SINGLE_POWER_WRITE (1<<3) +/* Controller has flaky internal state so reset it on each ios change */ +#define SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS (1<<4) +/* Controller has an unusable DMA engine */ +#define SDHCI_QUIRK_BROKEN_DMA (1<<5) +/* Controller can only DMA from 32-bit aligned addresses */ +#define SDHCI_QUIRK_32BIT_DMA_ADDR (1<<6) +/* Controller can only DMA chunk sizes that are a multiple of 32 bits */ +#define SDHCI_QUIRK_32BIT_DMA_SIZE (1<<7) +/* Controller needs to be reset after each request to stay stable */ +#define SDHCI_QUIRK_RESET_AFTER_REQUEST (1<<8) +/* Controller needs voltage and power writes to happen separately */ +#define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER (1<<9) +/* Controller has an off-by-one issue with timeout value */ +#define SDHCI_QUIRK_INCR_TIMEOUT_CONTROL (1<<10) + + int irq; /* Device IRQ */ + void __iomem * ioaddr; /* Mapped address */ + + const struct sdhci_ops *ops; /* Low level hw interface */ + + /* Internal data */ struct mmc_host *mmc; /* MMC structure */ #ifdef CONFIG_LEDS_CLASS @@ -194,22 +215,33 @@ struct sdhci_host { int offset; /* Offset into current sg */ int remain; /* Bytes left in current */ - int irq; /* Device IRQ */ - int bar; /* PCI BAR index */ - unsigned long addr; /* Bus address */ - void __iomem * ioaddr; /* Mapped address */ - struct tasklet_struct card_tasklet; /* Tasklet structures */ struct tasklet_struct finish_tasklet; struct timer_list timer; /* Timer for timeouts */ -}; -struct sdhci_chip { - struct pci_dev *pdev; + unsigned long private[0] ____cacheline_aligned; +}; - unsigned long quirks; - int num_slots; /* Slots on controller */ - struct sdhci_host *hosts[0]; /* Pointers to hosts */ +struct sdhci_ops { + int (*enable_dma)(struct sdhci_host *host); }; + + +extern struct sdhci_host *sdhci_alloc_host(struct device *dev, + size_t priv_size); +extern void sdhci_free_host(struct sdhci_host *host); + +static inline void *sdhci_priv(struct sdhci_host *host) +{ + return (void *)host->private; +} + +extern int sdhci_add_host(struct sdhci_host *host); +extern void sdhci_remove_host(struct sdhci_host *host); + +#ifdef CONFIG_PM +extern int sdhci_suspend_host(struct sdhci_host *host, pm_message_t state); +extern int sdhci_resume_host(struct sdhci_host *host); +#endif -- cgit v0.10.2 From 3815a0ebab678c20d0d9c66af5ee9e68807b6681 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Sat, 22 Mar 2008 22:05:40 +0100 Subject: sdhci: remove forced dma quirks Remove the quirk to force DMA on the Ricoh and TI controllers as it is no longer needed. The only bug they have is that they use an incorrect PCI interface value, and that is not respected anymore. Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 2a3dd6c..51798ec 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -44,8 +44,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .device = PCI_DEVICE_ID_RICOH_R5C822, .subvendor = PCI_VENDOR_ID_IBM, .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_CLOCK_BEFORE_RESET | - SDHCI_QUIRK_FORCE_DMA, + .driver_data = SDHCI_QUIRK_CLOCK_BEFORE_RESET, }, { @@ -53,24 +52,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .device = PCI_DEVICE_ID_RICOH_R5C822, .subvendor = PCI_VENDOR_ID_SAMSUNG, .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_FORCE_DMA | - SDHCI_QUIRK_NO_CARD_NO_RESET, - }, - - { - .vendor = PCI_VENDOR_ID_RICOH, - .device = PCI_DEVICE_ID_RICOH_R5C822, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_FORCE_DMA, - }, - - { - .vendor = PCI_VENDOR_ID_TI, - .device = PCI_DEVICE_ID_TI_XX21_XX11_SD, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_FORCE_DMA, + .driver_data = SDHCI_QUIRK_NO_CARD_NO_RESET, }, { -- cgit v0.10.2 From 22606405894a3ca5796eb4454a4b83af611fd201 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Sun, 23 Mar 2008 19:33:23 +0100 Subject: sdhci: more complex quirks handling Extend the quirks handling in the PCI driver to be able to have callbacks and not just flags. Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 51798ec..5094fe8 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -38,21 +38,82 @@ #define MAX_SLOTS 8 -static const struct pci_device_id pci_ids[] __devinitdata = { - { - .vendor = PCI_VENDOR_ID_RICOH, - .device = PCI_DEVICE_ID_RICOH_R5C822, - .subvendor = PCI_VENDOR_ID_IBM, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_CLOCK_BEFORE_RESET, - }, +struct sdhci_pci_chip; + +struct sdhci_pci_fixes { + unsigned int quirks; + + int (*probe)(struct sdhci_pci_chip*); +}; + +struct sdhci_pci_slot { + struct sdhci_pci_chip *chip; + struct sdhci_host *host; + int pci_bar; +}; + +struct sdhci_pci_chip { + struct pci_dev *pdev; + + unsigned int quirks; + const struct sdhci_pci_fixes *fixes; + + int num_slots; /* Slots on controller */ + struct sdhci_pci_slot *slots[MAX_SLOTS]; /* Pointers to host slots */ +}; + + +/*****************************************************************************\ + * * + * Hardware specific quirk handling * + * * +\*****************************************************************************/ + +static int ricoh_probe(struct sdhci_pci_chip *chip) +{ + if (chip->pdev->subsystem_vendor == PCI_VENDOR_ID_IBM) + chip->quirks |= SDHCI_QUIRK_CLOCK_BEFORE_RESET; + + if (chip->pdev->subsystem_vendor == PCI_VENDOR_ID_SAMSUNG) + chip->quirks |= SDHCI_QUIRK_NO_CARD_NO_RESET; + + return 0; +} + +static const struct sdhci_pci_fixes sdhci_ricoh = { + .probe = ricoh_probe, +}; + +static const struct sdhci_pci_fixes sdhci_ene_712 = { + .quirks = SDHCI_QUIRK_SINGLE_POWER_WRITE | + SDHCI_QUIRK_BROKEN_DMA, +}; + +static const struct sdhci_pci_fixes sdhci_ene_714 = { + .quirks = SDHCI_QUIRK_SINGLE_POWER_WRITE | + SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS | + SDHCI_QUIRK_BROKEN_DMA, +}; + +static const struct sdhci_pci_fixes sdhci_cafe = { + .quirks = SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER | + SDHCI_QUIRK_INCR_TIMEOUT_CONTROL, +}; + +static const struct sdhci_pci_fixes sdhci_jmicron = { + .quirks = SDHCI_QUIRK_32BIT_DMA_ADDR | + SDHCI_QUIRK_32BIT_DMA_SIZE | + SDHCI_QUIRK_RESET_AFTER_REQUEST, +}; + +static const struct pci_device_id pci_ids[] __devinitdata = { { .vendor = PCI_VENDOR_ID_RICOH, .device = PCI_DEVICE_ID_RICOH_R5C822, - .subvendor = PCI_VENDOR_ID_SAMSUNG, + .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_NO_CARD_NO_RESET, + .driver_data = (kernel_ulong_t)&sdhci_ricoh, }, { @@ -60,8 +121,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .device = PCI_DEVICE_ID_ENE_CB712_SD, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE | - SDHCI_QUIRK_BROKEN_DMA, + .driver_data = (kernel_ulong_t)&sdhci_ene_712, }, { @@ -69,8 +129,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .device = PCI_DEVICE_ID_ENE_CB712_SD_2, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE | - SDHCI_QUIRK_BROKEN_DMA, + .driver_data = (kernel_ulong_t)&sdhci_ene_712, }, { @@ -78,9 +137,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .device = PCI_DEVICE_ID_ENE_CB714_SD, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE | - SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS | - SDHCI_QUIRK_BROKEN_DMA, + .driver_data = (kernel_ulong_t)&sdhci_ene_714, }, { @@ -88,9 +145,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .device = PCI_DEVICE_ID_ENE_CB714_SD_2, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE | - SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS | - SDHCI_QUIRK_BROKEN_DMA, + .driver_data = (kernel_ulong_t)&sdhci_ene_714, }, { @@ -98,8 +153,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .device = PCI_DEVICE_ID_MARVELL_CAFE_SD, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER | - SDHCI_QUIRK_INCR_TIMEOUT_CONTROL, + .driver_data = (kernel_ulong_t)&sdhci_cafe, }, { @@ -107,9 +161,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .device = PCI_DEVICE_ID_JMICRON_JMB38X_SD, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_32BIT_DMA_ADDR | - SDHCI_QUIRK_32BIT_DMA_SIZE | - SDHCI_QUIRK_RESET_AFTER_REQUEST, + .driver_data = (kernel_ulong_t)&sdhci_jmicron, }, { /* Generic SD host controller */ @@ -121,23 +173,6 @@ static const struct pci_device_id pci_ids[] __devinitdata = { MODULE_DEVICE_TABLE(pci, pci_ids); -struct sdhci_pci_chip; - -struct sdhci_pci_slot { - struct sdhci_pci_chip *chip; - struct sdhci_host *host; - - int pci_bar; -}; - -struct sdhci_pci_chip { - struct pci_dev *pdev; - unsigned int quirks; - - int num_slots; /* Slots on controller */ - struct sdhci_pci_slot *slots[MAX_SLOTS]; /* Pointers to host slots */ -}; - /*****************************************************************************\ * * * SDHCI core callbacks * @@ -389,11 +424,19 @@ static int __devinit sdhci_pci_probe(struct pci_dev *pdev, } chip->pdev = pdev; - chip->quirks = ent->driver_data; + chip->fixes = (const struct sdhci_pci_fixes*)ent->driver_data; + if (chip->fixes) + chip->quirks = chip->fixes->quirks; chip->num_slots = slots; pci_set_drvdata(pdev, chip); + if (chip->fixes && chip->fixes->probe) { + ret = chip->fixes->probe(chip); + if (ret) + goto free; + } + for (i = 0;i < slots;i++) { slot = sdhci_pci_probe_slot(pdev, chip, first_bar + i); if (IS_ERR(slot)) { -- cgit v0.10.2 From ee53ab5d73998e502801c024a08de2c39a92c52a Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Sat, 5 Jul 2008 00:25:15 +0200 Subject: sdhci: make workaround for timeout bug more general Give the quirk for broken timeout handling a better chance of handling more controllers by simply classifying the system as broken and setting a fixed value. Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 5094fe8..856bb2b 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -98,7 +98,7 @@ static const struct sdhci_pci_fixes sdhci_ene_714 = { static const struct sdhci_pci_fixes sdhci_cafe = { .quirks = SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER | - SDHCI_QUIRK_INCR_TIMEOUT_CONTROL, + SDHCI_QUIRK_BROKEN_TIMEOUT_VAL, }; static const struct sdhci_pci_fixes sdhci_jmicron = { diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 8ce01d4..95b081a 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -314,23 +314,19 @@ static void sdhci_transfer_pio(struct sdhci_host *host) DBG("PIO transfer complete.\n"); } -static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) +static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_data *data) { u8 count; unsigned target_timeout, current_timeout; - WARN_ON(host->data); - - if (data == NULL) - return; - - /* Sanity checks */ - BUG_ON(data->blksz * data->blocks > 524288); - BUG_ON(data->blksz > host->mmc->max_blk_size); - BUG_ON(data->blocks > 65535); - - host->data = data; - host->data_early = 0; + /* + * If the host controller provides us with an incorrect timeout + * value, just skip the check and use 0xE. The hardware may take + * longer to time out, but that's much better than having a too-short + * timeout value. + */ + if ((host->quirks & SDHCI_QUIRK_BROKEN_TIMEOUT_VAL)) + return 0xE; /* timeout in us */ target_timeout = data->timeout_ns / 1000 + @@ -355,19 +351,33 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) break; } - /* - * Compensate for an off-by-one error in the CaFe hardware; otherwise, - * a too-small count gives us interrupt timeouts. - */ - if ((host->quirks & SDHCI_QUIRK_INCR_TIMEOUT_CONTROL)) - count++; - if (count >= 0xF) { printk(KERN_WARNING "%s: Too large timeout requested!\n", mmc_hostname(host->mmc)); count = 0xE; } + return count; +} + +static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) +{ + u8 count; + + WARN_ON(host->data); + + if (data == NULL) + return; + + /* Sanity checks */ + BUG_ON(data->blksz * data->blocks > 524288); + BUG_ON(data->blksz > host->mmc->max_blk_size); + BUG_ON(data->blocks > 65535); + + host->data = data; + host->data_early = 0; + + count = sdhci_calc_timeout(host, data); writeb(count, host->ioaddr + SDHCI_TIMEOUT_CONTROL); if (host->flags & SDHCI_USE_DMA) diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 22fc258..7ce12f3 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -178,8 +178,8 @@ struct sdhci_host { #define SDHCI_QUIRK_RESET_AFTER_REQUEST (1<<8) /* Controller needs voltage and power writes to happen separately */ #define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER (1<<9) -/* Controller has an off-by-one issue with timeout value */ -#define SDHCI_QUIRK_INCR_TIMEOUT_CONTROL (1<<10) +/* Controller provides an incorrect timeout value for transfers */ +#define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL (1<<10) int irq; /* Device IRQ */ void __iomem * ioaddr; /* Mapped address */ -- cgit v0.10.2 From 45211e21598441a32e53cf5032b7faeac143df6d Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Mon, 24 Mar 2008 13:09:09 +0100 Subject: sdhci: toggle JMicron PMOS setting Some of the JMicron chips requires us to manually enable the power output stages of the chip. Add the necessary hooks and functions to manage this. Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 856bb2b..ef77ed1 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -44,6 +44,8 @@ struct sdhci_pci_fixes { unsigned int quirks; int (*probe)(struct sdhci_pci_chip*); + + int (*resume)(struct sdhci_pci_chip*); }; struct sdhci_pci_slot { @@ -101,10 +103,69 @@ static const struct sdhci_pci_fixes sdhci_cafe = { SDHCI_QUIRK_BROKEN_TIMEOUT_VAL, }; +static int jmicron_pmos(struct sdhci_pci_chip *chip, int on) +{ + u8 scratch; + int ret; + + ret = pci_read_config_byte(chip->pdev, 0xAE, &scratch); + if (ret) + return ret; + + /* + * Turn PMOS on [bit 0], set over current detection to 2.4 V + * [bit 1:2] and enable over current debouncing [bit 6]. + */ + if (on) + scratch |= 0x47; + else + scratch &= ~0x47; + + ret = pci_write_config_byte(chip->pdev, 0xAE, scratch); + if (ret) + return ret; + + return 0; +} + +static int jmicron_probe(struct sdhci_pci_chip *chip) +{ + int ret; + + /* + * JMicron chips need a bit of a nudge to enable the power + * output pins. + */ + ret = jmicron_pmos(chip, 1); + if (ret) { + dev_err(&chip->pdev->dev, "Failure enabling card power\n"); + return ret; + } + + return 0; +} + +static int jmicron_resume(struct sdhci_pci_chip *chip) +{ + int ret; + + ret = jmicron_pmos(chip, 1); + if (ret) { + dev_err(&chip->pdev->dev, "Failure enabling card power\n"); + return ret; + } + + return 0; +} + static const struct sdhci_pci_fixes sdhci_jmicron = { .quirks = SDHCI_QUIRK_32BIT_DMA_ADDR | SDHCI_QUIRK_32BIT_DMA_SIZE | SDHCI_QUIRK_RESET_AFTER_REQUEST, + + .probe = jmicron_probe, + + .resume = jmicron_resume, }; static const struct pci_device_id pci_ids[] __devinitdata = { @@ -264,6 +325,12 @@ static int sdhci_pci_resume (struct pci_dev *pdev) if (ret) return ret; + if (chip->fixes && chip->fixes->resume) { + ret = chip->fixes->resume(chip); + if (ret) + return ret; + } + for (i = 0;i < chip->num_slots;i++) { slot = chip->slots[i]; if (!slot) -- cgit v0.10.2 From 4489428ab5a49a6f443d9aa17f1d891417787d7b Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Fri, 4 Apr 2008 19:36:59 +0200 Subject: sdhci: support JMicron secondary interface JMicron chips sometimes have two interfaces to work around limitations in Microsoft's sdhci driver. This patch allows us to use either interface. Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index ef77ed1..5dcb495 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -39,12 +39,18 @@ #define MAX_SLOTS 8 struct sdhci_pci_chip; +struct sdhci_pci_slot; struct sdhci_pci_fixes { unsigned int quirks; int (*probe)(struct sdhci_pci_chip*); + int (*probe_slot)(struct sdhci_pci_slot*); + void (*remove_slot)(struct sdhci_pci_slot*); + + int (*suspend)(struct sdhci_pci_chip*, + pm_message_t); int (*resume)(struct sdhci_pci_chip*); }; @@ -133,6 +139,38 @@ static int jmicron_probe(struct sdhci_pci_chip *chip) int ret; /* + * JMicron chips can have two interfaces to the same hardware + * in order to work around limitations in Microsoft's driver. + * We need to make sure we only bind to one of them. + * + * This code assumes two things: + * + * 1. The PCI code adds subfunctions in order. + * + * 2. The MMC interface has a lower subfunction number + * than the SD interface. + */ + if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_SD) { + struct pci_dev *sd_dev; + + sd_dev = NULL; + while ((sd_dev = pci_get_device(PCI_VENDOR_ID_JMICRON, + PCI_DEVICE_ID_JMICRON_JMB38X_MMC, sd_dev)) != NULL) { + if ((PCI_SLOT(chip->pdev->devfn) == + PCI_SLOT(sd_dev->devfn)) && + (chip->pdev->bus == sd_dev->bus)) + break; + } + + if (sd_dev) { + pci_dev_put(sd_dev); + dev_info(&chip->pdev->dev, "Refusing to bind to " + "secondary interface.\n"); + return -ENODEV; + } + } + + /* * JMicron chips need a bit of a nudge to enable the power * output pins. */ @@ -145,9 +183,58 @@ static int jmicron_probe(struct sdhci_pci_chip *chip) return 0; } +static void jmicron_enable_mmc(struct sdhci_host *host, int on) +{ + u8 scratch; + + scratch = readb(host->ioaddr + 0xC0); + + if (on) + scratch |= 0x01; + else + scratch &= ~0x01; + + writeb(scratch, host->ioaddr + 0xC0); +} + +static int jmicron_probe_slot(struct sdhci_pci_slot *slot) +{ + /* + * The secondary interface requires a bit set to get the + * interrupts. + */ + if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) + jmicron_enable_mmc(slot->host, 1); + + return 0; +} + +static void jmicron_remove_slot(struct sdhci_pci_slot *slot) +{ + if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) + jmicron_enable_mmc(slot->host, 0); +} + +static int jmicron_suspend(struct sdhci_pci_chip *chip, pm_message_t state) +{ + int i; + + if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) { + for (i = 0;i < chip->num_slots;i++) + jmicron_enable_mmc(chip->slots[i]->host, 0); + } + + return 0; +} + static int jmicron_resume(struct sdhci_pci_chip *chip) { - int ret; + int ret, i; + + if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) { + for (i = 0;i < chip->num_slots;i++) + jmicron_enable_mmc(chip->slots[i]->host, 1); + } ret = jmicron_pmos(chip, 1); if (ret) { @@ -165,6 +252,10 @@ static const struct sdhci_pci_fixes sdhci_jmicron = { .probe = jmicron_probe, + .probe_slot = jmicron_probe_slot, + .remove_slot = jmicron_remove_slot, + + .suspend = jmicron_suspend, .resume = jmicron_resume, }; @@ -225,6 +316,14 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .driver_data = (kernel_ulong_t)&sdhci_jmicron, }, + { + .vendor = PCI_VENDOR_ID_JMICRON, + .device = PCI_DEVICE_ID_JMICRON_JMB38X_MMC, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_jmicron, + }, + { /* Generic SD host controller */ PCI_DEVICE_CLASS((PCI_CLASS_SYSTEM_SDHCI << 8), 0xFFFF00) }, @@ -301,6 +400,15 @@ static int sdhci_pci_suspend (struct pci_dev *pdev, pm_message_t state) } } + if (chip->fixes && chip->fixes->suspend) { + ret = chip->fixes->suspend(chip, state); + if (ret) { + for (i = chip->num_slots - 1;i >= 0;i--) + sdhci_resume_host(chip->slots[i]->host); + return ret; + } + } + pci_save_state(pdev); pci_enable_wake(pdev, pci_choose_state(pdev, state), 0); pci_disable_device(pdev); @@ -418,12 +526,22 @@ static struct sdhci_pci_slot * __devinit sdhci_pci_probe_slot( goto release; } + if (chip->fixes && chip->fixes->probe_slot) { + ret = chip->fixes->probe_slot(slot); + if (ret) + goto unmap; + } + ret = sdhci_add_host(host); if (ret) - goto unmap; + goto remove; return slot; +remove: + if (chip->fixes && chip->fixes->remove_slot) + chip->fixes->remove_slot(slot); + unmap: iounmap(host->ioaddr); @@ -437,7 +555,12 @@ release: static void sdhci_pci_remove_slot(struct sdhci_pci_slot *slot) { sdhci_remove_host(slot->host); + + if (slot->chip->fixes && slot->chip->fixes->remove_slot) + slot->chip->fixes->remove_slot(slot); + pci_release_region(slot->chip->pdev, slot->pci_bar); + sdhci_free_host(slot->host); } diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 6595382..3015347 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2188,6 +2188,7 @@ #define PCI_DEVICE_ID_JMICRON_JMB366 0x2366 #define PCI_DEVICE_ID_JMICRON_JMB368 0x2368 #define PCI_DEVICE_ID_JMICRON_JMB38X_SD 0x2381 +#define PCI_DEVICE_ID_JMICRON_JMB38X_MMC 0x2382 #define PCI_DEVICE_ID_JMICRON_JMB38X_MS 0x2383 #define PCI_VENDOR_ID_KORENIX 0x1982 -- cgit v0.10.2 From 1e72859e3ae16346d4007024b20d2d4ef387dcc3 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Wed, 16 Apr 2008 19:13:13 +0200 Subject: sdhci: handle hot-remove Gracefully handle when the device is suddenly removed. Do a test read and avoid any further access if that read returns -1. Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 5dcb495..8554466 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -47,7 +47,7 @@ struct sdhci_pci_fixes { int (*probe)(struct sdhci_pci_chip*); int (*probe_slot)(struct sdhci_pci_slot*); - void (*remove_slot)(struct sdhci_pci_slot*); + void (*remove_slot)(struct sdhci_pci_slot*, int); int (*suspend)(struct sdhci_pci_chip*, pm_message_t); @@ -209,8 +209,11 @@ static int jmicron_probe_slot(struct sdhci_pci_slot *slot) return 0; } -static void jmicron_remove_slot(struct sdhci_pci_slot *slot) +static void jmicron_remove_slot(struct sdhci_pci_slot *slot, int dead) { + if (dead) + return; + if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) jmicron_enable_mmc(slot->host, 0); } @@ -540,7 +543,7 @@ static struct sdhci_pci_slot * __devinit sdhci_pci_probe_slot( remove: if (chip->fixes && chip->fixes->remove_slot) - chip->fixes->remove_slot(slot); + chip->fixes->remove_slot(slot, 0); unmap: iounmap(host->ioaddr); @@ -554,10 +557,18 @@ release: static void sdhci_pci_remove_slot(struct sdhci_pci_slot *slot) { - sdhci_remove_host(slot->host); + int dead; + u32 scratch; + + dead = 0; + scratch = readl(slot->host->ioaddr + SDHCI_INT_STATUS); + if (scratch == (u32)-1) + dead = 1; + + sdhci_remove_host(slot->host, dead); if (slot->chip->fixes && slot->chip->fixes->remove_slot) - slot->chip->fixes->remove_slot(slot); + slot->chip->fixes->remove_slot(slot, dead); pci_release_region(slot->chip->pdev, slot->pci_bar); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 95b081a..0ab582e 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -712,7 +712,8 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq) host->mrq = mrq; - if (!(readl(host->ioaddr + SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT)) { + if (!(readl(host->ioaddr + SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT) + || (host->flags & SDHCI_DEVICE_DEAD)) { host->mrq->cmd->error = -ENOMEDIUM; tasklet_schedule(&host->finish_tasklet); } else @@ -732,6 +733,9 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) spin_lock_irqsave(&host->lock, flags); + if (host->flags & SDHCI_DEVICE_DEAD) + goto out; + /* * Reset the chip on each power off. * Should clear out any weird states. @@ -770,6 +774,7 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) if(host->quirks & SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS) sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA); +out: mmiowb(); spin_unlock_irqrestore(&host->lock, flags); } @@ -784,7 +789,10 @@ static int sdhci_get_ro(struct mmc_host *mmc) spin_lock_irqsave(&host->lock, flags); - present = readl(host->ioaddr + SDHCI_PRESENT_STATE); + if (host->flags & SDHCI_DEVICE_DEAD) + present = 0; + else + present = readl(host->ioaddr + SDHCI_PRESENT_STATE); spin_unlock_irqrestore(&host->lock, flags); @@ -801,6 +809,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable) spin_lock_irqsave(&host->lock, flags); + if (host->flags & SDHCI_DEVICE_DEAD) + goto out; + ier = readl(host->ioaddr + SDHCI_INT_ENABLE); ier &= ~SDHCI_INT_CARD_INT; @@ -810,6 +821,7 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable) writel(ier, host->ioaddr + SDHCI_INT_ENABLE); writel(ier, host->ioaddr + SDHCI_SIGNAL_ENABLE); +out: mmiowb(); spin_unlock_irqrestore(&host->lock, flags); @@ -875,10 +887,11 @@ static void sdhci_tasklet_finish(unsigned long param) * The controller needs a reset of internal state machines * upon error conditions. */ - if (mrq->cmd->error || - (mrq->data && (mrq->data->error || - (mrq->data->stop && mrq->data->stop->error))) || - (host->quirks & SDHCI_QUIRK_RESET_AFTER_REQUEST)) { + if (!(host->flags & SDHCI_DEVICE_DEAD) && + (mrq->cmd->error || + (mrq->data && (mrq->data->error || + (mrq->data->stop && mrq->data->stop->error))) || + (host->quirks & SDHCI_QUIRK_RESET_AFTER_REQUEST))) { /* Some controllers need this kick or reset won't work here */ if (host->quirks & SDHCI_QUIRK_CLOCK_BEFORE_RESET) { @@ -1378,15 +1391,34 @@ untasklet: EXPORT_SYMBOL_GPL(sdhci_add_host); -void sdhci_remove_host(struct sdhci_host *host) +void sdhci_remove_host(struct sdhci_host *host, int dead) { + unsigned long flags; + + if (dead) { + spin_lock_irqsave(&host->lock, flags); + + host->flags |= SDHCI_DEVICE_DEAD; + + if (host->mrq) { + printk(KERN_ERR "%s: Controller removed during " + " transfer!\n", mmc_hostname(host->mmc)); + + host->mrq->cmd->error = -ENOMEDIUM; + tasklet_schedule(&host->finish_tasklet); + } + + spin_unlock_irqrestore(&host->lock, flags); + } + mmc_remove_host(host->mmc); #ifdef CONFIG_LEDS_CLASS led_classdev_unregister(&host->led); #endif - sdhci_reset(host, SDHCI_RESET_ALL); + if (!dead) + sdhci_reset(host, SDHCI_RESET_ALL); free_irq(host->irq, host); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 7ce12f3..7c30251 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -198,6 +198,7 @@ struct sdhci_host { int flags; /* Host attributes */ #define SDHCI_USE_DMA (1<<0) /* Host is DMA capable */ #define SDHCI_REQ_USE_DMA (1<<1) /* Use DMA for this req. */ +#define SDHCI_DEVICE_DEAD (1<<2) /* Device unresponsive */ unsigned int max_clk; /* Max possible freq (MHz) */ unsigned int timeout_clk; /* Timeout freq (KHz) */ @@ -239,7 +240,7 @@ static inline void *sdhci_priv(struct sdhci_host *host) } extern int sdhci_add_host(struct sdhci_host *host); -extern void sdhci_remove_host(struct sdhci_host *host); +extern void sdhci_remove_host(struct sdhci_host *host, int dead); #ifdef CONFIG_PM extern int sdhci_suspend_host(struct sdhci_host *host, pm_message_t state); -- cgit v0.10.2 From fd8c326cadd2f781d5c4d6bcee79bb17b3745bb0 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Sat, 24 May 2008 22:36:31 +0200 Subject: mmc_test: add test case control Add the ability to run just a single test case by writing the test case number into the sysfs "test" file. Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c index ffadee5..371d922 100644 --- a/drivers/mmc/card/mmc_test.c +++ b/drivers/mmc/card/mmc_test.c @@ -743,7 +743,7 @@ static const struct mmc_test_case mmc_test_cases[] = { static struct mutex mmc_test_lock; -static void mmc_test_run(struct mmc_test_card *test) +static void mmc_test_run(struct mmc_test_card *test, int testcase) { int i, ret; @@ -753,6 +753,9 @@ static void mmc_test_run(struct mmc_test_card *test) mmc_claim_host(test->card->host); for (i = 0;i < ARRAY_SIZE(mmc_test_cases);i++) { + if (testcase && ((i + 1) != testcase)) + continue; + printk(KERN_INFO "%s: Test case %d. %s...\n", mmc_hostname(test->card->host), i + 1, mmc_test_cases[i].name); @@ -824,9 +827,12 @@ static ssize_t mmc_test_store(struct device *dev, { struct mmc_card *card; struct mmc_test_card *test; + int testcase; card = container_of(dev, struct mmc_card, dev); + testcase = simple_strtol(buf, NULL, 10); + test = kzalloc(sizeof(struct mmc_test_card), GFP_KERNEL); if (!test) return -ENOMEM; @@ -836,7 +842,7 @@ static ssize_t mmc_test_store(struct device *dev, test->buffer = kzalloc(BUFFER_SIZE, GFP_KERNEL); if (test->buffer) { mutex_lock(&mmc_test_lock); - mmc_test_run(test); + mmc_test_run(test, testcase); mutex_unlock(&mmc_test_lock); } -- cgit v0.10.2 From 309d9736a903527d8bc41787b07573a054439bf6 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Wed, 28 May 2008 09:54:50 +0200 Subject: sdhci-pci: unaligned data with ricoh controllers The Ricoh controllers cannot handle unaligned data blocks. Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 8554466..c0fbf48 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -91,6 +91,7 @@ static int ricoh_probe(struct sdhci_pci_chip *chip) static const struct sdhci_pci_fixes sdhci_ricoh = { .probe = ricoh_probe, + .quirks = SDHCI_QUIRK_32BIT_DMA_ADDR, }; static const struct sdhci_pci_fixes sdhci_ene_712 = { -- cgit v0.10.2 From 150a55683b6b0ccb66aae75a10a3a514340c7c03 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 20 May 2008 00:57:27 +0300 Subject: include/linux/mmc/mmc.h: remove CVS tags This patch removes a CVS tag that wasn't updated for a long time. Signed-off-by: Adrian Bunk Signed-off-by: Pierre Ossman diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 4236fbf..14b81f3 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -16,7 +16,6 @@ * Based strongly on code by: * * Author: Yong-iL Joh - * Date : $Date: 2002/06/18 12:37:30 $ * * Author: Andrew Christian * 15 May 2002 -- cgit v0.10.2 From 28f52482b41edc88cdf575aa6ed414c6e116ce10 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Tue, 17 Jun 2008 18:17:15 +0400 Subject: mmc: add support for card-detection polling Some hosts (and boards that use mmc_spi) do not use interrupts on the CD line, so they can't trigger mmc_detect_change. We want to poll the card and see if there was a change. 1 second poll interval seems resonable. This patch also implements .get_cd() host operation, that could be used by the hosts that are able to report card-detect status without need to talk MMC. Signed-off-by: Anton Vorontsov Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 01ced4c..ede5d1e 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -638,6 +638,9 @@ void mmc_rescan(struct work_struct *work) */ mmc_bus_put(host); + if (host->ops->get_cd && host->ops->get_cd(host) == 0) + goto out; + mmc_claim_host(host); mmc_power_up(host); @@ -652,7 +655,7 @@ void mmc_rescan(struct work_struct *work) if (!err) { if (mmc_attach_sdio(host, ocr)) mmc_power_off(host); - return; + goto out; } /* @@ -662,7 +665,7 @@ void mmc_rescan(struct work_struct *work) if (!err) { if (mmc_attach_sd(host, ocr)) mmc_power_off(host); - return; + goto out; } /* @@ -672,7 +675,7 @@ void mmc_rescan(struct work_struct *work) if (!err) { if (mmc_attach_mmc(host, ocr)) mmc_power_off(host); - return; + goto out; } mmc_release_host(host); @@ -683,6 +686,9 @@ void mmc_rescan(struct work_struct *work) mmc_bus_put(host); } +out: + if (host->caps & MMC_CAP_NEEDS_POLL) + mmc_schedule_delayed_work(&host->detect, HZ); } void mmc_start_host(struct mmc_host *host) diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 7ab962f..6188e19 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -51,8 +51,18 @@ struct mmc_ios { struct mmc_host_ops { void (*request)(struct mmc_host *host, struct mmc_request *req); + /* + * Avoid calling these three functions too often or in a "fast path", + * since underlaying controller might implement them in an expensive + * and/or slow way. + * + * Also note that these functions might sleep, so don't call them + * in the atomic contexts! + */ void (*set_ios)(struct mmc_host *host, struct mmc_ios *ios); int (*get_ro)(struct mmc_host *host); + int (*get_cd)(struct mmc_host *host); + void (*enable_sdio_irq)(struct mmc_host *host, int enable); }; @@ -94,6 +104,7 @@ struct mmc_host { #define MMC_CAP_SD_HIGHSPEED (1 << 3) /* Can do SD high-speed timing */ #define MMC_CAP_SDIO_IRQ (1 << 4) /* Can signal pending SDIO IRQs */ #define MMC_CAP_SPI (1 << 5) /* Talks only SPI protocols */ +#define MMC_CAP_NEEDS_POLL (1 << 6) /* Needs polling for card-detection */ /* host specific block data */ unsigned int max_seg_size; /* see blk_queue_max_segment_size */ -- cgit v0.10.2 From 619ef4b42128709de4d89d209b2c874f560deecd Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Tue, 17 Jun 2008 18:17:21 +0400 Subject: mmc_spi: add support for card-detection polling This patch adds new platform data variable "caps", so platforms could pass theirs capabilities into MMC core (for example, platforms without interrupt on the CD line will most probably want to pass MMC_CAP_NEEDS_POLL). New platform get_cd() callback provided to optimize polling. Signed-off-by: Anton Vorontsov Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 3550858..547eb85 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1131,11 +1131,20 @@ static int mmc_spi_get_ro(struct mmc_host *mmc) return 0; } +static int mmc_spi_get_cd(struct mmc_host *mmc) +{ + struct mmc_spi_host *host = mmc_priv(mmc); + + if (host->pdata && host->pdata->get_cd) + return !!host->pdata->get_cd(mmc->parent); + return -ENOSYS; +} static const struct mmc_host_ops mmc_spi_ops = { .request = mmc_spi_request, .set_ios = mmc_spi_set_ios, .get_ro = mmc_spi_get_ro, + .get_cd = mmc_spi_get_cd, }; @@ -1319,17 +1328,23 @@ static int mmc_spi_probe(struct spi_device *spi) goto fail_glue_init; } + /* pass platform capabilities, if any */ + if (host->pdata) + mmc->caps |= host->pdata->caps; + status = mmc_add_host(mmc); if (status != 0) goto fail_add_host; - dev_info(&spi->dev, "SD/MMC host %s%s%s%s\n", + dev_info(&spi->dev, "SD/MMC host %s%s%s%s%s\n", mmc->class_dev.bus_id, host->dma_dev ? "" : ", no DMA", (host->pdata && host->pdata->get_ro) ? "" : ", no WP", (host->pdata && host->pdata->setpower) - ? "" : ", no poweroff"); + ? "" : ", no poweroff", + (mmc->caps & MMC_CAP_NEEDS_POLL) + ? ", cd polling" : ""); return 0; fail_add_host: diff --git a/include/linux/spi/mmc_spi.h b/include/linux/spi/mmc_spi.h index d5ca78b..a3626ae 100644 --- a/include/linux/spi/mmc_spi.h +++ b/include/linux/spi/mmc_spi.h @@ -23,6 +23,15 @@ struct mmc_spi_platform_data { /* sense switch on sd cards */ int (*get_ro)(struct device *); + /* + * If board does not use CD interrupts, driver can optimize polling + * using this function. + */ + int (*get_cd)(struct device *); + + /* Capabilities to pass into mmc core (e.g. MMC_CAP_NEEDS_POLL). */ + unsigned long caps; + /* how long to debounce card detect, in msecs */ u16 detect_delay; -- cgit v0.10.2 From 08f80bb5196517a0dfe50dc7c10f234c0ff2f0e8 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Tue, 17 Jun 2008 18:17:39 +0400 Subject: mmc: change .get_ro() callback semantics Now get_ro() callback must return 0/1 values for its logical states, and negative errno values in case of error. If particular host instance doesn't support RO/WP switch, it should return -ENOSYS. This patch changes some hosts in two ways: 1. Now functions should be smart to not return negative values in "RO asserted" case (particularly gpio_ calls could return negative values for the outermost GPIOs). Also, board code usually passes get_ro() callbacks that directly return gpioreg & bit result, so at91_mci, imxmmc, pxamci and mmc_spi's get_ro() handlers need take special care when returning platform's values to the mmc core. 2. In case of host instance didn't implement get_ro() callback, it should really return -ENOSYS and let the mmc core decide what to do about it (mmc core thinks the same way as the hosts, so it isn't functional change). Signed-off-by: Anton Vorontsov Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 7ef3b15..b122eb9 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -494,13 +494,13 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, * Check if read-only switch is active. */ if (!oldcard) { - if (!host->ops->get_ro) { + if (!host->ops->get_ro || host->ops->get_ro(host) < 0) { printk(KERN_WARNING "%s: host does not " "support reading read-only " "switch. assuming write-enable.\n", mmc_hostname(host)); } else { - if (host->ops->get_ro(host)) + if (host->ops->get_ro(host) > 0) mmc_card_set_readonly(card); } } diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index 8979ad3..b9d4ed6 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -793,19 +793,15 @@ static irqreturn_t at91_mmc_det_irq(int irq, void *_host) static int at91_mci_get_ro(struct mmc_host *mmc) { - int read_only = 0; struct at91mci_host *host = mmc_priv(mmc); - if (host->board->wp_pin) { - read_only = gpio_get_value(host->board->wp_pin); - printk(KERN_WARNING "%s: card is %s\n", mmc_hostname(mmc), - (read_only ? "read-only" : "read-write") ); - } - else { - printk(KERN_WARNING "%s: host does not support reading read-only " - "switch. Assuming write-enable.\n", mmc_hostname(mmc)); - } - return read_only; + if (host->board->wp_pin) + return !!gpio_get_value(host->board->wp_pin); + /* + * Board doesn't support read only detection; let the mmc core + * decide what to do. + */ + return -ENOSYS; } static const struct mmc_host_ops at91_mci_ops = { diff --git a/drivers/mmc/host/imxmmc.c b/drivers/mmc/host/imxmmc.c index eed211b..5e880c0 100644 --- a/drivers/mmc/host/imxmmc.c +++ b/drivers/mmc/host/imxmmc.c @@ -892,9 +892,12 @@ static int imxmci_get_ro(struct mmc_host *mmc) struct imxmci_host *host = mmc_priv(mmc); if (host->pdata && host->pdata->get_ro) - return host->pdata->get_ro(mmc_dev(mmc)); - /* Host doesn't support read only detection so assume writeable */ - return 0; + return !!host->pdata->get_ro(mmc_dev(mmc)); + /* + * Board doesn't support read only detection; let the mmc core + * decide what to do. + */ + return -ENOSYS; } diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 547eb85..4e82f64 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1126,9 +1126,12 @@ static int mmc_spi_get_ro(struct mmc_host *mmc) struct mmc_spi_host *host = mmc_priv(mmc); if (host->pdata && host->pdata->get_ro) - return host->pdata->get_ro(mmc->parent); - /* board doesn't support read only detection; assume writeable */ - return 0; + return !!host->pdata->get_ro(mmc->parent); + /* + * Board doesn't support read only detection; let the mmc core + * decide what to do. + */ + return -ENOSYS; } static int mmc_spi_get_cd(struct mmc_host *mmc) diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index d89475d..d39f597 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -374,9 +374,12 @@ static int pxamci_get_ro(struct mmc_host *mmc) struct pxamci_host *host = mmc_priv(mmc); if (host->pdata && host->pdata->get_ro) - return host->pdata->get_ro(mmc_dev(mmc)); - /* Host doesn't support read only detection so assume writeable */ - return 0; + return !!host->pdata->get_ro(mmc_dev(mmc)); + /* + * Board doesn't support read only detection; let the mmc core + * decide what to do. + */ + return -ENOSYS; } static void pxamci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c index c303e7f..67e5a9b 100644 --- a/drivers/mmc/host/wbsd.c +++ b/drivers/mmc/host/wbsd.c @@ -939,7 +939,7 @@ static int wbsd_get_ro(struct mmc_host *mmc) spin_unlock_bh(&host->lock); - return csr & WBSD_WRPT; + return !!(csr & WBSD_WRPT); } static const struct mmc_host_ops wbsd_ops = { diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 6188e19..753b723 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -58,6 +58,18 @@ struct mmc_host_ops { * * Also note that these functions might sleep, so don't call them * in the atomic contexts! + * + * Return values for the get_ro callback should be: + * 0 for a read/write card + * 1 for a read-only card + * -ENOSYS when not supported (equal to NULL callback) + * or a negative errno value when something bad happened + * + * Return values for the get_ro callback should be: + * 0 for a absent card + * 1 for a present card + * -ENOSYS when not supported (equal to NULL callback) + * or a negative errno value when something bad happened */ void (*set_ios)(struct mmc_host *host, struct mmc_ios *ios); int (*get_ro)(struct mmc_host *host); -- cgit v0.10.2 From 8769392b1918ec70ab62eebc82e06c47c12f8304 Mon Sep 17 00:00:00 2001 From: Deepak Saxena Date: Mon, 16 Jun 2008 19:20:57 -0700 Subject: MMC: Trivial comment cleanup Make the variable name in the comments match the actual name of the variable. Signed-off-by: Deepak Saxena Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 3da29ee..fdd7c76 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -288,7 +288,7 @@ static struct device_type mmc_type = { /* * Handle the detection and initialisation of a card. * - * In the case of a resume, "curcard" will contain the card + * In the case of a resume, "oldcard" will contain the card * we're trying to reinitialise. */ static int mmc_init_card(struct mmc_host *host, u32 ocr, diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index b122eb9..26fc098 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -326,7 +326,7 @@ static struct device_type sd_type = { /* * Handle the detection and initialisation of a card. * - * In the case of a resume, "curcard" will contain the card + * In the case of a resume, "oldcard" will contain the card * we're trying to reinitialise. */ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, -- cgit v0.10.2 From 80f9254668c63ae73c1359d8de50ad94aa1aa94a Mon Sep 17 00:00:00 2001 From: Marc Pignat Date: Fri, 30 May 2008 14:05:24 +0200 Subject: mmc: at91_mci: support for block size not modulo 4 Implement transfer with size not modulo 4 for at91sam9*. Please note that the at91rm9200 simply can't handle this. Signed-off-by: Marc Pignat Signed-off-by: Nicolas Ferre Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index b9d4ed6..e40340f 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -233,11 +233,11 @@ static void at91_mci_pre_dma_read(struct at91mci_host *host) if (i == 0) { at91_mci_write(host, ATMEL_PDC_RPR, sg->dma_address); - at91_mci_write(host, ATMEL_PDC_RCR, sg->length / 4); + at91_mci_write(host, ATMEL_PDC_RCR, (data->blksz & 0x3) ? sg->length : sg->length / 4); } else { at91_mci_write(host, ATMEL_PDC_RNPR, sg->dma_address); - at91_mci_write(host, ATMEL_PDC_RNCR, sg->length / 4); + at91_mci_write(host, ATMEL_PDC_RNCR, (data->blksz & 0x3) ? sg->length : sg->length / 4); } } @@ -430,7 +430,7 @@ static void at91_mci_send_command(struct at91mci_host *host, struct mmc_command if (data) { - if ( data->blksz & 0x3 ) { + if ( cpu_is_at91rm9200() && (data->blksz & 0x3) ) { pr_debug("Unsupported block size\n"); cmd->error = -EINVAL; mmc_request_done(host->mmc, host->request); @@ -482,7 +482,10 @@ static void at91_mci_send_command(struct at91mci_host *host, struct mmc_command } else { /* zero block length and PDC mode */ mr = at91_mci_read(host, AT91_MCI_MR) & 0x7fff; - at91_mci_write(host, AT91_MCI_MR, mr | (block_length << 16) | AT91_MCI_PDCMODE); + mr |= (data->blksz & 0x3) ? AT91_MCI_PDCFBYTE : 0; + mr |= (block_length << 16); + mr |= AT91_MCI_PDCMODE; + at91_mci_write(host, AT91_MCI_MR, mr); /* * Disable the PDC controller @@ -517,7 +520,9 @@ static void at91_mci_send_command(struct at91mci_host *host, struct mmc_command pr_debug("Transmitting %d bytes\n", host->total_length); at91_mci_write(host, ATMEL_PDC_TPR, host->physical_address); - at91_mci_write(host, ATMEL_PDC_TCR, host->total_length / 4); + at91_mci_write(host, ATMEL_PDC_TCR, (data->blksz & 0x3) ? + host->total_length : host->total_length / 4); + ier = AT91_MCI_CMDRDY; } } -- cgit v0.10.2 From e181dce8acab4f7d7c4772d2a8281510d503ab21 Mon Sep 17 00:00:00 2001 From: Marc Pignat Date: Fri, 30 May 2008 14:06:32 +0200 Subject: mmc: at91_mci: show timeouts Detect command timeout (or mci controller hangs). Signed-off-by: Marc Pignat Signed-off-by: Hans J Koch Signed-off-by: Nicolas Ferre Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index e40340f..fce171c 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -125,8 +125,33 @@ struct at91mci_host /* Latest in the scatterlist that has been enabled for transfer */ int transfer_index; + + /* Timer for timeouts */ + struct timer_list timer; }; +static void at91_timeout_timer(unsigned long data) +{ + struct at91mci_host *host; + + host = (struct at91mci_host *)data; + + if (host->request) { + dev_err(host->mmc->parent, "Timeout waiting end of packet\n"); + + if (host->cmd && host->cmd->data) { + host->cmd->data->error = -ETIMEDOUT; + } else { + if (host->cmd) + host->cmd->error = -ETIMEDOUT; + else + host->request->cmd->error = -ETIMEDOUT; + } + + mmc_request_done(host->mmc, host->request); + } +} + /* * Copy from sg to a dma block - used for transfers */ @@ -557,9 +582,10 @@ static void at91_mci_process_next(struct at91mci_host *host) else if ((!(host->flags & FL_SENT_STOP)) && host->request->stop) { host->flags |= FL_SENT_STOP; at91_mci_send_command(host, host->request->stop); - } - else + } else { + del_timer(&host->timer); mmc_request_done(host->mmc, host->request); + } } /* @@ -618,6 +644,8 @@ static void at91_mci_request(struct mmc_host *mmc, struct mmc_request *mrq) host->request = mrq; host->flags = 0; + mod_timer(&host->timer, jiffies + HZ); + at91_mci_process_next(host); } @@ -936,6 +964,8 @@ static int __init at91_mci_probe(struct platform_device *pdev) mmc_add_host(mmc); + setup_timer(&host->timer, at91_timeout_timer, (unsigned long)host); + /* * monitor card insertion/removal if we can */ @@ -996,6 +1026,7 @@ static int __exit at91_mci_remove(struct platform_device *pdev) } at91_mci_disable(host); + del_timer_sync(&host->timer); mmc_remove_host(mmc); free_irq(host->irq, host); -- cgit v0.10.2 From c5a89c6c0805959f813e8342d6f4040860f6d7db Mon Sep 17 00:00:00 2001 From: Marc Pignat Date: Fri, 30 May 2008 14:07:47 +0200 Subject: mmc: at91_mci: avoid timeouts The at91 mci controller internal state machine seems to often crash. This can be fixed by resetting the controller after each command for at91rm9200 and by setting the MCI_BLKR register on at91sam926*. Signed-off-by: Marc Pignat Signed-off-by: Hans J Koch Signed-off-by: Nicolas Ferre Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index fce171c..e924211 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -130,6 +130,43 @@ struct at91mci_host struct timer_list timer; }; +/* + * Reset the controller and restore most of the state + */ +static void at91_reset_host(struct at91mci_host *host) +{ + unsigned long flags; + u32 mr; + u32 sdcr; + u32 dtor; + u32 imr; + + local_irq_save(flags); + imr = at91_mci_read(host, AT91_MCI_IMR); + + at91_mci_write(host, AT91_MCI_IDR, 0xffffffff); + + /* save current state */ + mr = at91_mci_read(host, AT91_MCI_MR) & 0x7fff; + sdcr = at91_mci_read(host, AT91_MCI_SDCR); + dtor = at91_mci_read(host, AT91_MCI_DTOR); + + /* reset the controller */ + at91_mci_write(host, AT91_MCI_CR, AT91_MCI_MCIDIS | AT91_MCI_SWRST); + + /* restore state */ + at91_mci_write(host, AT91_MCI_CR, AT91_MCI_MCIEN); + at91_mci_write(host, AT91_MCI_MR, mr); + at91_mci_write(host, AT91_MCI_SDCR, sdcr); + at91_mci_write(host, AT91_MCI_DTOR, dtor); + at91_mci_write(host, AT91_MCI_IER, imr); + + /* make sure sdio interrupts will fire */ + at91_mci_read(host, AT91_MCI_SR); + + local_irq_restore(flags); +} + static void at91_timeout_timer(unsigned long data) { struct at91mci_host *host; @@ -148,6 +185,7 @@ static void at91_timeout_timer(unsigned long data) host->request->cmd->error = -ETIMEDOUT; } + at91_reset_host(host); mmc_request_done(host->mmc, host->request); } } @@ -512,6 +550,11 @@ static void at91_mci_send_command(struct at91mci_host *host, struct mmc_command mr |= AT91_MCI_PDCMODE; at91_mci_write(host, AT91_MCI_MR, mr); + if (!cpu_is_at91rm9200()) + at91_mci_write(host, AT91_MCI_BLKR, + AT91_MCI_BLKR_BCNT(blocks) | + AT91_MCI_BLKR_BLKLEN(block_length)); + /* * Disable the PDC controller */ @@ -584,6 +627,11 @@ static void at91_mci_process_next(struct at91mci_host *host) at91_mci_send_command(host, host->request->stop); } else { del_timer(&host->timer); + /* the at91rm9200 mci controller hangs after some transfers, + * and the workaround is to reset it after each transfer. + */ + if (cpu_is_at91rm9200()) + at91_reset_host(host); mmc_request_done(host->mmc, host->request); } } diff --git a/include/asm-arm/arch-at91/at91_mci.h b/include/asm-arm/arch-at91/at91_mci.h index 1551fc2..400ec10 100644 --- a/include/asm-arm/arch-at91/at91_mci.h +++ b/include/asm-arm/arch-at91/at91_mci.h @@ -75,6 +75,10 @@ #define AT91_MCI_TRTYP_MULTIPLE (1 << 19) #define AT91_MCI_TRTYP_STREAM (2 << 19) +#define AT91_MCI_BLKR 0x18 /* Block Register */ +#define AT91_MCI_BLKR_BCNT(n) ((0xffff & (n)) << 0) /* Block count */ +#define AT91_MCI_BLKR_BLKLEN(n) ((0xffff & (n)) << 16) /* Block lenght */ + #define AT91_MCI_RSPR(n) (0x20 + ((n) * 4)) /* Response Registers 0-3 */ #define AT91_MCR_RDR 0x30 /* Receive Data Register */ #define AT91_MCR_TDR 0x34 /* Transmit Data Register */ -- cgit v0.10.2 From 4ac24a8722b97d5b4cfc48a4fbd0b2489e358d4d Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 30 May 2008 14:18:57 +0200 Subject: mmc: at91_mci: update bytes_xfered value once xfer done Modify bytes_xfered value after a write. That will report, as accurately as possible, the amount of sectors that are effectively written. This update introduces the check of the busy signal given by the card. Signed-off-by: Nicolas Ferre Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index e924211..b5a6e25 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -340,8 +340,6 @@ static void at91_mci_post_dma_read(struct at91mci_host *host) dma_unmap_page(NULL, sg->dma_address, sg->length, DMA_FROM_DEVICE); - data->bytes_xfered += sg->length; - if (cpu_is_at91rm9200()) { /* AT91RM9200 errata */ unsigned int *buffer; int index; @@ -357,6 +355,8 @@ static void at91_mci_post_dma_read(struct at91mci_host *host) } flush_dcache_page(sg_page(sg)); + + data->bytes_xfered += sg->length; } /* Is there another transfer to trigger? */ @@ -397,10 +397,32 @@ static void at91_mci_handle_transmitted(struct at91mci_host *host) at91_mci_write(host, AT91_MCI_IER, AT91_MCI_BLKE); } else at91_mci_write(host, AT91_MCI_IER, AT91_MCI_NOTBUSY); +} + +/* + * Update bytes tranfered count during a write operation + */ +static void at91_mci_update_bytes_xfered(struct at91mci_host *host) +{ + struct mmc_data *data; - data->bytes_xfered = host->total_length; + /* always deal with the effective request (and not the current cmd) */ + + if (host->request->cmd && host->request->cmd->error != 0) + return; + + if (host->request->data) { + data = host->request->data; + if (data->flags & MMC_DATA_WRITE) { + /* card is in IDLE mode now */ + pr_debug("-> bytes_xfered %d, total_length = %d\n", + data->bytes_xfered, host->total_length); + data->bytes_xfered = host->total_length; + } + } } + /*Handle after command sent ready*/ static int at91_mci_handle_cmdrdy(struct at91mci_host *host) { @@ -413,8 +435,7 @@ static int at91_mci_handle_cmdrdy(struct at91mci_host *host) } else return 1; } else if (host->cmd->data->flags & MMC_DATA_WRITE) { /*After sendding multi-block-write command, start DMA transfer*/ - at91_mci_write(host, AT91_MCI_IER, AT91_MCI_TXBUFE); - at91_mci_write(host, AT91_MCI_IER, AT91_MCI_BLKE); + at91_mci_write(host, AT91_MCI_IER, AT91_MCI_TXBUFE | AT91_MCI_BLKE); at91_mci_write(host, ATMEL_PDC_PTCR, ATMEL_PDC_TXTEN); } @@ -817,6 +838,7 @@ static irqreturn_t at91_mci_irq(int irq, void *devid) if (int_status & AT91_MCI_NOTBUSY) { pr_debug("Card is ready\n"); + at91_mci_update_bytes_xfered(host); completed = 1; } @@ -825,7 +847,13 @@ static irqreturn_t at91_mci_irq(int irq, void *devid) if (int_status & AT91_MCI_BLKE) { pr_debug("Block transfer has ended\n"); - completed = 1; + if (host->request->data && host->request->data->blocks > 1) { + /* multi block write : complete multi write + * command and send stop */ + completed = 1; + } else { + at91_mci_write(host, AT91_MCI_IER, AT91_MCI_NOTBUSY); + } } if (int_status & AT91_MCI_TXRDY) -- cgit v0.10.2 From 3eb99a7bff3d7459a695f45124a6bbabf31cb4b5 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 30 May 2008 14:08:56 +0200 Subject: mmc: at91_mci: add multiwrite switch at91_mci is capable of multiwrite. Enable it before it disappears. Signed-off-by: Nicolas Ferre Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index b5a6e25..b2bb79e 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -947,6 +947,7 @@ static int __init at91_mci_probe(struct platform_device *pdev) mmc->f_min = 375000; mmc->f_max = 25000000; mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; + mmc->caps = MMC_CAP_MULTIWRITE; mmc->max_blk_size = 4095; mmc->max_blk_count = mmc->max_req_size; -- cgit v0.10.2 From 7a6588ba20aed4505da912f9dd73616e9bd9ba67 Mon Sep 17 00:00:00 2001 From: Eric Benard Date: Fri, 30 May 2008 14:26:05 +0200 Subject: mmc: at91_mci: add sdio irq management Enable SDIO interrupt handling. Signed-off-by: Eric Benard Signed-off-by: Nicolas Ferre Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index b2bb79e..9b546a9 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -665,7 +665,7 @@ static void at91_mci_completed_command(struct at91mci_host *host) struct mmc_command *cmd = host->cmd; unsigned int status; - at91_mci_write(host, AT91_MCI_IDR, 0xffffffff); + at91_mci_write(host, AT91_MCI_IDR, 0xffffffff & ~(AT91_MCI_SDIOIRQA | AT91_MCI_SDIOIRQB)); cmd->resp[0] = at91_mci_read(host, AT91_MCI_RSPR(0)); cmd->resp[1] = at91_mci_read(host, AT91_MCI_RSPR(1)); @@ -856,6 +856,12 @@ static irqreturn_t at91_mci_irq(int irq, void *devid) } } + if (int_status & AT91_MCI_SDIOIRQA) + mmc_signal_sdio_irq(host->mmc); + + if (int_status & AT91_MCI_SDIOIRQB) + mmc_signal_sdio_irq(host->mmc); + if (int_status & AT91_MCI_TXRDY) pr_debug("Ready to transmit\n"); @@ -870,10 +876,10 @@ static irqreturn_t at91_mci_irq(int irq, void *devid) if (completed) { pr_debug("Completed command\n"); - at91_mci_write(host, AT91_MCI_IDR, 0xffffffff); + at91_mci_write(host, AT91_MCI_IDR, 0xffffffff & ~(AT91_MCI_SDIOIRQA | AT91_MCI_SDIOIRQB)); at91_mci_completed_command(host); } else - at91_mci_write(host, AT91_MCI_IDR, int_status); + at91_mci_write(host, AT91_MCI_IDR, int_status & ~(AT91_MCI_SDIOIRQA | AT91_MCI_SDIOIRQB)); return IRQ_HANDLED; } @@ -913,10 +919,22 @@ static int at91_mci_get_ro(struct mmc_host *mmc) return -ENOSYS; } +static void at91_mci_enable_sdio_irq(struct mmc_host *mmc, int enable) +{ + struct at91mci_host *host = mmc_priv(mmc); + + pr_debug("%s: sdio_irq %c : %s\n", mmc_hostname(host->mmc), + host->board->slot_b ? 'B':'A', enable ? "enable" : "disable"); + at91_mci_write(host, enable ? AT91_MCI_IER : AT91_MCI_IDR, + host->board->slot_b ? AT91_MCI_SDIOIRQB : AT91_MCI_SDIOIRQA); + +} + static const struct mmc_host_ops at91_mci_ops = { .request = at91_mci_request, .set_ios = at91_mci_set_ios, .get_ro = at91_mci_get_ro, + .enable_sdio_irq = at91_mci_enable_sdio_irq, }; /* @@ -947,7 +965,7 @@ static int __init at91_mci_probe(struct platform_device *pdev) mmc->f_min = 375000; mmc->f_max = 25000000; mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; - mmc->caps = MMC_CAP_MULTIWRITE; + mmc->caps = MMC_CAP_MULTIWRITE | MMC_CAP_SDIO_IRQ; mmc->max_blk_size = 4095; mmc->max_blk_count = mmc->max_req_size; -- cgit v0.10.2 From ba7deeed96ca1855c153ad81c45baf6efe1a3362 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 30 May 2008 14:28:45 +0200 Subject: mmc: at91_mci: do not read irq status twice as it will forget some errors Reading AT91_MCI_SR again at the end of transfer can corrupt the error reporting. Some fields in the SR register are read-and-clear. Signed-off-by: Nicolas Ferre Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index 9b546a9..e1f91a4 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -660,10 +660,9 @@ static void at91_mci_process_next(struct at91mci_host *host) /* * Handle a command that has been completed */ -static void at91_mci_completed_command(struct at91mci_host *host) +static void at91_mci_completed_command(struct at91mci_host *host, unsigned int status) { struct mmc_command *cmd = host->cmd; - unsigned int status; at91_mci_write(host, AT91_MCI_IDR, 0xffffffff & ~(AT91_MCI_SDIOIRQA | AT91_MCI_SDIOIRQB)); @@ -677,10 +676,9 @@ static void at91_mci_completed_command(struct at91mci_host *host) host->buffer = NULL; } - status = at91_mci_read(host, AT91_MCI_SR); - - pr_debug("Status = %08X [%08X %08X %08X %08X]\n", - status, cmd->resp[0], cmd->resp[1], cmd->resp[2], cmd->resp[3]); + pr_debug("Status = %08X/%08x [%08X %08X %08X %08X]\n", + status, at91_mci_read(host, AT91_MCI_SR), + cmd->resp[0], cmd->resp[1], cmd->resp[2], cmd->resp[3]); if (status & AT91_MCI_ERRORS) { if ((status & AT91_MCI_RCRCE) && !(mmc_resp_type(cmd) & MMC_RSP_CRC)) { @@ -877,7 +875,7 @@ static irqreturn_t at91_mci_irq(int irq, void *devid) if (completed) { pr_debug("Completed command\n"); at91_mci_write(host, AT91_MCI_IDR, 0xffffffff & ~(AT91_MCI_SDIOIRQA | AT91_MCI_SDIOIRQB)); - at91_mci_completed_command(host); + at91_mci_completed_command(host, int_status); } else at91_mci_write(host, AT91_MCI_IDR, int_status & ~(AT91_MCI_SDIOIRQA | AT91_MCI_SDIOIRQB)); -- cgit v0.10.2 From fa1fe010c126ee69f2f75e3a4efc2f6252281ff8 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Tue, 10 Jun 2008 11:27:29 +0200 Subject: at91_mci: manage cmd error and data error independently In at91_mci_completed_command() function, this patch distinguishes command error and data error. It reports it in the corresponding error field. Signed-off-by: Nicolas Ferre Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index e1f91a4..e4d018b 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -663,6 +663,7 @@ static void at91_mci_process_next(struct at91mci_host *host) static void at91_mci_completed_command(struct at91mci_host *host, unsigned int status) { struct mmc_command *cmd = host->cmd; + struct mmc_data *data = cmd->data; at91_mci_write(host, AT91_MCI_IDR, 0xffffffff & ~(AT91_MCI_SDIOIRQA | AT91_MCI_SDIOIRQB)); @@ -685,15 +686,25 @@ static void at91_mci_completed_command(struct at91mci_host *host, unsigned int s cmd->error = 0; } else { - if (status & (AT91_MCI_RTOE | AT91_MCI_DTOE)) - cmd->error = -ETIMEDOUT; - else if (status & (AT91_MCI_RCRCE | AT91_MCI_DCRCE)) - cmd->error = -EILSEQ; - else - cmd->error = -EIO; + if (status & (AT91_MCI_DTOE | AT91_MCI_DCRCE)) { + if (data) { + if (status & AT91_MCI_DTOE) + data->error = -ETIMEDOUT; + else if (status & AT91_MCI_DCRCE) + data->error = -EILSEQ; + } + } else { + if (status & AT91_MCI_RTOE) + cmd->error = -ETIMEDOUT; + else if (status & AT91_MCI_RCRCE) + cmd->error = -EILSEQ; + else + cmd->error = -EIO; + } - pr_debug("Error detected and set to %d (cmd = %d, retries = %d)\n", - cmd->error, cmd->opcode, cmd->retries); + pr_debug("Error detected and set to %d/%d (cmd = %d, retries = %d)\n", + cmd->error, data ? data->error : 0, + cmd->opcode, cmd->retries); } } else -- cgit v0.10.2 From 5385edc50063a2175383ef5e90aa67fb6ab1beae Mon Sep 17 00:00:00 2001 From: Ville Syrjala Date: Sat, 14 Jun 2008 20:27:20 +0300 Subject: at91_mci: AT91SAM9260/9263 12 byte write erratum (v2) AT91SAM926[0/3] PDC must write at least 12 bytes. The code compiles and runs but the actual condition for this erratum did not trigger in my tests so it's unclear if it actually works as intended. Signed-off-by: Ville Syrjala Signed-off-by: Nicolas Ferre Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index e4d018b..d3e96ff 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -198,9 +198,14 @@ static inline void at91_mci_sg_to_dma(struct at91mci_host *host, struct mmc_data unsigned int len, i, size; unsigned *dmabuf = host->buffer; - size = host->total_length; + size = data->blksz * data->blocks; len = data->sg_len; + /* AT91SAM926[0/3] Data Write Operation and number of bytes erratum */ + if (cpu_is_at91sam9260() || cpu_is_at91sam9263()) + if (host->total_length == 12) + memset(dmabuf, 0, 12); + /* * Just loop through all entries. Size might not * be the entire list though so make sure that @@ -222,9 +227,10 @@ static inline void at91_mci_sg_to_dma(struct at91mci_host *host, struct mmc_data for (index = 0; index < (amount / 4); index++) *dmabuf++ = swab32(sgbuffer[index]); - } - else + } else { memcpy(dmabuf, sgbuffer, amount); + dmabuf += amount; + } kunmap_atomic(sgbuffer, KM_BIO_SRC_IRQ); @@ -417,7 +423,7 @@ static void at91_mci_update_bytes_xfered(struct at91mci_host *host) /* card is in IDLE mode now */ pr_debug("-> bytes_xfered %d, total_length = %d\n", data->bytes_xfered, host->total_length); - data->bytes_xfered = host->total_length; + data->bytes_xfered = data->blksz * data->blocks; } } } @@ -600,6 +606,13 @@ static void at91_mci_send_command(struct at91mci_host *host, struct mmc_command * Handle a write */ host->total_length = block_length * blocks; + /* + * AT91SAM926[0/3] Data Write Operation and + * number of bytes erratum + */ + if (cpu_is_at91sam9260 () || cpu_is_at91sam9263()) + if (host->total_length < 12) + host->total_length = 12; host->buffer = dma_alloc_coherent(NULL, host->total_length, &host->physical_address, GFP_KERNEL); -- cgit v0.10.2 From 9da3cbaf2881df97e502593c49c93f55eb696091 Mon Sep 17 00:00:00 2001 From: Ville Syrjala Date: Mon, 9 Jun 2008 22:06:44 +0300 Subject: at91_mci: Cover more AT91RM9200 and AT91SAM9261 errata. According to the documentation the AT91SAM9261 MCI shares the block size limitations of the AT91RM9200 MCI. Also the errata documentation for AT91RM9200 and AT91SAM9261 state that stream commands are not supported. This has not been tested on actual hardware. Signed-off-by: Ville Syrjala Signed-off-by: Nicolas Ferre Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index d3e96ff..f8c3f7c 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -520,11 +520,19 @@ static void at91_mci_send_command(struct at91mci_host *host, struct mmc_command if (data) { - if ( cpu_is_at91rm9200() && (data->blksz & 0x3) ) { - pr_debug("Unsupported block size\n"); - cmd->error = -EINVAL; - mmc_request_done(host->mmc, host->request); - return; + if (cpu_is_at91rm9200() || cpu_is_at91sam9261()) { + if (data->blksz & 0x3) { + pr_debug("Unsupported block size\n"); + cmd->error = -EINVAL; + mmc_request_done(host->mmc, host->request); + return; + } + if (data->flags & MMC_DATA_STREAM) { + pr_debug("Stream commands not supported\n"); + cmd->error = -EINVAL; + mmc_request_done(host->mmc, host->request); + return; + } } block_length = data->blksz; @@ -577,7 +585,7 @@ static void at91_mci_send_command(struct at91mci_host *host, struct mmc_command mr |= AT91_MCI_PDCMODE; at91_mci_write(host, AT91_MCI_MR, mr); - if (!cpu_is_at91rm9200()) + if (!(cpu_is_at91rm9200() || cpu_is_at91sam9261())) at91_mci_write(host, AT91_MCI_BLKR, AT91_MCI_BLKR_BCNT(blocks) | AT91_MCI_BLKR_BLKLEN(block_length)); -- cgit v0.10.2 From 12bd257532708a4d5be4b8548ff121a45ff88f5d Mon Sep 17 00:00:00 2001 From: Ville Syrjala Date: Mon, 9 Jun 2008 22:06:45 +0300 Subject: at91_mci: Fix byte mode transitions. The byte mode support fails to clear the byte mode bit in the command register, possibly leaving byte mode enabled with the counters programmed in non-byte mode. Signed-off-by: Ville Syrjala Signed-off-by: Nicolas Ferre Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index f8c3f7c..16df235 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -579,7 +579,7 @@ static void at91_mci_send_command(struct at91mci_host *host, struct mmc_command ier = AT91_MCI_CMDRDY; } else { /* zero block length and PDC mode */ - mr = at91_mci_read(host, AT91_MCI_MR) & 0x7fff; + mr = at91_mci_read(host, AT91_MCI_MR) & 0x5fff; mr |= (data->blksz & 0x3) ? AT91_MCI_PDCFBYTE : 0; mr |= (block_length << 16); mr |= AT91_MCI_PDCMODE; -- cgit v0.10.2 From c4223c2c91fa9e5addd6eadd804e57a925ac5e5e Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Mon, 9 Jun 2008 08:36:13 +0200 Subject: au1xmmc: remove db1200 board code, rewrite probe. Remove the DB1200 board-specific functions (card present, read-only, activity LED methods) and instead add platform data which is passed to the driver. This also allows for platforms to implement other carddetect schemes (e.g. dedicated irq) without having to pollute the driver code. The poll timer (used for pb1200) is kept for compatibility. With the board-specific stuff gone, the driver's ->probe() code can be cleaned up considerably. Signed-off-by: Manuel Lauss Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c index cc5f7bc..d8776d6 100644 --- a/drivers/mmc/host/au1xmmc.c +++ b/drivers/mmc/host/au1xmmc.c @@ -41,8 +41,9 @@ #include #include #include - +#include #include + #include #include #include @@ -54,6 +55,7 @@ #define DRIVER_NAME "au1xxx-mmc" /* Set this to enable special debugging macros */ +/* #define DEBUG */ #ifdef DEBUG #define DBG(fmt, idx, args...) printk("au1xx(%d): DEBUG: " fmt, idx, ##args) @@ -61,32 +63,6 @@ #define DBG(fmt, idx, args...) #endif -const struct { - u32 iobase; - u32 tx_devid, rx_devid; - u16 bcsrpwr; - u16 bcsrstatus; - u16 wpstatus; -} au1xmmc_card_table[] = { - { SD0_BASE, DSCR_CMD0_SDMS_TX0, DSCR_CMD0_SDMS_RX0, - BCSR_BOARD_SD0PWR, BCSR_INT_SD0INSERT, BCSR_STATUS_SD0WP }, -#ifndef CONFIG_MIPS_DB1200 - { SD1_BASE, DSCR_CMD0_SDMS_TX1, DSCR_CMD0_SDMS_RX1, - BCSR_BOARD_DS1PWR, BCSR_INT_SD1INSERT, BCSR_STATUS_SD1WP } -#endif -}; - -#define AU1XMMC_CONTROLLER_COUNT (ARRAY_SIZE(au1xmmc_card_table)) - -/* This array stores pointers for the hosts (used by the IRQ handler) */ -struct au1xmmc_host *au1xmmc_hosts[AU1XMMC_CONTROLLER_COUNT]; -static int dma = 1; - -#ifdef MODULE -module_param(dma, bool, 0); -MODULE_PARM_DESC(dma, "Use DMA engine for data transfers (0 = disabled)"); -#endif - static inline void IRQ_ON(struct au1xmmc_host *host, u32 mask) { u32 val = au_readl(HOST_CONFIG(host)); @@ -135,26 +111,33 @@ static inline void SEND_STOP(struct au1xmmc_host *host) static void au1xmmc_set_power(struct au1xmmc_host *host, int state) { - - u32 val = au1xmmc_card_table[host->id].bcsrpwr; - - bcsr->board &= ~val; - if (state) bcsr->board |= val; - - au_sync_delay(1); + if (host->platdata && host->platdata->set_power) + host->platdata->set_power(host->mmc, state); } -static inline int au1xmmc_card_inserted(struct au1xmmc_host *host) +static int au1xmmc_card_inserted(struct au1xmmc_host *host) { - return (bcsr->sig_status & au1xmmc_card_table[host->id].bcsrstatus) - ? 1 : 0; + int ret; + + if (host->platdata && host->platdata->card_inserted) + ret = host->platdata->card_inserted(host->mmc); + else + ret = 1; /* assume there is a card */ + + return ret; } static int au1xmmc_card_readonly(struct mmc_host *mmc) { struct au1xmmc_host *host = mmc_priv(mmc); - return (bcsr->status & au1xmmc_card_table[host->id].wpstatus) - ? 1 : 0; + int ret; + + if (host->platdata && host->platdata->card_readonly) + ret = host->platdata->card_readonly(mmc); + else + ret = 0; /* assume card is read-write */ + + return ret; } static void au1xmmc_finish_request(struct au1xmmc_host *host) @@ -163,7 +146,7 @@ static void au1xmmc_finish_request(struct au1xmmc_host *host) struct mmc_request *mrq = host->mrq; host->mrq = NULL; - host->flags &= HOST_F_ACTIVE; + host->flags &= HOST_F_ACTIVE | HOST_F_DMA; host->dma.len = 0; host->dma.dir = 0; @@ -174,8 +157,6 @@ static void au1xmmc_finish_request(struct au1xmmc_host *host) host->status = HOST_S_IDLE; - bcsr->disk_leds |= (1 << 8); - mmc_request_done(host->mmc, mrq); } @@ -299,11 +280,13 @@ static void au1xmmc_data_complete(struct au1xmmc_host *host, u32 status) if (!data->error) { if (host->flags & HOST_F_DMA) { +#ifdef CONFIG_SOC_AU1200 /* DBDMA */ u32 chan = DMA_CHANNEL(host); chan_tab_t *c = *((chan_tab_t **) chan); au1x_dma_chan_t *cp = c->chan_ptr; data->bytes_xfered = cp->ddma_bytecnt; +#endif } else data->bytes_xfered = @@ -420,18 +403,18 @@ static void au1xmmc_receive_pio(struct au1xmmc_host *host) break; if (status & SD_STATUS_RC) { - DBG("RX CRC Error [%d + %d].\n", host->id, + DBG("RX CRC Error [%d + %d].\n", host->pdev->id, host->pio.len, count); break; } if (status & SD_STATUS_RO) { - DBG("RX Overrun [%d + %d]\n", host->id, + DBG("RX Overrun [%d + %d]\n", host->pdev->id, host->pio.len, count); break; } else if (status & SD_STATUS_RU) { - DBG("RX Underrun [%d + %d]\n", host->id, + DBG("RX Underrun [%d + %d]\n", host->pdev->id, host->pio.len, count); break; } @@ -528,6 +511,7 @@ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status) host->status = HOST_S_DATA; if (host->flags & HOST_F_DMA) { +#ifdef CONFIG_SOC_AU1200 /* DBDMA */ u32 channel = DMA_CHANNEL(host); /* Start the DMA as soon as the buffer gets something in it */ @@ -540,6 +524,7 @@ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status) } au1xxx_dbdma_start(channel); +#endif } } @@ -571,12 +556,8 @@ static void au1xmmc_set_clock(struct au1xmmc_host *host, int rate) static int au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data) { - int datalen = data->blocks * data->blksz; - if (dma != 0) - host->flags |= HOST_F_DMA; - if (data->flags & MMC_DATA_READ) host->flags |= HOST_F_RECV; else @@ -596,6 +577,7 @@ au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data) au_writel(data->blksz - 1, HOST_BLKSIZE(host)); if (host->flags & HOST_F_DMA) { +#ifdef CONFIG_SOC_AU1200 /* DBDMA */ int i; u32 channel = DMA_CHANNEL(host); @@ -621,11 +603,12 @@ au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data) len, flags); } - if (!ret) + if (!ret) goto dataerr; datalen -= len; } +#endif } else { host->pio.index = 0; @@ -641,8 +624,9 @@ au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data) return 0; - dataerr: - dma_unmap_sg(mmc_dev(host->mmc),data->sg,data->sg_len,host->dma.dir); +dataerr: + dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + host->dma.dir); return -ETIMEDOUT; } @@ -663,8 +647,6 @@ static void au1xmmc_request(struct mmc_host* mmc, struct mmc_request* mrq) host->mrq = mrq; host->status = HOST_S_CMD; - bcsr->disk_leds &= ~(1 << 8); - if (mrq->data) { FLUSH_FIFO(host); flags = mrq->data->flags; @@ -728,149 +710,145 @@ static void au1xmmc_set_ios(struct mmc_host* mmc, struct mmc_ios* ios) } } -static void au1xmmc_dma_callback(int irq, void *dev_id) -{ - struct au1xmmc_host *host = (struct au1xmmc_host *) dev_id; - - /* Avoid spurious interrupts */ - - if (!host->mrq) - return; - - if (host->flags & HOST_F_STOP) - SEND_STOP(host); - - tasklet_schedule(&host->data_task); -} - #define STATUS_TIMEOUT (SD_STATUS_RAT | SD_STATUS_DT) #define STATUS_DATA_IN (SD_STATUS_NE) #define STATUS_DATA_OUT (SD_STATUS_TH) static irqreturn_t au1xmmc_irq(int irq, void *dev_id) { - + struct au1xmmc_host *host = dev_id; u32 status; - int i, ret = 0; - - disable_irq(AU1100_SD_IRQ); - for(i = 0; i < AU1XMMC_CONTROLLER_COUNT; i++) { - struct au1xmmc_host * host = au1xmmc_hosts[i]; - u32 handled = 1; - - status = au_readl(HOST_STATUS(host)); + status = au_readl(HOST_STATUS(host)); - if (host->mrq && (status & STATUS_TIMEOUT)) { - if (status & SD_STATUS_RAT) - host->mrq->cmd->error = -ETIMEDOUT; + if (!(status & SD_STATUS_I)) + return IRQ_NONE; /* not ours */ - else if (status & SD_STATUS_DT) - host->mrq->data->error = -ETIMEDOUT; + if (host->mrq && (status & STATUS_TIMEOUT)) { + if (status & SD_STATUS_RAT) + host->mrq->cmd->error = -ETIMEDOUT; + else if (status & SD_STATUS_DT) + host->mrq->data->error = -ETIMEDOUT; - /* In PIO mode, interrupts might still be enabled */ - IRQ_OFF(host, SD_CONFIG_NE | SD_CONFIG_TH); + /* In PIO mode, interrupts might still be enabled */ + IRQ_OFF(host, SD_CONFIG_NE | SD_CONFIG_TH); - //IRQ_OFF(host, SD_CONFIG_TH|SD_CONFIG_RA|SD_CONFIG_RF); - tasklet_schedule(&host->finish_task); - } + /* IRQ_OFF(host, SD_CONFIG_TH | SD_CONFIG_RA | SD_CONFIG_RF); */ + tasklet_schedule(&host->finish_task); + } #if 0 - else if (status & SD_STATUS_DD) { - - /* Sometimes we get a DD before a NE in PIO mode */ - - if (!(host->flags & HOST_F_DMA) && - (status & SD_STATUS_NE)) - au1xmmc_receive_pio(host); - else { - au1xmmc_data_complete(host, status); - //tasklet_schedule(&host->data_task); - } - } -#endif - else if (status & (SD_STATUS_CR)) { - if (host->status == HOST_S_CMD) - au1xmmc_cmd_complete(host,status); - } - else if (!(host->flags & HOST_F_DMA)) { - if ((host->flags & HOST_F_XMIT) && - (status & STATUS_DATA_OUT)) - au1xmmc_send_pio(host); - else if ((host->flags & HOST_F_RECV) && - (status & STATUS_DATA_IN)) - au1xmmc_receive_pio(host); - } - else if (status & 0x203FBC70) { - DBG("Unhandled status %8.8x\n", host->id, status); - handled = 0; + else if (status & SD_STATUS_DD) { + /* Sometimes we get a DD before a NE in PIO mode */ + if (!(host->flags & HOST_F_DMA) && (status & SD_STATUS_NE)) + au1xmmc_receive_pio(host); + else { + au1xmmc_data_complete(host, status); + /* tasklet_schedule(&host->data_task); */ } - - au_writel(status, HOST_STATUS(host)); - au_sync(); - - ret |= handled; } - - enable_irq(AU1100_SD_IRQ); - return ret; -} - -static void au1xmmc_poll_event(unsigned long arg) -{ - struct au1xmmc_host *host = (struct au1xmmc_host *) arg; - - int card = au1xmmc_card_inserted(host); - int controller = (host->flags & HOST_F_ACTIVE) ? 1 : 0; - - if (card != controller) { - host->flags &= ~HOST_F_ACTIVE; - if (card) host->flags |= HOST_F_ACTIVE; - mmc_detect_change(host->mmc, 0); +#endif + else if (status & SD_STATUS_CR) { + if (host->status == HOST_S_CMD) + au1xmmc_cmd_complete(host, status); + + } else if (!(host->flags & HOST_F_DMA)) { + if ((host->flags & HOST_F_XMIT) && (status & STATUS_DATA_OUT)) + au1xmmc_send_pio(host); + else if ((host->flags & HOST_F_RECV) && (status & STATUS_DATA_IN)) + au1xmmc_receive_pio(host); + + } else if (status & 0x203F3C70) { + DBG("Unhandled status %8.8x\n", host->pdev->id, + status); } - if (host->mrq != NULL) { - u32 status = au_readl(HOST_STATUS(host)); - DBG("PENDING - %8.8x\n", host->id, status); - } + au_writel(status, HOST_STATUS(host)); + au_sync(); - mod_timer(&host->timer, jiffies + AU1XMMC_DETECT_TIMEOUT); + return IRQ_HANDLED; } -static dbdev_tab_t au1xmmc_mem_dbdev = -{ - DSCR_CMD0_ALWAYS, DEV_FLAGS_ANYUSE, 0, 8, 0x00000000, 0, 0 +#ifdef CONFIG_SOC_AU1200 +/* 8bit memory DMA device */ +static dbdev_tab_t au1xmmc_mem_dbdev = { + .dev_id = DSCR_CMD0_ALWAYS, + .dev_flags = DEV_FLAGS_ANYUSE, + .dev_tsize = 0, + .dev_devwidth = 8, + .dev_physaddr = 0x00000000, + .dev_intlevel = 0, + .dev_intpolarity = 0, }; +static int memid; -static void au1xmmc_init_dma(struct au1xmmc_host *host) +static void au1xmmc_dbdma_callback(int irq, void *dev_id) { + struct au1xmmc_host *host = (struct au1xmmc_host *)dev_id; - u32 rxchan, txchan; + /* Avoid spurious interrupts */ + if (!host->mrq) + return; - int txid = au1xmmc_card_table[host->id].tx_devid; - int rxid = au1xmmc_card_table[host->id].rx_devid; + if (host->flags & HOST_F_STOP) + SEND_STOP(host); - /* DSCR_CMD0_ALWAYS has a stride of 32 bits, we need a stride - of 8 bits. And since devices are shared, we need to create - our own to avoid freaking out other devices - */ + tasklet_schedule(&host->data_task); +} - int memid = au1xxx_ddma_add_device(&au1xmmc_mem_dbdev); +static int au1xmmc_dbdma_init(struct au1xmmc_host *host) +{ + struct resource *res; + int txid, rxid; + + res = platform_get_resource(host->pdev, IORESOURCE_DMA, 0); + if (!res) + return -ENODEV; + txid = res->start; + + res = platform_get_resource(host->pdev, IORESOURCE_DMA, 1); + if (!res) + return -ENODEV; + rxid = res->start; + + if (!memid) + return -ENODEV; + + host->tx_chan = au1xxx_dbdma_chan_alloc(memid, txid, + au1xmmc_dbdma_callback, (void *)host); + if (!host->tx_chan) { + dev_err(&host->pdev->dev, "cannot allocate TX DMA\n"); + return -ENODEV; + } + + host->rx_chan = au1xxx_dbdma_chan_alloc(rxid, memid, + au1xmmc_dbdma_callback, (void *)host); + if (!host->rx_chan) { + dev_err(&host->pdev->dev, "cannot allocate RX DMA\n"); + au1xxx_dbdma_chan_free(host->tx_chan); + return -ENODEV; + } - txchan = au1xxx_dbdma_chan_alloc(memid, txid, - au1xmmc_dma_callback, (void *) host); + au1xxx_dbdma_set_devwidth(host->tx_chan, 8); + au1xxx_dbdma_set_devwidth(host->rx_chan, 8); - rxchan = au1xxx_dbdma_chan_alloc(rxid, memid, - au1xmmc_dma_callback, (void *) host); + au1xxx_dbdma_ring_alloc(host->tx_chan, AU1XMMC_DESCRIPTOR_COUNT); + au1xxx_dbdma_ring_alloc(host->rx_chan, AU1XMMC_DESCRIPTOR_COUNT); - au1xxx_dbdma_set_devwidth(txchan, 8); - au1xxx_dbdma_set_devwidth(rxchan, 8); + /* DBDMA is good to go */ + host->flags |= HOST_F_DMA; - au1xxx_dbdma_ring_alloc(txchan, AU1XMMC_DESCRIPTOR_COUNT); - au1xxx_dbdma_ring_alloc(rxchan, AU1XMMC_DESCRIPTOR_COUNT); + return 0; +} - host->tx_chan = txchan; - host->rx_chan = rxchan; +static void au1xmmc_dbdma_shutdown(struct au1xmmc_host *host) +{ + if (host->flags & HOST_F_DMA) { + host->flags &= ~HOST_F_DMA; + au1xxx_dbdma_chan_free(host->tx_chan); + au1xxx_dbdma_chan_free(host->rx_chan); + } } +#endif static const struct mmc_host_ops au1xmmc_ops = { .request = au1xmmc_request, @@ -878,116 +856,235 @@ static const struct mmc_host_ops au1xmmc_ops = { .get_ro = au1xmmc_card_readonly, }; -static int __devinit au1xmmc_probe(struct platform_device *pdev) +static void au1xmmc_poll_event(unsigned long arg) { + struct au1xmmc_host *host = (struct au1xmmc_host *)arg; + int card = au1xmmc_card_inserted(host); + int controller = (host->flags & HOST_F_ACTIVE) ? 1 : 0; + + if (card != controller) { + host->flags &= ~HOST_F_ACTIVE; + if (card) + host->flags |= HOST_F_ACTIVE; + mmc_detect_change(host->mmc, 0); + } - int i, ret = 0; +#ifdef DEBUG + if (host->mrq != NULL) { + u32 status = au_readl(HOST_STATUS(host)); + DBG("PENDING - %8.8x\n", host->pdev->id, status); + } +#endif + mod_timer(&host->timer, jiffies + AU1XMMC_DETECT_TIMEOUT); +} - /* THe interrupt is shared among all controllers */ - ret = request_irq(AU1100_SD_IRQ, au1xmmc_irq, IRQF_DISABLED, "MMC", 0); +static void au1xmmc_init_cd_poll_timer(struct au1xmmc_host *host) +{ + init_timer(&host->timer); + host->timer.function = au1xmmc_poll_event; + host->timer.data = (unsigned long)host; + host->timer.expires = jiffies + AU1XMMC_DETECT_TIMEOUT; +} - if (ret) { - printk(DRIVER_NAME "ERROR: Couldn't get int %d: %d\n", - AU1100_SD_IRQ, ret); - return -ENXIO; +static int __devinit au1xmmc_probe(struct platform_device *pdev) +{ + struct mmc_host *mmc; + struct au1xmmc_host *host; + struct resource *r; + int ret; + + mmc = mmc_alloc_host(sizeof(struct au1xmmc_host), &pdev->dev); + if (!mmc) { + dev_err(&pdev->dev, "no memory for mmc_host\n"); + ret = -ENOMEM; + goto out0; } - disable_irq(AU1100_SD_IRQ); + host = mmc_priv(mmc); + host->mmc = mmc; + host->platdata = pdev->dev.platform_data; + host->pdev = pdev; - for(i = 0; i < AU1XMMC_CONTROLLER_COUNT; i++) { - struct mmc_host *mmc = mmc_alloc_host(sizeof(struct au1xmmc_host), &pdev->dev); - struct au1xmmc_host *host = 0; + ret = -ENODEV; + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) { + dev_err(&pdev->dev, "no mmio defined\n"); + goto out1; + } - if (!mmc) { - printk(DRIVER_NAME "ERROR: no mem for host %d\n", i); - au1xmmc_hosts[i] = 0; - continue; - } + host->ioarea = request_mem_region(r->start, r->end - r->start + 1, + pdev->name); + if (!host->ioarea) { + dev_err(&pdev->dev, "mmio already in use\n"); + goto out1; + } - mmc->ops = &au1xmmc_ops; + host->iobase = (unsigned long)ioremap(r->start, 0x3c); + if (!host->iobase) { + dev_err(&pdev->dev, "cannot remap mmio\n"); + goto out2; + } + + r = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!r) { + dev_err(&pdev->dev, "no IRQ defined\n"); + goto out3; + } - mmc->f_min = 450000; - mmc->f_max = 24000000; + host->irq = r->start; + /* IRQ is shared among both SD controllers */ + ret = request_irq(host->irq, au1xmmc_irq, IRQF_SHARED, + DRIVER_NAME, host); + if (ret) { + dev_err(&pdev->dev, "cannot grab IRQ\n"); + goto out3; + } - mmc->max_seg_size = AU1XMMC_DESCRIPTOR_SIZE; - mmc->max_phys_segs = AU1XMMC_DESCRIPTOR_COUNT; + mmc->ops = &au1xmmc_ops; - mmc->max_blk_size = 2048; - mmc->max_blk_count = 512; + mmc->f_min = 450000; + mmc->f_max = 24000000; - mmc->ocr_avail = AU1XMMC_OCR; + mmc->max_seg_size = AU1XMMC_DESCRIPTOR_SIZE; + mmc->max_phys_segs = AU1XMMC_DESCRIPTOR_COUNT; - host = mmc_priv(mmc); - host->mmc = mmc; + mmc->max_blk_size = 2048; + mmc->max_blk_count = 512; - host->id = i; - host->iobase = au1xmmc_card_table[host->id].iobase; - host->clock = 0; - host->power_mode = MMC_POWER_OFF; + mmc->ocr_avail = AU1XMMC_OCR; + mmc->caps = 0; - host->flags = au1xmmc_card_inserted(host) ? HOST_F_ACTIVE : 0; - host->status = HOST_S_IDLE; + host->status = HOST_S_IDLE; - init_timer(&host->timer); + /* board-specific carddetect setup, if any */ + if (host->platdata && host->platdata->cd_setup) { + ret = host->platdata->cd_setup(mmc, 1); + if (ret) { + dev_err(&pdev->dev, "board CD setup failed\n"); + goto out4; + } + } else { + /* poll the board-specific is-card-in-socket-? method */ + au1xmmc_init_cd_poll_timer(host); + } - host->timer.function = au1xmmc_poll_event; - host->timer.data = (unsigned long) host; - host->timer.expires = jiffies + AU1XMMC_DETECT_TIMEOUT; + tasklet_init(&host->data_task, au1xmmc_tasklet_data, + (unsigned long)host); - tasklet_init(&host->data_task, au1xmmc_tasklet_data, - (unsigned long) host); + tasklet_init(&host->finish_task, au1xmmc_tasklet_finish, + (unsigned long)host); - tasklet_init(&host->finish_task, au1xmmc_tasklet_finish, - (unsigned long) host); +#ifdef CONFIG_SOC_AU1200 + ret = au1xmmc_dbdma_init(host); + if (ret) + printk(KERN_INFO DRIVER_NAME ": DBDMA init failed; using PIO\n"); +#endif - spin_lock_init(&host->lock); +#ifdef CONFIG_LEDS_CLASS + if (host->platdata && host->platdata->led) { + struct led_classdev *led = host->platdata->led; + led->name = mmc_hostname(mmc); + led->brightness = LED_OFF; + led->default_trigger = mmc_hostname(mmc); + ret = led_classdev_register(mmc_dev(mmc), led); + if (ret) + goto out5; + } +#endif - if (dma != 0) - au1xmmc_init_dma(host); + au1xmmc_reset_controller(host); - au1xmmc_reset_controller(host); + ret = mmc_add_host(mmc); + if (ret) { + dev_err(&pdev->dev, "cannot add mmc host\n"); + goto out6; + } - mmc_add_host(mmc); - au1xmmc_hosts[i] = host; + platform_set_drvdata(pdev, mmc); + /* start the carddetect poll timer if necessary */ + if (!(host->platdata && host->platdata->cd_setup)) add_timer(&host->timer); - printk(KERN_INFO DRIVER_NAME ": MMC Controller %d set up at %8.8X (mode=%s)\n", - host->id, host->iobase, dma ? "dma" : "pio"); - } + printk(KERN_INFO DRIVER_NAME ": MMC Controller %d set up at %8.8X" + " (mode=%s)\n", pdev->id, host->iobase, + host->flags & HOST_F_DMA ? "dma" : "pio"); - enable_irq(AU1100_SD_IRQ); + return 0; /* all ok */ - return 0; +out6: +#ifdef CONFIG_LEDS_CLASS + if (host->platdata && host->platdata->led) + led_classdev_unregister(host->platdata->led); +out5: +#endif + au_writel(0, HOST_ENABLE(host)); + au_writel(0, HOST_CONFIG(host)); + au_writel(0, HOST_CONFIG2(host)); + au_sync(); + +#ifdef CONFIG_SOC_AU1200 + au1xmmc_dbdma_shutdown(host); +#endif + + tasklet_kill(&host->data_task); + tasklet_kill(&host->finish_task); + + if (host->platdata && host->platdata->cd_setup) + host->platdata->cd_setup(mmc, 0); +out4: + free_irq(host->irq, host); +out3: + iounmap((void *)host->iobase); +out2: + release_resource(host->ioarea); + kfree(host->ioarea); +out1: + mmc_free_host(mmc); +out0: + return ret; } static int __devexit au1xmmc_remove(struct platform_device *pdev) { + struct mmc_host *mmc = platform_get_drvdata(pdev); + struct au1xmmc_host *host; + + if (mmc) { + host = mmc_priv(mmc); + + mmc_remove_host(mmc); - int i; +#ifdef CONFIG_LEDS_CLASS + if (host->platdata && host->platdata->led) + led_classdev_unregister(host->platdata->led); +#endif - disable_irq(AU1100_SD_IRQ); + if (host->platdata && host->platdata->cd_setup) + host->platdata->cd_setup(mmc, 0); + else + del_timer_sync(&host->timer); - for(i = 0; i < AU1XMMC_CONTROLLER_COUNT; i++) { - struct au1xmmc_host *host = au1xmmc_hosts[i]; - if (!host) continue; + au_writel(0, HOST_ENABLE(host)); + au_writel(0, HOST_CONFIG(host)); + au_writel(0, HOST_CONFIG2(host)); + au_sync(); tasklet_kill(&host->data_task); tasklet_kill(&host->finish_task); - del_timer_sync(&host->timer); +#ifdef CONFIG_SOC_AU1200 + au1xmmc_dbdma_shutdown(host); +#endif au1xmmc_set_power(host, 0); - mmc_remove_host(host->mmc); + free_irq(host->irq, host); + iounmap((void *)host->iobase); + release_resource(host->ioarea); + kfree(host->ioarea); - au1xxx_dbdma_chan_free(host->tx_chan); - au1xxx_dbdma_chan_free(host->rx_chan); - - au_writel(0x0, HOST_ENABLE(host)); - au_sync(); + mmc_free_host(mmc); } - - free_irq(AU1100_SD_IRQ, 0); return 0; } @@ -1004,21 +1101,31 @@ static struct platform_driver au1xmmc_driver = { static int __init au1xmmc_init(void) { +#ifdef CONFIG_SOC_AU1200 + /* DSCR_CMD0_ALWAYS has a stride of 32 bits, we need a stride + * of 8 bits. And since devices are shared, we need to create + * our own to avoid freaking out other devices. + */ + memid = au1xxx_ddma_add_device(&au1xmmc_mem_dbdev); + if (!memid) + printk(KERN_ERR "au1xmmc: cannot add memory dbdma dev\n"); +#endif return platform_driver_register(&au1xmmc_driver); } static void __exit au1xmmc_exit(void) { +#ifdef CONFIG_SOC_AU1200 + if (memid) + au1xxx_ddma_del_device(memid); +#endif platform_driver_unregister(&au1xmmc_driver); } module_init(au1xmmc_init); module_exit(au1xmmc_exit); -#ifdef MODULE MODULE_AUTHOR("Advanced Micro Devices, Inc"); MODULE_DESCRIPTION("MMC/SD driver for the Alchemy Au1XXX"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:au1xxx-mmc"); -#endif - diff --git a/drivers/mmc/host/au1xmmc.h b/drivers/mmc/host/au1xmmc.h index 341cbdf..3b40065 100644 --- a/drivers/mmc/host/au1xmmc.h +++ b/drivers/mmc/host/au1xmmc.h @@ -49,8 +49,6 @@ struct au1xmmc_host { struct mmc_host *mmc; struct mmc_request *mrq; - u32 id; - u32 flags; u32 iobase; u32 clock; @@ -73,11 +71,14 @@ struct au1xmmc_host { u32 tx_chan; u32 rx_chan; + int irq; + struct timer_list timer; struct tasklet_struct finish_task; struct tasklet_struct data_task; - - spinlock_t lock; + struct au1xmmc_platform_data *platdata; + struct platform_device *pdev; + struct resource *ioarea; }; /* Status flags used by the host structure */ diff --git a/include/asm-mips/mach-au1x00/au1100_mmc.h b/include/asm-mips/mach-au1x00/au1100_mmc.h index 9e0028f..c35e209 100644 --- a/include/asm-mips/mach-au1x00/au1100_mmc.h +++ b/include/asm-mips/mach-au1x00/au1100_mmc.h @@ -38,15 +38,15 @@ #ifndef __ASM_AU1100_MMC_H #define __ASM_AU1100_MMC_H - -#define NUM_AU1100_MMC_CONTROLLERS 2 - -#if defined(CONFIG_SOC_AU1100) -#define AU1100_SD_IRQ AU1100_SD_INT -#elif defined(CONFIG_SOC_AU1200) -#define AU1100_SD_IRQ AU1200_SD_INT -#endif - +#include + +struct au1xmmc_platform_data { + int(*cd_setup)(void *mmc_host, int on); + int(*card_inserted)(void *mmc_host); + int(*card_readonly)(void *mmc_host); + void(*set_power)(void *mmc_host, int state); + struct led_classdev *led; +}; #define SD0_BASE 0xB0600000 #define SD1_BASE 0xB0680000 -- cgit v0.10.2 From 281dd23ea03e9893c02d237d9e35a7d20d412452 Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Mon, 9 Jun 2008 08:37:33 +0200 Subject: au1xmmc: enable 4 bit transfer mode Signed-off-by: Manuel Lauss Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c index d8776d6..2bd4cf4 100644 --- a/drivers/mmc/host/au1xmmc.c +++ b/drivers/mmc/host/au1xmmc.c @@ -95,14 +95,13 @@ static inline void IRQ_OFF(struct au1xmmc_host *host, u32 mask) static inline void SEND_STOP(struct au1xmmc_host *host) { - - /* We know the value of CONFIG2, so avoid a read we don't need */ - u32 mask = SD_CONFIG2_EN; + u32 config2; WARN_ON(host->status != HOST_S_DATA); host->status = HOST_S_STOP; - au_writel(mask | SD_CONFIG2_DF, HOST_CONFIG2(host)); + config2 = au_readl(HOST_CONFIG2(host)); + au_writel(config2 | SD_CONFIG2_DF, HOST_CONFIG2(host)); au_sync(); /* Send the stop commmand */ @@ -697,6 +696,7 @@ static void au1xmmc_reset_controller(struct au1xmmc_host *host) static void au1xmmc_set_ios(struct mmc_host* mmc, struct mmc_ios* ios) { struct au1xmmc_host *host = mmc_priv(mmc); + u32 config2; if (ios->power_mode == MMC_POWER_OFF) au1xmmc_set_power(host, 0); @@ -708,6 +708,18 @@ static void au1xmmc_set_ios(struct mmc_host* mmc, struct mmc_ios* ios) au1xmmc_set_clock(host, ios->clock); host->clock = ios->clock; } + + config2 = au_readl(HOST_CONFIG2(host)); + switch (ios->bus_width) { + case MMC_BUS_WIDTH_4: + config2 |= SD_CONFIG2_WB; + break; + case MMC_BUS_WIDTH_1: + config2 &= ~SD_CONFIG2_WB; + break; + } + au_writel(config2, HOST_CONFIG2(host)); + au_sync(); } #define STATUS_TIMEOUT (SD_STATUS_RAT | SD_STATUS_DT) @@ -952,7 +964,7 @@ static int __devinit au1xmmc_probe(struct platform_device *pdev) mmc->max_blk_count = 512; mmc->ocr_avail = AU1XMMC_OCR; - mmc->caps = 0; + mmc->caps = MMC_CAP_4_BIT_DATA; host->status = HOST_S_IDLE; -- cgit v0.10.2 From 20f522ff5583a818922b3f650f6f7ef0e3e1aa68 Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Mon, 9 Jun 2008 08:38:03 +0200 Subject: au1xmmc: SDIO IRQ support. Wire up the SD controllers' SDIO IRQ capability. Signed-off-by: Manuel Lauss Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c index 2bd4cf4..16b5640 100644 --- a/drivers/mmc/host/au1xmmc.c +++ b/drivers/mmc/host/au1xmmc.c @@ -736,6 +736,9 @@ static irqreturn_t au1xmmc_irq(int irq, void *dev_id) if (!(status & SD_STATUS_I)) return IRQ_NONE; /* not ours */ + if (status & SD_STATUS_SI) /* SDIO */ + mmc_signal_sdio_irq(host->mmc); + if (host->mrq && (status & STATUS_TIMEOUT)) { if (status & SD_STATUS_RAT) host->mrq->cmd->error = -ETIMEDOUT; @@ -862,10 +865,21 @@ static void au1xmmc_dbdma_shutdown(struct au1xmmc_host *host) } #endif +static void au1xmmc_enable_sdio_irq(struct mmc_host *mmc, int en) +{ + struct au1xmmc_host *host = mmc_priv(mmc); + + if (en) + IRQ_ON(host, SD_CONFIG_SI); + else + IRQ_OFF(host, SD_CONFIG_SI); +} + static const struct mmc_host_ops au1xmmc_ops = { .request = au1xmmc_request, .set_ios = au1xmmc_set_ios, .get_ro = au1xmmc_card_readonly, + .enable_sdio_irq = au1xmmc_enable_sdio_irq, }; static void au1xmmc_poll_event(unsigned long arg) @@ -964,7 +978,7 @@ static int __devinit au1xmmc_probe(struct platform_device *pdev) mmc->max_blk_count = 512; mmc->ocr_avail = AU1XMMC_OCR; - mmc->caps = MMC_CAP_4_BIT_DATA; + mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_SDIO_IRQ; host->status = HOST_S_IDLE; -- cgit v0.10.2 From 5c0a889df56c9f6c5a68ec7aa222082569b35fd9 Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Mon, 9 Jun 2008 08:38:35 +0200 Subject: au1xmmc: codingstyle tidying. Clean up the codebase, no functional changes. - merge the au1xmmc.h header contents into the driver file, - indentation, spelling and style fixes. Signed-off-by: Manuel Lauss Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c index 16b5640..fcbaf40 100644 --- a/drivers/mmc/host/au1xmmc.c +++ b/drivers/mmc/host/au1xmmc.c @@ -49,20 +49,104 @@ #include #include -#include -#include "au1xmmc.h" - #define DRIVER_NAME "au1xxx-mmc" /* Set this to enable special debugging macros */ /* #define DEBUG */ #ifdef DEBUG -#define DBG(fmt, idx, args...) printk("au1xx(%d): DEBUG: " fmt, idx, ##args) +#define DBG(fmt, idx, args...) \ + printk(KERN_DEBUG "au1xmmc(%d): DEBUG: " fmt, idx, ##args) #else -#define DBG(fmt, idx, args...) +#define DBG(fmt, idx, args...) do {} while (0) #endif +/* Hardware definitions */ +#define AU1XMMC_DESCRIPTOR_COUNT 1 +#define AU1XMMC_DESCRIPTOR_SIZE 2048 + +#define AU1XMMC_OCR (MMC_VDD_27_28 | MMC_VDD_28_29 | MMC_VDD_29_30 | \ + MMC_VDD_30_31 | MMC_VDD_31_32 | MMC_VDD_32_33 | \ + MMC_VDD_33_34 | MMC_VDD_34_35 | MMC_VDD_35_36) + +/* This gives us a hard value for the stop command that we can write directly + * to the command register. + */ +#define STOP_CMD \ + (SD_CMD_RT_1B | SD_CMD_CT_7 | (0xC << SD_CMD_CI_SHIFT) | SD_CMD_GO) + +/* This is the set of interrupts that we configure by default. */ +#define AU1XMMC_INTERRUPTS \ + (SD_CONFIG_SC | SD_CONFIG_DT | SD_CONFIG_RAT | \ + SD_CONFIG_CR | SD_CONFIG_I) + +/* The poll event (looking for insert/remove events runs twice a second. */ +#define AU1XMMC_DETECT_TIMEOUT (HZ/2) + +struct au1xmmc_host { + struct mmc_host *mmc; + struct mmc_request *mrq; + + u32 flags; + u32 iobase; + u32 clock; + u32 bus_width; + u32 power_mode; + + int status; + + struct { + int len; + int dir; + } dma; + + struct { + int index; + int offset; + int len; + } pio; + + u32 tx_chan; + u32 rx_chan; + + int irq; + + struct timer_list timer; + struct tasklet_struct finish_task; + struct tasklet_struct data_task; + struct au1xmmc_platform_data *platdata; + struct platform_device *pdev; + struct resource *ioarea; +}; + +/* Status flags used by the host structure */ +#define HOST_F_XMIT 0x0001 +#define HOST_F_RECV 0x0002 +#define HOST_F_DMA 0x0010 +#define HOST_F_ACTIVE 0x0100 +#define HOST_F_STOP 0x1000 + +#define HOST_S_IDLE 0x0001 +#define HOST_S_CMD 0x0002 +#define HOST_S_DATA 0x0003 +#define HOST_S_STOP 0x0004 + +/* Easy access macros */ +#define HOST_STATUS(h) ((h)->iobase + SD_STATUS) +#define HOST_CONFIG(h) ((h)->iobase + SD_CONFIG) +#define HOST_ENABLE(h) ((h)->iobase + SD_ENABLE) +#define HOST_TXPORT(h) ((h)->iobase + SD_TXPORT) +#define HOST_RXPORT(h) ((h)->iobase + SD_RXPORT) +#define HOST_CMDARG(h) ((h)->iobase + SD_CMDARG) +#define HOST_BLKSIZE(h) ((h)->iobase + SD_BLKSIZE) +#define HOST_CMD(h) ((h)->iobase + SD_CMD) +#define HOST_CONFIG2(h) ((h)->iobase + SD_CONFIG2) +#define HOST_TIMEOUT(h) ((h)->iobase + SD_TIMEOUT) +#define HOST_DEBUG(h) ((h)->iobase + SD_DEBUG) + +#define DMA_CHANNEL(h) \ + (((h)->flags & HOST_F_XMIT) ? (h)->tx_chan : (h)->rx_chan) + static inline void IRQ_ON(struct au1xmmc_host *host, u32 mask) { u32 val = au_readl(HOST_CONFIG(host)); @@ -141,7 +225,6 @@ static int au1xmmc_card_readonly(struct mmc_host *mmc) static void au1xmmc_finish_request(struct au1xmmc_host *host) { - struct mmc_request *mrq = host->mrq; host->mrq = NULL; @@ -215,18 +298,14 @@ static int au1xmmc_send_command(struct au1xmmc_host *host, int wait, au_sync(); /* Wait for the command to go on the line */ - - while(1) { - if (!(au_readl(HOST_CMD(host)) & SD_CMD_GO)) - break; - } + while (au_readl(HOST_CMD(host)) & SD_CMD_GO) + /* nop */; /* Wait for the command to come back */ - if (wait) { u32 status = au_readl(HOST_STATUS(host)); - while(!(status & SD_STATUS_CR)) + while (!(status & SD_STATUS_CR)) status = au_readl(HOST_STATUS(host)); /* Clear the CR status */ @@ -240,12 +319,11 @@ static int au1xmmc_send_command(struct au1xmmc_host *host, int wait, static void au1xmmc_data_complete(struct au1xmmc_host *host, u32 status) { - struct mmc_request *mrq = host->mrq; struct mmc_data *data; u32 crc; - WARN_ON(host->status != HOST_S_DATA && host->status != HOST_S_STOP); + WARN_ON((host->status != HOST_S_DATA) && (host->status != HOST_S_STOP)); if (host->mrq == NULL) return; @@ -256,15 +334,13 @@ static void au1xmmc_data_complete(struct au1xmmc_host *host, u32 status) status = au_readl(HOST_STATUS(host)); /* The transaction is really over when the SD_STATUS_DB bit is clear */ - - while((host->flags & HOST_F_XMIT) && (status & SD_STATUS_DB)) + while ((host->flags & HOST_F_XMIT) && (status & SD_STATUS_DB)) status = au_readl(HOST_STATUS(host)); data->error = 0; dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, host->dma.dir); /* Process any errors */ - crc = (status & (SD_STATUS_WC | SD_STATUS_RC)); if (host->flags & HOST_F_XMIT) crc |= ((status & 0x07) == 0x02) ? 0 : 1; @@ -282,15 +358,13 @@ static void au1xmmc_data_complete(struct au1xmmc_host *host, u32 status) #ifdef CONFIG_SOC_AU1200 /* DBDMA */ u32 chan = DMA_CHANNEL(host); - chan_tab_t *c = *((chan_tab_t **) chan); + chan_tab_t *c = *((chan_tab_t **)chan); au1x_dma_chan_t *cp = c->chan_ptr; data->bytes_xfered = cp->ddma_bytecnt; #endif - } - else + } else data->bytes_xfered = - (data->blocks * data->blksz) - - host->pio.len; + (data->blocks * data->blksz) - host->pio.len; } au1xmmc_finish_request(host); @@ -298,7 +372,7 @@ static void au1xmmc_data_complete(struct au1xmmc_host *host, u32 status) static void au1xmmc_tasklet_data(unsigned long param) { - struct au1xmmc_host *host = (struct au1xmmc_host *) param; + struct au1xmmc_host *host = (struct au1xmmc_host *)param; u32 status = au_readl(HOST_STATUS(host)); au1xmmc_data_complete(host, status); @@ -308,11 +382,10 @@ static void au1xmmc_tasklet_data(unsigned long param) static void au1xmmc_send_pio(struct au1xmmc_host *host) { - - struct mmc_data *data = 0; - int sg_len, max, count = 0; - unsigned char *sg_ptr; - u32 status = 0; + struct mmc_data *data; + int sg_len, max, count; + unsigned char *sg_ptr, val; + u32 status; struct scatterlist *sg; data = host->mrq->data; @@ -327,14 +400,12 @@ static void au1xmmc_send_pio(struct au1xmmc_host *host) /* This is the space left inside the buffer */ sg_len = data->sg[host->pio.index].length - host->pio.offset; - /* Check to if we need less then the size of the sg_buffer */ - + /* Check if we need less than the size of the sg_buffer */ max = (sg_len > host->pio.len) ? host->pio.len : sg_len; - if (max > AU1XMMC_MAX_TRANSFER) max = AU1XMMC_MAX_TRANSFER; - - for(count = 0; count < max; count++ ) { - unsigned char val; + if (max > AU1XMMC_MAX_TRANSFER) + max = AU1XMMC_MAX_TRANSFER; + for (count = 0; count < max; count++) { status = au_readl(HOST_STATUS(host)); if (!(status & SD_STATUS_TH)) @@ -342,7 +413,7 @@ static void au1xmmc_send_pio(struct au1xmmc_host *host) val = *sg_ptr++; - au_writel((unsigned long) val, HOST_TXPORT(host)); + au_writel((unsigned long)val, HOST_TXPORT(host)); au_sync(); } @@ -366,11 +437,10 @@ static void au1xmmc_send_pio(struct au1xmmc_host *host) static void au1xmmc_receive_pio(struct au1xmmc_host *host) { - - struct mmc_data *data = 0; - int sg_len = 0, max = 0, count = 0; - unsigned char *sg_ptr = 0; - u32 status = 0; + struct mmc_data *data; + int max, count, sg_len = 0; + unsigned char *sg_ptr = NULL; + u32 status, val; struct scatterlist *sg; data = host->mrq->data; @@ -387,15 +457,15 @@ static void au1xmmc_receive_pio(struct au1xmmc_host *host) /* This is the space left inside the buffer */ sg_len = sg_dma_len(&data->sg[host->pio.index]) - host->pio.offset; - /* Check to if we need less then the size of the sg_buffer */ - if (sg_len < max) max = sg_len; + /* Check if we need less than the size of the sg_buffer */ + if (sg_len < max) + max = sg_len; } if (max > AU1XMMC_MAX_TRANSFER) max = AU1XMMC_MAX_TRANSFER; - for(count = 0; count < max; count++ ) { - u32 val; + for (count = 0; count < max; count++) { status = au_readl(HOST_STATUS(host)); if (!(status & SD_STATUS_NE)) @@ -421,7 +491,7 @@ static void au1xmmc_receive_pio(struct au1xmmc_host *host) val = au_readl(HOST_RXPORT(host)); if (sg_ptr) - *sg_ptr++ = (unsigned char) (val & 0xFF); + *sg_ptr++ = (unsigned char)(val & 0xFF); } host->pio.len -= count; @@ -433,7 +503,7 @@ static void au1xmmc_receive_pio(struct au1xmmc_host *host) } if (host->pio.len == 0) { - //IRQ_OFF(host, SD_CONFIG_RA | SD_CONFIG_RF); + /* IRQ_OFF(host, SD_CONFIG_RA | SD_CONFIG_RF); */ IRQ_OFF(host, SD_CONFIG_NE); if (host->flags & HOST_F_STOP) @@ -443,17 +513,15 @@ static void au1xmmc_receive_pio(struct au1xmmc_host *host) } } -/* static void au1xmmc_cmd_complete - This is called when a command has been completed - grab the response - and check for errors. Then start the data transfer if it is indicated. -*/ - +/* This is called when a command has been completed - grab the response + * and check for errors. Then start the data transfer if it is indicated. + */ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status) { - struct mmc_request *mrq = host->mrq; struct mmc_command *cmd; - int trans; + u32 r[4]; + int i, trans; if (!host->mrq) return; @@ -463,9 +531,6 @@ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status) if (cmd->flags & MMC_RSP_PRESENT) { if (cmd->flags & MMC_RSP_136) { - u32 r[4]; - int i; - r[0] = au_readl(host->iobase + SD_RESP3); r[1] = au_readl(host->iobase + SD_RESP2); r[2] = au_readl(host->iobase + SD_RESP1); @@ -473,10 +538,9 @@ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status) /* The CRC is omitted from the response, so really * we only got 120 bytes, but the engine expects - * 128 bits, so we have to shift things up + * 128 bits, so we have to shift things up. */ - - for(i = 0; i < 4; i++) { + for (i = 0; i < 4; i++) { cmd->resp[i] = (r[i] & 0x00FFFFFF) << 8; if (i != 3) cmd->resp[i] |= (r[i + 1] & 0xFF000000) >> 24; @@ -487,22 +551,20 @@ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status) * our response omits the CRC, our data ends up * being shifted 8 bits to the right. In this case, * that means that the OSR data starts at bit 31, - * so we can just read RESP0 and return that + * so we can just read RESP0 and return that. */ cmd->resp[0] = au_readl(host->iobase + SD_RESP0); } } /* Figure out errors */ - if (status & (SD_STATUS_SC | SD_STATUS_WC | SD_STATUS_RC)) cmd->error = -EILSEQ; trans = host->flags & (HOST_F_XMIT | HOST_F_RECV); if (!trans || cmd->error) { - - IRQ_OFF(host, SD_CONFIG_TH | SD_CONFIG_RA|SD_CONFIG_RF); + IRQ_OFF(host, SD_CONFIG_TH | SD_CONFIG_RA | SD_CONFIG_RF); tasklet_schedule(&host->finish_task); return; } @@ -529,18 +591,15 @@ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status) static void au1xmmc_set_clock(struct au1xmmc_host *host, int rate) { - unsigned int pbus = get_au1x00_speed(); unsigned int divisor; u32 config; /* From databook: - divisor = ((((cpuclock / sbus_divisor) / 2) / mmcclock) / 2) - 1 - */ - + * divisor = ((((cpuclock / sbus_divisor) / 2) / mmcclock) / 2) - 1 + */ pbus /= ((au_readl(SYS_POWERCTRL) & 0x3) + 2); pbus /= 2; - divisor = ((pbus / rate) / 2) - 1; config = au_readl(HOST_CONFIG(host)); @@ -552,8 +611,8 @@ static void au1xmmc_set_clock(struct au1xmmc_host *host, int rate) au_sync(); } -static int -au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data) +static int au1xmmc_prepare_data(struct au1xmmc_host *host, + struct mmc_data *data) { int datalen = data->blocks * data->blksz; @@ -582,7 +641,7 @@ au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data) au1xxx_dbdma_stop(channel); - for(i = 0; i < host->dma.len; i++) { + for (i = 0; i < host->dma.len; i++) { u32 ret = 0, flags = DDMA_FLAGS_NOIE; struct scatterlist *sg = &data->sg[i]; int sg_len = sg->length; @@ -592,14 +651,12 @@ au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data) if (i == host->dma.len - 1) flags = DDMA_FLAGS_IE; - if (host->flags & HOST_F_XMIT){ - ret = au1xxx_dbdma_put_source_flags(channel, - (void *) sg_virt(sg), len, flags); - } - else { - ret = au1xxx_dbdma_put_dest_flags(channel, - (void *) sg_virt(sg), - len, flags); + if (host->flags & HOST_F_XMIT) { + ret = au1xxx_dbdma_put_source_flags(channel, + (void *)sg_virt(sg), len, flags); + } else { + ret = au1xxx_dbdma_put_dest_flags(channel, + (void *)sg_virt(sg), len, flags); } if (!ret) @@ -608,8 +665,7 @@ au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data) datalen -= len; } #endif - } - else { + } else { host->pio.index = 0; host->pio.offset = 0; host->pio.len = datalen; @@ -618,7 +674,7 @@ au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data) IRQ_ON(host, SD_CONFIG_TH); else IRQ_ON(host, SD_CONFIG_NE); - //IRQ_ON(host, SD_CONFIG_RA|SD_CONFIG_RF); + /* IRQ_ON(host, SD_CONFIG_RA | SD_CONFIG_RF); */ } return 0; @@ -629,15 +685,10 @@ dataerr: return -ETIMEDOUT; } -/* static void au1xmmc_request - This actually starts a command or data transaction -*/ - +/* This actually starts a command or data transaction */ static void au1xmmc_request(struct mmc_host* mmc, struct mmc_request* mrq) { - struct au1xmmc_host *host = mmc_priv(mmc); - unsigned int flags = 0; int ret = 0; WARN_ON(irqs_disabled()); @@ -648,7 +699,6 @@ static void au1xmmc_request(struct mmc_host* mmc, struct mmc_request* mrq) if (mrq->data) { FLUSH_FIFO(host); - flags = mrq->data->flags; ret = au1xmmc_prepare_data(host, mrq->data); } @@ -663,7 +713,6 @@ static void au1xmmc_request(struct mmc_host* mmc, struct mmc_request* mrq) static void au1xmmc_reset_controller(struct au1xmmc_host *host) { - /* Apply the clock */ au_writel(SD_ENABLE_CE, HOST_ENABLE(host)); au_sync_delay(1); @@ -693,7 +742,7 @@ static void au1xmmc_reset_controller(struct au1xmmc_host *host) } -static void au1xmmc_set_ios(struct mmc_host* mmc, struct mmc_ios* ios) +static void au1xmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) { struct au1xmmc_host *host = mmc_priv(mmc); u32 config2; diff --git a/drivers/mmc/host/au1xmmc.h b/drivers/mmc/host/au1xmmc.h deleted file mode 100644 index 3b40065..0000000 --- a/drivers/mmc/host/au1xmmc.h +++ /dev/null @@ -1,97 +0,0 @@ -#ifndef _AU1XMMC_H_ -#define _AU1XMMC_H_ - -/* Hardware definitions */ - -#define AU1XMMC_DESCRIPTOR_COUNT 1 -#define AU1XMMC_DESCRIPTOR_SIZE 2048 - -#define AU1XMMC_OCR ( MMC_VDD_27_28 | MMC_VDD_28_29 | MMC_VDD_29_30 | \ - MMC_VDD_30_31 | MMC_VDD_31_32 | MMC_VDD_32_33 | \ - MMC_VDD_33_34 | MMC_VDD_34_35 | MMC_VDD_35_36) - -/* Easy access macros */ - -#define HOST_STATUS(h) ((h)->iobase + SD_STATUS) -#define HOST_CONFIG(h) ((h)->iobase + SD_CONFIG) -#define HOST_ENABLE(h) ((h)->iobase + SD_ENABLE) -#define HOST_TXPORT(h) ((h)->iobase + SD_TXPORT) -#define HOST_RXPORT(h) ((h)->iobase + SD_RXPORT) -#define HOST_CMDARG(h) ((h)->iobase + SD_CMDARG) -#define HOST_BLKSIZE(h) ((h)->iobase + SD_BLKSIZE) -#define HOST_CMD(h) ((h)->iobase + SD_CMD) -#define HOST_CONFIG2(h) ((h)->iobase + SD_CONFIG2) -#define HOST_TIMEOUT(h) ((h)->iobase + SD_TIMEOUT) -#define HOST_DEBUG(h) ((h)->iobase + SD_DEBUG) - -#define DMA_CHANNEL(h) \ - ( ((h)->flags & HOST_F_XMIT) ? (h)->tx_chan : (h)->rx_chan) - -/* This gives us a hard value for the stop command that we can write directly - * to the command register - */ - -#define STOP_CMD (SD_CMD_RT_1B|SD_CMD_CT_7|(0xC << SD_CMD_CI_SHIFT)|SD_CMD_GO) - -/* This is the set of interrupts that we configure by default */ - -#if 0 -#define AU1XMMC_INTERRUPTS (SD_CONFIG_SC | SD_CONFIG_DT | SD_CONFIG_DD | \ - SD_CONFIG_RAT | SD_CONFIG_CR | SD_CONFIG_I) -#endif - -#define AU1XMMC_INTERRUPTS (SD_CONFIG_SC | SD_CONFIG_DT | \ - SD_CONFIG_RAT | SD_CONFIG_CR | SD_CONFIG_I) -/* The poll event (looking for insert/remove events runs twice a second */ -#define AU1XMMC_DETECT_TIMEOUT (HZ/2) - -struct au1xmmc_host { - struct mmc_host *mmc; - struct mmc_request *mrq; - - u32 flags; - u32 iobase; - u32 clock; - u32 bus_width; - u32 power_mode; - - int status; - - struct { - int len; - int dir; - } dma; - - struct { - int index; - int offset; - int len; - } pio; - - u32 tx_chan; - u32 rx_chan; - - int irq; - - struct timer_list timer; - struct tasklet_struct finish_task; - struct tasklet_struct data_task; - struct au1xmmc_platform_data *platdata; - struct platform_device *pdev; - struct resource *ioarea; -}; - -/* Status flags used by the host structure */ - -#define HOST_F_XMIT 0x0001 -#define HOST_F_RECV 0x0002 -#define HOST_F_DMA 0x0010 -#define HOST_F_ACTIVE 0x0100 -#define HOST_F_STOP 0x1000 - -#define HOST_S_IDLE 0x0001 -#define HOST_S_CMD 0x0002 -#define HOST_S_DATA 0x0003 -#define HOST_S_STOP 0x0004 - -#endif -- cgit v0.10.2 From 88b8d9a83431237bf3eec1f2968f763607811171 Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Mon, 9 Jun 2008 08:39:11 +0200 Subject: au1xmmc: abort requests early if no card is present. Don't process an MMC request if no card is present. Signed-off-by: Manuel Lauss Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c index fcbaf40..718eb87 100644 --- a/drivers/mmc/host/au1xmmc.c +++ b/drivers/mmc/host/au1xmmc.c @@ -697,6 +697,13 @@ static void au1xmmc_request(struct mmc_host* mmc, struct mmc_request* mrq) host->mrq = mrq; host->status = HOST_S_CMD; + /* fail request immediately if no card is present */ + if (0 == au1xmmc_card_inserted(host)) { + mrq->cmd->error = -ENOMEDIUM; + au1xmmc_finish_request(host); + return; + } + if (mrq->data) { FLUSH_FIFO(host); ret = au1xmmc_prepare_data(host, mrq->data); -- cgit v0.10.2 From 08fcb7208c4a8913232e48931783270262ece954 Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Mon, 9 Jun 2008 08:40:35 +0200 Subject: au1xmmc: new maintainer. Signed-off-by: Manuel Lauss Signed-off-by: Pierre Ossman diff --git a/MAINTAINERS b/MAINTAINERS index 1528e58..f140449 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -348,7 +348,9 @@ W: http://www.linux-usb.org/SpeedTouch/ S: Maintained ALCHEMY AU1XX0 MMC DRIVER -S: Orphan +P: Manuel Lauss +M: manuel.lauss@gmail.com +S: Maintained ALI1563 I2C DRIVER P: Rudolf Marek -- cgit v0.10.2 From e2d2647702702ea08cb78cdc9eca8c24242aa9be Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Fri, 27 Jun 2008 18:25:18 +0200 Subject: au1xmmc: remove custom carddetect poll implementation. The MMC core provides a carddetect poll feature, time to remove the driver's own implementation of it. Signed-off-by: Manuel Lauss Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c index 718eb87..3f15eb2 100644 --- a/drivers/mmc/host/au1xmmc.c +++ b/drivers/mmc/host/au1xmmc.c @@ -21,7 +21,7 @@ * published by the Free Software Foundation. */ -/* Why is a timer used to detect insert events? +/* Why don't we use the SD controllers' carddetect feature? * * From the AU1100 MMC application guide: * If the Au1100-based design is intended to support both MultiMediaCards @@ -30,8 +30,6 @@ * In doing so, a MMC card never enters SPI-mode communications, * but now the SecureDigital card-detect feature of CD/DAT3 is ineffective * (the low to high transition will not occur). - * - * So we use the timer to check the status manually. */ #include @@ -111,7 +109,6 @@ struct au1xmmc_host { int irq; - struct timer_list timer; struct tasklet_struct finish_task; struct tasklet_struct data_task; struct au1xmmc_platform_data *platdata; @@ -198,29 +195,24 @@ static void au1xmmc_set_power(struct au1xmmc_host *host, int state) host->platdata->set_power(host->mmc, state); } -static int au1xmmc_card_inserted(struct au1xmmc_host *host) +static int au1xmmc_card_inserted(struct mmc_host *mmc) { - int ret; + struct au1xmmc_host *host = mmc_priv(mmc); if (host->platdata && host->platdata->card_inserted) - ret = host->platdata->card_inserted(host->mmc); - else - ret = 1; /* assume there is a card */ + return !!host->platdata->card_inserted(host->mmc); - return ret; + return -ENOSYS; } static int au1xmmc_card_readonly(struct mmc_host *mmc) { struct au1xmmc_host *host = mmc_priv(mmc); - int ret; if (host->platdata && host->platdata->card_readonly) - ret = host->platdata->card_readonly(mmc); - else - ret = 0; /* assume card is read-write */ + return !!host->platdata->card_readonly(mmc); - return ret; + return -ENOSYS; } static void au1xmmc_finish_request(struct au1xmmc_host *host) @@ -698,7 +690,7 @@ static void au1xmmc_request(struct mmc_host* mmc, struct mmc_request* mrq) host->status = HOST_S_CMD; /* fail request immediately if no card is present */ - if (0 == au1xmmc_card_inserted(host)) { + if (0 == au1xmmc_card_inserted(mmc)) { mrq->cmd->error = -ENOMEDIUM; au1xmmc_finish_request(host); return; @@ -935,39 +927,10 @@ static const struct mmc_host_ops au1xmmc_ops = { .request = au1xmmc_request, .set_ios = au1xmmc_set_ios, .get_ro = au1xmmc_card_readonly, + .get_cd = au1xmmc_card_inserted, .enable_sdio_irq = au1xmmc_enable_sdio_irq, }; -static void au1xmmc_poll_event(unsigned long arg) -{ - struct au1xmmc_host *host = (struct au1xmmc_host *)arg; - int card = au1xmmc_card_inserted(host); - int controller = (host->flags & HOST_F_ACTIVE) ? 1 : 0; - - if (card != controller) { - host->flags &= ~HOST_F_ACTIVE; - if (card) - host->flags |= HOST_F_ACTIVE; - mmc_detect_change(host->mmc, 0); - } - -#ifdef DEBUG - if (host->mrq != NULL) { - u32 status = au_readl(HOST_STATUS(host)); - DBG("PENDING - %8.8x\n", host->pdev->id, status); - } -#endif - mod_timer(&host->timer, jiffies + AU1XMMC_DETECT_TIMEOUT); -} - -static void au1xmmc_init_cd_poll_timer(struct au1xmmc_host *host) -{ - init_timer(&host->timer); - host->timer.function = au1xmmc_poll_event; - host->timer.data = (unsigned long)host; - host->timer.expires = jiffies + AU1XMMC_DETECT_TIMEOUT; -} - static int __devinit au1xmmc_probe(struct platform_device *pdev) { struct mmc_host *mmc; @@ -1042,13 +1005,11 @@ static int __devinit au1xmmc_probe(struct platform_device *pdev) if (host->platdata && host->platdata->cd_setup) { ret = host->platdata->cd_setup(mmc, 1); if (ret) { - dev_err(&pdev->dev, "board CD setup failed\n"); - goto out4; + dev_warn(&pdev->dev, "board CD setup failed\n"); + mmc->caps |= MMC_CAP_NEEDS_POLL; } - } else { - /* poll the board-specific is-card-in-socket-? method */ - au1xmmc_init_cd_poll_timer(host); - } + } else + mmc->caps |= MMC_CAP_NEEDS_POLL; tasklet_init(&host->data_task, au1xmmc_tasklet_data, (unsigned long)host); @@ -1084,10 +1045,6 @@ static int __devinit au1xmmc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, mmc); - /* start the carddetect poll timer if necessary */ - if (!(host->platdata && host->platdata->cd_setup)) - add_timer(&host->timer); - printk(KERN_INFO DRIVER_NAME ": MMC Controller %d set up at %8.8X" " (mode=%s)\n", pdev->id, host->iobase, host->flags & HOST_F_DMA ? "dma" : "pio"); @@ -1112,9 +1069,10 @@ out5: tasklet_kill(&host->data_task); tasklet_kill(&host->finish_task); - if (host->platdata && host->platdata->cd_setup) + if (host->platdata && host->platdata->cd_setup && + !(mmc->caps & MMC_CAP_NEEDS_POLL)) host->platdata->cd_setup(mmc, 0); -out4: + free_irq(host->irq, host); out3: iounmap((void *)host->iobase); @@ -1142,10 +1100,9 @@ static int __devexit au1xmmc_remove(struct platform_device *pdev) led_classdev_unregister(host->platdata->led); #endif - if (host->platdata && host->platdata->cd_setup) + if (host->platdata && host->platdata->cd_setup && + !(mmc->caps & MMC_CAP_NEEDS_POLL)) host->platdata->cd_setup(mmc, 0); - else - del_timer_sync(&host->timer); au_writel(0, HOST_ENABLE(host)); au_writel(0, HOST_CONFIG(host)); -- cgit v0.10.2 From ad3868b2ec96ec14a1549c9e33f5f9a2a3c6ab15 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Sat, 28 Jun 2008 12:52:45 +0200 Subject: mmc,sdio: helper function for transfer padding There are a lot of crappy controllers out there that cannot handle all the request sizes that the MMC/SD/SDIO specifications require. In case the card driver can pad the data to overcome the problems, this commit adds a helper that calculates how much that padding should be. A corresponding helper is also added for SDIO, but it can also deal with all the complexities of splitting up a large transfer efficiently. Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index ede5d1e..3ee5b8c 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -3,7 +3,7 @@ * * Copyright (C) 2003-2004 Russell King, All Rights Reserved. * SD support Copyright (C) 2004 Ian Molton, All Rights Reserved. - * Copyright (C) 2005-2007 Pierre Ossman, All Rights Reserved. + * Copyright (C) 2005-2008 Pierre Ossman, All Rights Reserved. * MMCv4 support Copyright (C) 2006 Philip Langdale, All Rights Reserved. * * This program is free software; you can redistribute it and/or modify @@ -295,6 +295,33 @@ void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card) EXPORT_SYMBOL(mmc_set_data_timeout); /** + * mmc_align_data_size - pads a transfer size to a more optimal value + * @card: the MMC card associated with the data transfer + * @sz: original transfer size + * + * Pads the original data size with a number of extra bytes in + * order to avoid controller bugs and/or performance hits + * (e.g. some controllers revert to PIO for certain sizes). + * + * Returns the improved size, which might be unmodified. + * + * Note that this function is only relevant when issuing a + * single scatter gather entry. + */ +unsigned int mmc_align_data_size(struct mmc_card *card, unsigned int sz) +{ + /* + * FIXME: We don't have a system for the controller to tell + * the core about its problems yet, so for now we just 32-bit + * align the size. + */ + sz = ((sz + 3) / 4) * 4; + + return sz; +} +EXPORT_SYMBOL(mmc_align_data_size); + +/** * __mmc_claim_host - exclusively claim a host * @host: mmc host to claim * @abort: whether or not the operation should be aborted diff --git a/drivers/mmc/core/sdio_io.c b/drivers/mmc/core/sdio_io.c index 625b92c..6ee7861 100644 --- a/drivers/mmc/core/sdio_io.c +++ b/drivers/mmc/core/sdio_io.c @@ -1,7 +1,7 @@ /* * linux/drivers/mmc/core/sdio_io.c * - * Copyright 2007 Pierre Ossman + * Copyright 2007-2008 Pierre Ossman * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -189,6 +189,103 @@ int sdio_set_block_size(struct sdio_func *func, unsigned blksz) EXPORT_SYMBOL_GPL(sdio_set_block_size); +/** + * sdio_align_size - pads a transfer size to a more optimal value + * @func: SDIO function + * @sz: original transfer size + * + * Pads the original data size with a number of extra bytes in + * order to avoid controller bugs and/or performance hits + * (e.g. some controllers revert to PIO for certain sizes). + * + * If possible, it will also adjust the size so that it can be + * handled in just a single request. + * + * Returns the improved size, which might be unmodified. + */ +unsigned int sdio_align_size(struct sdio_func *func, unsigned int sz) +{ + unsigned int orig_sz; + unsigned int blk_sz, byte_sz; + unsigned chunk_sz; + + orig_sz = sz; + + /* + * Do a first check with the controller, in case it + * wants to increase the size up to a point where it + * might need more than one block. + */ + sz = mmc_align_data_size(func->card, sz); + + /* + * If we can still do this with just a byte transfer, then + * we're done. + */ + if ((sz <= func->cur_blksize) && (sz <= 512)) + return sz; + + if (func->card->cccr.multi_block) { + /* + * Check if the transfer is already block aligned + */ + if ((sz % func->cur_blksize) == 0) + return sz; + + /* + * Realign it so that it can be done with one request, + * and recheck if the controller still likes it. + */ + blk_sz = ((sz + func->cur_blksize - 1) / + func->cur_blksize) * func->cur_blksize; + blk_sz = mmc_align_data_size(func->card, blk_sz); + + /* + * This value is only good if it is still just + * one request. + */ + if ((blk_sz % func->cur_blksize) == 0) + return blk_sz; + + /* + * We failed to do one request, but at least try to + * pad the remainder properly. + */ + byte_sz = mmc_align_data_size(func->card, + sz % func->cur_blksize); + if ((byte_sz <= func->cur_blksize) && (byte_sz <= 512)) { + blk_sz = sz / func->cur_blksize; + return blk_sz * func->cur_blksize + byte_sz; + } + } else { + /* + * We need multiple requests, so first check that the + * controller can handle the chunk size; + */ + chunk_sz = mmc_align_data_size(func->card, + min(func->cur_blksize, 512u)); + if (chunk_sz == min(func->cur_blksize, 512u)) { + /* + * Fix up the size of the remainder (if any) + */ + byte_sz = orig_sz % chunk_sz; + if (byte_sz) { + byte_sz = mmc_align_data_size(func->card, + byte_sz); + } + + return (orig_sz / chunk_sz) * chunk_sz + byte_sz; + } + } + + /* + * The controller is simply incapable of transferring the size + * we want in decent manner, so just return the original size. + */ + return orig_sz; +} +EXPORT_SYMBOL_GPL(sdio_align_size); + /* Split an arbitrarily sized data transfer into several * IO_RW_EXTENDED commands. */ static int sdio_io_rw_ext_helper(struct sdio_func *func, int write, diff --git a/drivers/net/wireless/libertas/if_sdio.c b/drivers/net/wireless/libertas/if_sdio.c index 3dd537b..b54e2ea 100644 --- a/drivers/net/wireless/libertas/if_sdio.c +++ b/drivers/net/wireless/libertas/if_sdio.c @@ -1,7 +1,7 @@ /* * linux/drivers/net/wireless/libertas/if_sdio.c * - * Copyright 2007 Pierre Ossman + * Copyright 2007-2008 Pierre Ossman * * Inspired by if_cs.c, Copyright 2007 Holger Schurig * @@ -266,13 +266,10 @@ static int if_sdio_card_to_host(struct if_sdio_card *card) /* * The transfer must be in one transaction or the firmware - * goes suicidal. + * goes suicidal. There's no way to guarantee that for all + * controllers, but we can at least try. */ - chunk = size; - if ((chunk > card->func->cur_blksize) || (chunk > 512)) { - chunk = (chunk + card->func->cur_blksize - 1) / - card->func->cur_blksize * card->func->cur_blksize; - } + chunk = sdio_align_size(card->func, size); ret = sdio_readsb(card->func, card->buffer, card->ioport, chunk); if (ret) @@ -696,13 +693,10 @@ static int if_sdio_host_to_card(struct lbs_private *priv, /* * The transfer must be in one transaction or the firmware - * goes suicidal. + * goes suicidal. There's no way to guarantee that for all + * controllers, but we can at least try. */ - size = nb + 4; - if ((size > card->func->cur_blksize) || (size > 512)) { - size = (size + card->func->cur_blksize - 1) / - card->func->cur_blksize * card->func->cur_blksize; - } + size = sdio_align_size(card->func, nb + 4); packet = kzalloc(sizeof(struct if_sdio_packet) + size, GFP_ATOMIC); diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index d0c3abe..143cebf 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -135,6 +135,7 @@ extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *, struct mmc_command *, int); extern void mmc_set_data_timeout(struct mmc_data *, const struct mmc_card *); +extern unsigned int mmc_align_data_size(struct mmc_card *, unsigned int); extern int __mmc_claim_host(struct mmc_host *host, atomic_t *abort); extern void mmc_release_host(struct mmc_host *host); diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index b050f4d..f57f22b 100644 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -1,7 +1,7 @@ /* * include/linux/mmc/sdio_func.h * - * Copyright 2007 Pierre Ossman + * Copyright 2007-2008 Pierre Ossman * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -120,6 +120,8 @@ extern int sdio_set_block_size(struct sdio_func *func, unsigned blksz); extern int sdio_claim_irq(struct sdio_func *func, sdio_irq_handler_t *handler); extern int sdio_release_irq(struct sdio_func *func); +extern unsigned int sdio_align_size(struct sdio_func *func, unsigned int sz); + extern unsigned char sdio_readb(struct sdio_func *func, unsigned int addr, int *err_ret); extern unsigned short sdio_readw(struct sdio_func *func, -- cgit v0.10.2 From eea0f581c4e596e02250df230f8d385827977964 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Sat, 28 Jun 2008 13:22:40 +0200 Subject: sdio: clean up handling of byte mode transfer size Make sure that the maximum size for a byte mode transfer is identical in all places. Also tweak the transfer helper so that a single byte mode transfer is preferred over (possibly multiple) block mode request(s). Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/core/sdio_io.c b/drivers/mmc/core/sdio_io.c index 6ee7861..cc42a41 100644 --- a/drivers/mmc/core/sdio_io.c +++ b/drivers/mmc/core/sdio_io.c @@ -186,9 +186,20 @@ int sdio_set_block_size(struct sdio_func *func, unsigned blksz) func->cur_blksize = blksz; return 0; } - EXPORT_SYMBOL_GPL(sdio_set_block_size); +/* + * Calculate the maximum byte mode transfer size + */ +static inline unsigned int sdio_max_byte_size(struct sdio_func *func) +{ + return min(min(min( + func->card->host->max_seg_size, + func->card->host->max_blk_size), + func->max_blksize), + 512u); /* maximum size for byte mode */ +} + /** * sdio_align_size - pads a transfer size to a more optimal value * @func: SDIO function @@ -222,7 +233,7 @@ unsigned int sdio_align_size(struct sdio_func *func, unsigned int sz) * If we can still do this with just a byte transfer, then * we're done. */ - if ((sz <= func->cur_blksize) && (sz <= 512)) + if (sz <= sdio_max_byte_size(func)) return sz; if (func->card->cccr.multi_block) { @@ -253,7 +264,7 @@ unsigned int sdio_align_size(struct sdio_func *func, unsigned int sz) */ byte_sz = mmc_align_data_size(func->card, sz % func->cur_blksize); - if ((byte_sz <= func->cur_blksize) && (byte_sz <= 512)) { + if (byte_sz <= sdio_max_byte_size(func)) { blk_sz = sz / func->cur_blksize; return blk_sz * func->cur_blksize + byte_sz; } @@ -263,8 +274,8 @@ unsigned int sdio_align_size(struct sdio_func *func, unsigned int sz) * controller can handle the chunk size; */ chunk_sz = mmc_align_data_size(func->card, - min(func->cur_blksize, 512u)); - if (chunk_sz == min(func->cur_blksize, 512u)) { + sdio_max_byte_size(func)); + if (chunk_sz == sdio_max_byte_size(func)) { /* * Fix up the size of the remainder (if any) */ @@ -296,7 +307,7 @@ static int sdio_io_rw_ext_helper(struct sdio_func *func, int write, int ret; /* Do the bulk of the transfer using block mode (if supported). */ - if (func->card->cccr.multi_block) { + if (func->card->cccr.multi_block && (size > sdio_max_byte_size(func))) { /* Blocks per command is limited by host count, host transfer * size (we only use a single sg entry) and the maximum for * IO_RW_EXTENDED of 511 blocks. */ @@ -328,11 +339,7 @@ static int sdio_io_rw_ext_helper(struct sdio_func *func, int write, /* Write the remainder using byte mode. */ while (remainder > 0) { - size = remainder; - if (size > func->cur_blksize) - size = func->cur_blksize; - if (size > 512) - size = 512; /* maximum size for byte mode */ + size = min(remainder, sdio_max_byte_size(func)); ret = mmc_io_rw_extended(func->card, write, func->num, addr, incr_addr, buf, 1, size); -- cgit v0.10.2 From 0121a9829bf28c65e1a05cc881899c10d82b8de2 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Sat, 28 Jun 2008 17:51:27 +0200 Subject: mmc_test: only bind to supported cards We can only perform the tests on MMC and SD cards, so avoid binding to any other type. Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c index 371d922..8e0bb12 100644 --- a/drivers/mmc/card/mmc_test.c +++ b/drivers/mmc/card/mmc_test.c @@ -1,7 +1,7 @@ /* * linux/drivers/mmc/card/mmc_test.c * - * Copyright 2007 Pierre Ossman + * Copyright 2007-2008 Pierre Ossman * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -858,6 +858,9 @@ static int mmc_test_probe(struct mmc_card *card) { int ret; + if ((card->type != MMC_TYPE_MMC) && (card->type != MMC_TYPE_SD)) + return -ENODEV; + mutex_init(&mmc_test_lock); ret = device_create_file(&card->dev, &dev_attr_test); -- cgit v0.10.2 From 93fc48c785f6266e67663b3cbbf24579b53fe5cf Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Sat, 28 Jun 2008 18:21:41 +0200 Subject: sdhci-pci: don't penalize newer jmicron chips The upcoming JMicron chips will have solved all the currently known bugs, so don't penalize them for older problems. Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index c0fbf48..0716dcf 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -139,6 +139,12 @@ static int jmicron_probe(struct sdhci_pci_chip *chip) { int ret; + if (chip->pdev->revision == 0) { + chip->quirks |= SDHCI_QUIRK_32BIT_DMA_ADDR | + SDHCI_QUIRK_32BIT_DMA_SIZE | + SDHCI_QUIRK_RESET_AFTER_REQUEST; + } + /* * JMicron chips can have two interfaces to the same hardware * in order to work around limitations in Microsoft's driver. @@ -250,10 +256,6 @@ static int jmicron_resume(struct sdhci_pci_chip *chip) } static const struct sdhci_pci_fixes sdhci_jmicron = { - .quirks = SDHCI_QUIRK_32BIT_DMA_ADDR | - SDHCI_QUIRK_32BIT_DMA_SIZE | - SDHCI_QUIRK_RESET_AFTER_REQUEST, - .probe = jmicron_probe, .probe_slot = jmicron_probe_slot, -- cgit v0.10.2 From 2134a922c6e75c779983cad5d8aae832275f5a0d Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Sat, 28 Jun 2008 18:28:51 +0200 Subject: sdhci: scatter-gather (ADMA) support Add support for the scatter-gather DMA mode present on newer controllers. As the mode requires 32-bit alignment, non-aligned chunks are handled by using a bounce buffer. Also add some new quirks to handle controllers that have bugs in the ADMA engine. Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 0716dcf..deb607c 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -142,6 +142,7 @@ static int jmicron_probe(struct sdhci_pci_chip *chip) if (chip->pdev->revision == 0) { chip->quirks |= SDHCI_QUIRK_32BIT_DMA_ADDR | SDHCI_QUIRK_32BIT_DMA_SIZE | + SDHCI_QUIRK_32BIT_ADMA_SIZE | SDHCI_QUIRK_RESET_AFTER_REQUEST; } @@ -206,6 +207,22 @@ static void jmicron_enable_mmc(struct sdhci_host *host, int on) static int jmicron_probe_slot(struct sdhci_pci_slot *slot) { + if (slot->chip->pdev->revision == 0) { + u16 version; + + version = readl(slot->host->ioaddr + SDHCI_HOST_VERSION); + version = (version & SDHCI_VENDOR_VER_MASK) >> + SDHCI_VENDOR_VER_SHIFT; + + /* + * Older versions of the chip have lots of nasty glitches + * in the ADMA engine. It's best just to avoid it + * completely. + */ + if (version < 0xAC) + slot->host->quirks |= SDHCI_QUIRK_BROKEN_ADMA; + } + /* * The secondary interface requires a bit set to get the * interrupts. diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 0ab582e..b802044 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -124,7 +124,8 @@ static void sdhci_init(struct sdhci_host *host) SDHCI_INT_END_BIT | SDHCI_INT_CRC | SDHCI_INT_TIMEOUT | SDHCI_INT_CARD_REMOVE | SDHCI_INT_CARD_INSERT | SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL | - SDHCI_INT_DMA_END | SDHCI_INT_DATA_END | SDHCI_INT_RESPONSE; + SDHCI_INT_DMA_END | SDHCI_INT_DATA_END | SDHCI_INT_RESPONSE | + SDHCI_INT_ADMA_ERROR; writel(intmask, host->ioaddr + SDHCI_INT_ENABLE); writel(intmask, host->ioaddr + SDHCI_SIGNAL_ENABLE); @@ -314,6 +315,196 @@ static void sdhci_transfer_pio(struct sdhci_host *host) DBG("PIO transfer complete.\n"); } +static char *sdhci_kmap_atomic(struct scatterlist *sg, unsigned long *flags) +{ + local_irq_save(*flags); + return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset; +} + +static void sdhci_kunmap_atomic(void *buffer, unsigned long *flags) +{ + kunmap_atomic(buffer, KM_BIO_SRC_IRQ); + local_irq_restore(*flags); +} + +static void sdhci_adma_table_pre(struct sdhci_host *host, + struct mmc_data *data) +{ + int direction; + + u8 *desc; + u8 *align; + dma_addr_t addr; + dma_addr_t align_addr; + int len, offset; + + struct scatterlist *sg; + int i; + char *buffer; + unsigned long flags; + + /* + * The spec does not specify endianness of descriptor table. + * We currently guess that it is LE. + */ + + if (data->flags & MMC_DATA_READ) + direction = DMA_FROM_DEVICE; + else + direction = DMA_TO_DEVICE; + + /* + * The ADMA descriptor table is mapped further down as we + * need to fill it with data first. + */ + + host->align_addr = dma_map_single(mmc_dev(host->mmc), + host->align_buffer, 128 * 4, direction); + BUG_ON(host->align_addr & 0x3); + + host->sg_count = dma_map_sg(mmc_dev(host->mmc), + data->sg, data->sg_len, direction); + + desc = host->adma_desc; + align = host->align_buffer; + + align_addr = host->align_addr; + + for_each_sg(data->sg, sg, host->sg_count, i) { + addr = sg_dma_address(sg); + len = sg_dma_len(sg); + + /* + * The SDHCI specification states that ADMA + * addresses must be 32-bit aligned. If they + * aren't, then we use a bounce buffer for + * the (up to three) bytes that screw up the + * alignment. + */ + offset = (4 - (addr & 0x3)) & 0x3; + if (offset) { + if (data->flags & MMC_DATA_WRITE) { + buffer = sdhci_kmap_atomic(sg, &flags); + memcpy(align, buffer, offset); + sdhci_kunmap_atomic(buffer, &flags); + } + + desc[7] = (align_addr >> 24) & 0xff; + desc[6] = (align_addr >> 16) & 0xff; + desc[5] = (align_addr >> 8) & 0xff; + desc[4] = (align_addr >> 0) & 0xff; + + BUG_ON(offset > 65536); + + desc[3] = (offset >> 8) & 0xff; + desc[2] = (offset >> 0) & 0xff; + + desc[1] = 0x00; + desc[0] = 0x21; /* tran, valid */ + + align += 4; + align_addr += 4; + + desc += 8; + + addr += offset; + len -= offset; + } + + desc[7] = (addr >> 24) & 0xff; + desc[6] = (addr >> 16) & 0xff; + desc[5] = (addr >> 8) & 0xff; + desc[4] = (addr >> 0) & 0xff; + + BUG_ON(len > 65536); + + desc[3] = (len >> 8) & 0xff; + desc[2] = (len >> 0) & 0xff; + + desc[1] = 0x00; + desc[0] = 0x21; /* tran, valid */ + + desc += 8; + + /* + * If this triggers then we have a calculation bug + * somewhere. :/ + */ + WARN_ON((desc - host->adma_desc) > (128 * 2 + 1) * 4); + } + + /* + * Add a terminating entry. + */ + desc[7] = 0; + desc[6] = 0; + desc[5] = 0; + desc[4] = 0; + + desc[3] = 0; + desc[2] = 0; + + desc[1] = 0x00; + desc[0] = 0x03; /* nop, end, valid */ + + /* + * Resync align buffer as we might have changed it. + */ + if (data->flags & MMC_DATA_WRITE) { + dma_sync_single_for_device(mmc_dev(host->mmc), + host->align_addr, 128 * 4, direction); + } + + host->adma_addr = dma_map_single(mmc_dev(host->mmc), + host->adma_desc, (128 * 2 + 1) * 4, DMA_TO_DEVICE); + BUG_ON(host->adma_addr & 0x3); +} + +static void sdhci_adma_table_post(struct sdhci_host *host, + struct mmc_data *data) +{ + int direction; + + struct scatterlist *sg; + int i, size; + u8 *align; + char *buffer; + unsigned long flags; + + if (data->flags & MMC_DATA_READ) + direction = DMA_FROM_DEVICE; + else + direction = DMA_TO_DEVICE; + + dma_unmap_single(mmc_dev(host->mmc), host->adma_addr, + (128 * 2 + 1) * 4, DMA_TO_DEVICE); + + dma_unmap_single(mmc_dev(host->mmc), host->align_addr, + 128 * 4, direction); + + if (data->flags & MMC_DATA_READ) { + dma_sync_sg_for_cpu(mmc_dev(host->mmc), data->sg, + data->sg_len, direction); + + align = host->align_buffer; + + for_each_sg(data->sg, sg, host->sg_count, i) { + if (sg_dma_address(sg) & 0x3) { + size = 4 - (sg_dma_address(sg) & 0x3); + + buffer = sdhci_kmap_atomic(sg, &flags); + memcpy(buffer, align, size); + sdhci_kunmap_atomic(buffer, &flags); + + align += 4; + } + } + } + + dma_unmap_sg(mmc_dev(host->mmc), data->sg, + data->sg_len, direction); +} + static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_data *data) { u8 count; @@ -363,6 +554,7 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_data *data) static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) { u8 count; + u8 ctrl; WARN_ON(host->data); @@ -383,35 +575,104 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) if (host->flags & SDHCI_USE_DMA) host->flags |= SDHCI_REQ_USE_DMA; - if (unlikely((host->flags & SDHCI_REQ_USE_DMA) && - (host->quirks & SDHCI_QUIRK_32BIT_DMA_SIZE) && - ((data->blksz * data->blocks) & 0x3))) { - DBG("Reverting to PIO because of transfer size (%d)\n", - data->blksz * data->blocks); - host->flags &= ~SDHCI_REQ_USE_DMA; + /* + * FIXME: This doesn't account for merging when mapping the + * scatterlist. + */ + if (host->flags & SDHCI_REQ_USE_DMA) { + int broken, i; + struct scatterlist *sg; + + broken = 0; + if (host->flags & SDHCI_USE_ADMA) { + if (host->quirks & SDHCI_QUIRK_32BIT_ADMA_SIZE) + broken = 1; + } else { + if (host->quirks & SDHCI_QUIRK_32BIT_DMA_SIZE) + broken = 1; + } + + if (unlikely(broken)) { + for_each_sg(data->sg, sg, data->sg_len, i) { + if (sg->length & 0x3) { + DBG("Reverting to PIO because of " + "transfer size (%d)\n", + sg->length); + host->flags &= ~SDHCI_REQ_USE_DMA; + break; + } + } + } } /* * The assumption here being that alignment is the same after * translation to device address space. */ - if (unlikely((host->flags & SDHCI_REQ_USE_DMA) && - (host->quirks & SDHCI_QUIRK_32BIT_DMA_ADDR) && - (data->sg->offset & 0x3))) { - DBG("Reverting to PIO because of bad alignment\n"); - host->flags &= ~SDHCI_REQ_USE_DMA; + if (host->flags & SDHCI_REQ_USE_DMA) { + int broken, i; + struct scatterlist *sg; + + broken = 0; + if (host->flags & SDHCI_USE_ADMA) { + /* + * As we use 3 byte chunks to work around + * alignment problems, we need to check this + * quirk. + */ + if (host->quirks & SDHCI_QUIRK_32BIT_ADMA_SIZE) + broken = 1; + } else { + if (host->quirks & SDHCI_QUIRK_32BIT_DMA_ADDR) + broken = 1; + } + + if (unlikely(broken)) { + for_each_sg(data->sg, sg, data->sg_len, i) { + if (sg->offset & 0x3) { + DBG("Reverting to PIO because of " + "bad alignment\n"); + host->flags &= ~SDHCI_REQ_USE_DMA; + break; + } + } + } + } + + /* + * Always adjust the DMA selection as some controllers + * (e.g. JMicron) can't do PIO properly when the selection + * is ADMA. + */ + if (host->version >= SDHCI_SPEC_200) { + ctrl = readb(host->ioaddr + SDHCI_HOST_CONTROL); + ctrl &= ~SDHCI_CTRL_DMA_MASK; + if ((host->flags & SDHCI_REQ_USE_DMA) && + (host->flags & SDHCI_USE_ADMA)) + ctrl |= SDHCI_CTRL_ADMA32; + else + ctrl |= SDHCI_CTRL_SDMA; + writeb(ctrl, host->ioaddr + SDHCI_HOST_CONTROL); } if (host->flags & SDHCI_REQ_USE_DMA) { - int count; + if (host->flags & SDHCI_USE_ADMA) { + sdhci_adma_table_pre(host, data); + writel(host->adma_addr, + host->ioaddr + SDHCI_ADMA_ADDRESS); + } else { + int count; - count = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len, - (data->flags & MMC_DATA_READ) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); - WARN_ON(count != 1); + count = dma_map_sg(mmc_dev(host->mmc), + data->sg, data->sg_len, + (data->flags & MMC_DATA_READ) ? + DMA_FROM_DEVICE : + DMA_TO_DEVICE); + WARN_ON(count != 1); - writel(sg_dma_address(data->sg), - host->ioaddr + SDHCI_DMA_ADDRESS); + writel(sg_dma_address(data->sg), + host->ioaddr + SDHCI_DMA_ADDRESS); + } } else { host->cur_sg = data->sg; host->num_sg = data->sg_len; @@ -457,9 +718,13 @@ static void sdhci_finish_data(struct sdhci_host *host) host->data = NULL; if (host->flags & SDHCI_REQ_USE_DMA) { - dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, - (data->flags & MMC_DATA_READ) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); + if (host->flags & SDHCI_USE_ADMA) + sdhci_adma_table_post(host, data); + else { + dma_unmap_sg(mmc_dev(host->mmc), data->sg, + data->sg_len, (data->flags & MMC_DATA_READ) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); + } } /* @@ -1008,6 +1273,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask) host->data->error = -ETIMEDOUT; else if (intmask & (SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_END_BIT)) host->data->error = -EILSEQ; + else if (intmask & SDHCI_INT_ADMA_ERROR) + host->data->error = -EIO; if (host->data->error) sdhci_finish_data(host); @@ -1199,7 +1466,6 @@ int sdhci_add_host(struct sdhci_host *host) { struct mmc_host *mmc; unsigned int caps; - unsigned int version; int ret; WARN_ON(host == NULL); @@ -1213,12 +1479,13 @@ int sdhci_add_host(struct sdhci_host *host) sdhci_reset(host, SDHCI_RESET_ALL); - version = readw(host->ioaddr + SDHCI_HOST_VERSION); - version = (version & SDHCI_SPEC_VER_MASK) >> SDHCI_SPEC_VER_SHIFT; - if (version > 1) { + host->version = readw(host->ioaddr + SDHCI_HOST_VERSION); + host->version = (host->version & SDHCI_SPEC_VER_MASK) + >> SDHCI_SPEC_VER_SHIFT; + if (host->version > SDHCI_SPEC_200) { printk(KERN_ERR "%s: Unknown controller version (%d). " "You may experience problems.\n", mmc_hostname(mmc), - version); + host->version); } caps = readl(host->ioaddr + SDHCI_CAPABILITIES); @@ -1237,16 +1504,46 @@ int sdhci_add_host(struct sdhci_host *host) } if (host->flags & SDHCI_USE_DMA) { + if ((host->version >= SDHCI_SPEC_200) && + (caps & SDHCI_CAN_DO_ADMA2)) + host->flags |= SDHCI_USE_ADMA; + } + + if ((host->quirks & SDHCI_QUIRK_BROKEN_ADMA) && + (host->flags & SDHCI_USE_ADMA)) { + DBG("Disabling ADMA as it is marked broken\n"); + host->flags &= ~SDHCI_USE_ADMA; + } + + if (host->flags & SDHCI_USE_DMA) { if (host->ops->enable_dma) { if (host->ops->enable_dma(host)) { printk(KERN_WARNING "%s: No suitable DMA " "available. Falling back to PIO.\n", mmc_hostname(mmc)); - host->flags &= ~SDHCI_USE_DMA; + host->flags &= ~(SDHCI_USE_DMA | SDHCI_USE_ADMA); } } } + if (host->flags & SDHCI_USE_ADMA) { + /* + * We need to allocate descriptors for all sg entries + * (128) and potentially one alignment transfer for + * each of those entries. + */ + host->adma_desc = kmalloc((128 * 2 + 1) * 4, GFP_KERNEL); + host->align_buffer = kmalloc(128 * 4, GFP_KERNEL); + if (!host->adma_desc || !host->align_buffer) { + kfree(host->adma_desc); + kfree(host->align_buffer); + printk(KERN_WARNING "%s: Unable to allocate ADMA " + "buffers. Falling back to standard DMA.\n", + mmc_hostname(mmc)); + host->flags &= ~SDHCI_USE_ADMA; + } + } + /* XXX: Hack to get MMC layer to avoid highmem */ if (!(host->flags & SDHCI_USE_DMA)) mmc_dev(host->mmc)->dma_mask = 0; @@ -1298,13 +1595,16 @@ int sdhci_add_host(struct sdhci_host *host) spin_lock_init(&host->lock); /* - * Maximum number of segments. Hardware cannot do scatter lists. + * Maximum number of segments. Depends on if the hardware + * can do scatter/gather or not. */ - if (host->flags & SDHCI_USE_DMA) + if (host->flags & SDHCI_USE_ADMA) + mmc->max_hw_segs = 128; + else if (host->flags & SDHCI_USE_DMA) mmc->max_hw_segs = 1; - else - mmc->max_hw_segs = 16; - mmc->max_phys_segs = 16; + else /* PIO */ + mmc->max_hw_segs = 128; + mmc->max_phys_segs = 128; /* * Maximum number of sectors in one transfer. Limited by DMA boundary @@ -1314,9 +1614,13 @@ int sdhci_add_host(struct sdhci_host *host) /* * Maximum segment size. Could be one segment with the maximum number - * of bytes. + * of bytes. When doing hardware scatter/gather, each entry cannot + * be larger than 64 KiB though. */ - mmc->max_seg_size = mmc->max_req_size; + if (host->flags & SDHCI_USE_ADMA) + mmc->max_seg_size = 65536; + else + mmc->max_seg_size = mmc->max_req_size; /* * Maximum block size. This varies from controller to controller and @@ -1371,8 +1675,9 @@ int sdhci_add_host(struct sdhci_host *host) mmc_add_host(mmc); - printk(KERN_INFO "%s: SDHCI controller on %s [%s] using %s\n", + printk(KERN_INFO "%s: SDHCI controller on %s [%s] using %s%s\n", mmc_hostname(mmc), host->hw_name, mmc_dev(mmc)->bus_id, + (host->flags & SDHCI_USE_ADMA)?"A":"", (host->flags & SDHCI_USE_DMA)?"DMA":"PIO"); return 0; @@ -1426,6 +1731,12 @@ void sdhci_remove_host(struct sdhci_host *host, int dead) tasklet_kill(&host->card_tasklet); tasklet_kill(&host->finish_tasklet); + + kfree(host->adma_desc); + kfree(host->align_buffer); + + host->adma_desc = NULL; + host->align_buffer = NULL; } EXPORT_SYMBOL_GPL(sdhci_remove_host); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 7c30251..5bb3552 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -60,6 +60,11 @@ #define SDHCI_CTRL_LED 0x01 #define SDHCI_CTRL_4BITBUS 0x02 #define SDHCI_CTRL_HISPD 0x04 +#define SDHCI_CTRL_DMA_MASK 0x18 +#define SDHCI_CTRL_SDMA 0x00 +#define SDHCI_CTRL_ADMA1 0x08 +#define SDHCI_CTRL_ADMA32 0x10 +#define SDHCI_CTRL_ADMA64 0x18 #define SDHCI_POWER_CONTROL 0x29 #define SDHCI_POWER_ON 0x01 @@ -105,6 +110,7 @@ #define SDHCI_INT_DATA_END_BIT 0x00400000 #define SDHCI_INT_BUS_POWER 0x00800000 #define SDHCI_INT_ACMD12ERR 0x01000000 +#define SDHCI_INT_ADMA_ERROR 0x02000000 #define SDHCI_INT_NORMAL_MASK 0x00007FFF #define SDHCI_INT_ERROR_MASK 0xFFFF8000 @@ -128,11 +134,14 @@ #define SDHCI_CLOCK_BASE_SHIFT 8 #define SDHCI_MAX_BLOCK_MASK 0x00030000 #define SDHCI_MAX_BLOCK_SHIFT 16 +#define SDHCI_CAN_DO_ADMA2 0x00080000 +#define SDHCI_CAN_DO_ADMA1 0x00100000 #define SDHCI_CAN_DO_HISPD 0x00200000 #define SDHCI_CAN_DO_DMA 0x00400000 #define SDHCI_CAN_VDD_330 0x01000000 #define SDHCI_CAN_VDD_300 0x02000000 #define SDHCI_CAN_VDD_180 0x04000000 +#define SDHCI_CAN_64BIT 0x10000000 /* 44-47 reserved for more caps */ @@ -140,7 +149,16 @@ /* 4C-4F reserved for more max current */ -/* 50-FB reserved */ +#define SDHCI_SET_ACMD12_ERROR 0x50 +#define SDHCI_SET_INT_ERROR 0x52 + +#define SDHCI_ADMA_ERROR 0x54 + +/* 55-57 reserved */ + +#define SDHCI_ADMA_ADDRESS 0x58 + +/* 60-FB reserved */ #define SDHCI_SLOT_INT_STATUS 0xFC @@ -149,6 +167,8 @@ #define SDHCI_VENDOR_VER_SHIFT 8 #define SDHCI_SPEC_VER_MASK 0x00FF #define SDHCI_SPEC_VER_SHIFT 0 +#define SDHCI_SPEC_100 0 +#define SDHCI_SPEC_200 1 struct sdhci_ops; @@ -170,16 +190,20 @@ struct sdhci_host { #define SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS (1<<4) /* Controller has an unusable DMA engine */ #define SDHCI_QUIRK_BROKEN_DMA (1<<5) +/* Controller has an unusable ADMA engine */ +#define SDHCI_QUIRK_BROKEN_ADMA (1<<6) /* Controller can only DMA from 32-bit aligned addresses */ -#define SDHCI_QUIRK_32BIT_DMA_ADDR (1<<6) +#define SDHCI_QUIRK_32BIT_DMA_ADDR (1<<7) /* Controller can only DMA chunk sizes that are a multiple of 32 bits */ -#define SDHCI_QUIRK_32BIT_DMA_SIZE (1<<7) +#define SDHCI_QUIRK_32BIT_DMA_SIZE (1<<8) +/* Controller can only ADMA chunks that are a multiple of 32 bits */ +#define SDHCI_QUIRK_32BIT_ADMA_SIZE (1<<9) /* Controller needs to be reset after each request to stay stable */ -#define SDHCI_QUIRK_RESET_AFTER_REQUEST (1<<8) +#define SDHCI_QUIRK_RESET_AFTER_REQUEST (1<<10) /* Controller needs voltage and power writes to happen separately */ -#define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER (1<<9) +#define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER (1<<11) /* Controller provides an incorrect timeout value for transfers */ -#define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL (1<<10) +#define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL (1<<12) int irq; /* Device IRQ */ void __iomem * ioaddr; /* Mapped address */ @@ -197,8 +221,11 @@ struct sdhci_host { int flags; /* Host attributes */ #define SDHCI_USE_DMA (1<<0) /* Host is DMA capable */ -#define SDHCI_REQ_USE_DMA (1<<1) /* Use DMA for this req. */ -#define SDHCI_DEVICE_DEAD (1<<2) /* Device unresponsive */ +#define SDHCI_USE_ADMA (1<<1) /* Host is ADMA capable */ +#define SDHCI_REQ_USE_DMA (1<<2) /* Use DMA for this req. */ +#define SDHCI_DEVICE_DEAD (1<<3) /* Device unresponsive */ + + unsigned int version; /* SDHCI spec. version */ unsigned int max_clk; /* Max possible freq (MHz) */ unsigned int timeout_clk; /* Timeout freq (KHz) */ @@ -216,6 +243,14 @@ struct sdhci_host { int offset; /* Offset into current sg */ int remain; /* Bytes left in current */ + int sg_count; /* Mapped sg entries */ + + u8 *adma_desc; /* ADMA descriptor table */ + u8 *align_buffer; /* Bounce buffer */ + + dma_addr_t adma_addr; /* Mapped ADMA descr. table */ + dma_addr_t align_addr; /* Mapped bounce buffer */ + struct tasklet_struct card_tasklet; /* Tasklet structures */ struct tasklet_struct finish_tasklet; -- cgit v0.10.2 From 979ce7208a679b8d012450610d5d5aa75aab3af9 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Sun, 29 Jun 2008 12:19:47 +0200 Subject: mmc_block: wait for card even on failures Many failures are non-permanent, but the card might need some time to finish what it is doing before becoming responsive again. Make sure we wait for it to finish programming before dealing with the error. Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index f9ad960..4b0f822 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -2,7 +2,7 @@ * Block driver for media (i.e., flash cards) * * Copyright 2002 Hewlett-Packard Company - * Copyright 2005-2007 Pierre Ossman + * Copyright 2005-2008 Pierre Ossman * * Use consistent with the GNU GPL is permitted, * provided that this copyright notice is @@ -296,22 +296,24 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) mmc_queue_bounce_post(mq); + /* + * Check for errors here, but don't jump to cmd_err + * until later as we need to wait for the card to leave + * programming mode even when things go wrong. + */ if (brq.cmd.error) { printk(KERN_ERR "%s: error %d sending read/write command\n", req->rq_disk->disk_name, brq.cmd.error); - goto cmd_err; } if (brq.data.error) { printk(KERN_ERR "%s: error %d transferring data\n", req->rq_disk->disk_name, brq.data.error); - goto cmd_err; } if (brq.stop.error) { printk(KERN_ERR "%s: error %d sending stop command\n", req->rq_disk->disk_name, brq.stop.error); - goto cmd_err; } if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) { @@ -344,6 +346,9 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) #endif } + if (brq.cmd.error || brq.data.error || brq.stop.error) + goto cmd_err; + /* * A block was successfully transferred. */ -- cgit v0.10.2 From 6b174931a73177c6519f87e6a8d5ae6ba269cdb5 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Mon, 30 Jun 2008 09:09:27 +0200 Subject: mmc_test: cleanup Clean up and reorganise the mmc_test driver so that it (hopefully) is easier to extend with more complex tests. Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c index 8e0bb12..d6b9b48 100644 --- a/drivers/mmc/card/mmc_test.c +++ b/drivers/mmc/card/mmc_test.c @@ -26,13 +26,17 @@ struct mmc_test_card { struct mmc_card *card; + u8 scratch[BUFFER_SIZE]; u8 *buffer; }; /*******************************************************************/ -/* Helper functions */ +/* General helper functions */ /*******************************************************************/ +/* + * Configure correct block size in card + */ static int mmc_test_set_blksize(struct mmc_test_card *test, unsigned size) { struct mmc_command cmd; @@ -48,117 +52,61 @@ static int mmc_test_set_blksize(struct mmc_test_card *test, unsigned size) return 0; } -static int __mmc_test_transfer(struct mmc_test_card *test, int write, - unsigned broken_xfer, u8 *buffer, unsigned addr, - unsigned blocks, unsigned blksz) +/* + * Fill in the mmc_request structure given a set of transfer parameters. + */ +static void mmc_test_prepare_mrq(struct mmc_test_card *test, + struct mmc_request *mrq, struct scatterlist *sg, unsigned sg_len, + unsigned dev_addr, unsigned blocks, unsigned blksz, int write) { - int ret, busy; + BUG_ON(!mrq || !mrq->cmd || !mrq->data || !mrq->stop); - struct mmc_request mrq; - struct mmc_command cmd; - struct mmc_command stop; - struct mmc_data data; - - struct scatterlist sg; - - memset(&mrq, 0, sizeof(struct mmc_request)); - - mrq.cmd = &cmd; - mrq.data = &data; - - memset(&cmd, 0, sizeof(struct mmc_command)); - - if (broken_xfer) { - if (blocks > 1) { - cmd.opcode = write ? - MMC_WRITE_BLOCK : MMC_READ_SINGLE_BLOCK; - } else { - cmd.opcode = MMC_SEND_STATUS; - } + if (blocks > 1) { + mrq->cmd->opcode = write ? + MMC_WRITE_MULTIPLE_BLOCK : MMC_READ_MULTIPLE_BLOCK; } else { - if (blocks > 1) { - cmd.opcode = write ? - MMC_WRITE_MULTIPLE_BLOCK : MMC_READ_MULTIPLE_BLOCK; - } else { - cmd.opcode = write ? - MMC_WRITE_BLOCK : MMC_READ_SINGLE_BLOCK; - } + mrq->cmd->opcode = write ? + MMC_WRITE_BLOCK : MMC_READ_SINGLE_BLOCK; } - if (broken_xfer && blocks == 1) - cmd.arg = test->card->rca << 16; - else - cmd.arg = addr; - cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC; - - memset(&stop, 0, sizeof(struct mmc_command)); - - if (!broken_xfer && (blocks > 1)) { - stop.opcode = MMC_STOP_TRANSMISSION; - stop.arg = 0; - stop.flags = MMC_RSP_R1B | MMC_CMD_AC; + mrq->cmd->arg = dev_addr; + mrq->cmd->flags = MMC_RSP_R1 | MMC_CMD_ADTC; - mrq.stop = &stop; + if (blocks == 1) + mrq->stop = NULL; + else { + mrq->stop->opcode = MMC_STOP_TRANSMISSION; + mrq->stop->arg = 0; + mrq->stop->flags = MMC_RSP_R1B | MMC_CMD_AC; } - memset(&data, 0, sizeof(struct mmc_data)); - - data.blksz = blksz; - data.blocks = blocks; - data.flags = write ? MMC_DATA_WRITE : MMC_DATA_READ; - data.sg = &sg; - data.sg_len = 1; - - sg_init_one(&sg, buffer, blocks * blksz); + mrq->data->blksz = blksz; + mrq->data->blocks = blocks; + mrq->data->flags = write ? MMC_DATA_WRITE : MMC_DATA_READ; + mrq->data->sg = sg; + mrq->data->sg_len = sg_len; - mmc_set_data_timeout(&data, test->card); - - mmc_wait_for_req(test->card->host, &mrq); - - ret = 0; - - if (broken_xfer) { - if (!ret && cmd.error) - ret = cmd.error; - if (!ret && data.error == 0) - ret = RESULT_FAIL; - if (!ret && data.error != -ETIMEDOUT) - ret = data.error; - if (!ret && stop.error) - ret = stop.error; - if (blocks > 1) { - if (!ret && data.bytes_xfered > blksz) - ret = RESULT_FAIL; - } else { - if (!ret && data.bytes_xfered > 0) - ret = RESULT_FAIL; - } - } else { - if (!ret && cmd.error) - ret = cmd.error; - if (!ret && data.error) - ret = data.error; - if (!ret && stop.error) - ret = stop.error; - if (!ret && data.bytes_xfered != blocks * blksz) - ret = RESULT_FAIL; - } + mmc_set_data_timeout(mrq->data, test->card); +} - if (ret == -EINVAL) - ret = RESULT_UNSUP_HOST; +/* + * Wait for the card to finish the busy state + */ +static int mmc_test_wait_busy(struct mmc_test_card *test) +{ + int ret, busy; + struct mmc_command cmd; busy = 0; do { - int ret2; - memset(&cmd, 0, sizeof(struct mmc_command)); cmd.opcode = MMC_SEND_STATUS; cmd.arg = test->card->rca << 16; cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; - ret2 = mmc_wait_for_cmd(test->card->host, &cmd, 0); - if (ret2) + ret = mmc_wait_for_cmd(test->card->host, &cmd, 0); + if (ret) break; if (!busy && !(cmd.resp[0] & R1_READY_FOR_DATA)) { @@ -172,14 +120,57 @@ static int __mmc_test_transfer(struct mmc_test_card *test, int write, return ret; } -static int mmc_test_transfer(struct mmc_test_card *test, int write, - u8 *buffer, unsigned addr, unsigned blocks, unsigned blksz) +/* + * Transfer a single sector of kernel addressable data + */ +static int mmc_test_buffer_transfer(struct mmc_test_card *test, + u8 *buffer, unsigned addr, unsigned blksz, int write) { - return __mmc_test_transfer(test, write, 0, buffer, - addr, blocks, blksz); + int ret; + + struct mmc_request mrq; + struct mmc_command cmd; + struct mmc_command stop; + struct mmc_data data; + + struct scatterlist sg; + + memset(&mrq, 0, sizeof(struct mmc_request)); + memset(&cmd, 0, sizeof(struct mmc_command)); + memset(&data, 0, sizeof(struct mmc_data)); + memset(&stop, 0, sizeof(struct mmc_command)); + + mrq.cmd = &cmd; + mrq.data = &data; + mrq.stop = &stop; + + sg_init_one(&sg, buffer, blksz); + + mmc_test_prepare_mrq(test, &mrq, &sg, 1, addr, 1, blksz, write); + + mmc_wait_for_req(test->card->host, &mrq); + + if (cmd.error) + return cmd.error; + if (data.error) + return data.error; + + ret = mmc_test_wait_busy(test); + if (ret) + return ret; + + return 0; } -static int mmc_test_prepare_verify(struct mmc_test_card *test, int write) +/*******************************************************************/ +/* Test preparation and cleanup */ +/*******************************************************************/ + +/* + * Fill the first couple of sectors of the card with known data + * so that bad reads/writes can be detected + */ +static int __mmc_test_prepare(struct mmc_test_card *test, int write) { int ret, i; @@ -188,15 +179,14 @@ static int mmc_test_prepare_verify(struct mmc_test_card *test, int write) return ret; if (write) - memset(test->buffer, 0xDF, BUFFER_SIZE); + memset(test->buffer, 0xDF, 512); else { - for (i = 0;i < BUFFER_SIZE;i++) + for (i = 0;i < 512;i++) test->buffer[i] = i; } for (i = 0;i < BUFFER_SIZE / 512;i++) { - ret = mmc_test_transfer(test, 1, test->buffer + i * 512, - i * 512, 1, 512); + ret = mmc_test_buffer_transfer(test, test->buffer, i * 512, 512, 1); if (ret) return ret; } @@ -204,41 +194,218 @@ static int mmc_test_prepare_verify(struct mmc_test_card *test, int write) return 0; } -static int mmc_test_prepare_verify_write(struct mmc_test_card *test) +static int mmc_test_prepare_write(struct mmc_test_card *test) +{ + return __mmc_test_prepare(test, 1); +} + +static int mmc_test_prepare_read(struct mmc_test_card *test) +{ + return __mmc_test_prepare(test, 0); +} + +static int mmc_test_cleanup(struct mmc_test_card *test) +{ + int ret, i; + + ret = mmc_test_set_blksize(test, 512); + if (ret) + return ret; + + memset(test->buffer, 0, 512); + + for (i = 0;i < BUFFER_SIZE / 512;i++) { + ret = mmc_test_buffer_transfer(test, test->buffer, i * 512, 512, 1); + if (ret) + return ret; + } + + return 0; +} + +/*******************************************************************/ +/* Test execution helpers */ +/*******************************************************************/ + +/* + * Modifies the mmc_request to perform the "short transfer" tests + */ +static void mmc_test_prepare_broken_mrq(struct mmc_test_card *test, + struct mmc_request *mrq, int write) +{ + BUG_ON(!mrq || !mrq->cmd || !mrq->data); + + if (mrq->data->blocks > 1) { + mrq->cmd->opcode = write ? + MMC_WRITE_BLOCK : MMC_READ_SINGLE_BLOCK; + mrq->stop = NULL; + } else { + mrq->cmd->opcode = MMC_SEND_STATUS; + mrq->cmd->arg = test->card->rca << 16; + } +} + +/* + * Checks that a normal transfer didn't have any errors + */ +static int mmc_test_check_result(struct mmc_test_card *test, + struct mmc_request *mrq) { - return mmc_test_prepare_verify(test, 1); + int ret; + + BUG_ON(!mrq || !mrq->cmd || !mrq->data); + + ret = 0; + + if (!ret && mrq->cmd->error) + ret = mrq->cmd->error; + if (!ret && mrq->data->error) + ret = mrq->data->error; + if (!ret && mrq->stop && mrq->stop->error) + ret = mrq->stop->error; + if (!ret && mrq->data->bytes_xfered != + mrq->data->blocks * mrq->data->blksz) + ret = RESULT_FAIL; + + if (ret == -EINVAL) + ret = RESULT_UNSUP_HOST; + + return ret; } -static int mmc_test_prepare_verify_read(struct mmc_test_card *test) +/* + * Checks that a "short transfer" behaved as expected + */ +static int mmc_test_check_broken_result(struct mmc_test_card *test, + struct mmc_request *mrq) { - return mmc_test_prepare_verify(test, 0); + int ret; + + BUG_ON(!mrq || !mrq->cmd || !mrq->data); + + ret = 0; + + if (!ret && mrq->cmd->error) + ret = mrq->cmd->error; + if (!ret && mrq->data->error == 0) + ret = RESULT_FAIL; + if (!ret && mrq->data->error != -ETIMEDOUT) + ret = mrq->data->error; + if (!ret && mrq->stop && mrq->stop->error) + ret = mrq->stop->error; + if (mrq->data->blocks > 1) { + if (!ret && mrq->data->bytes_xfered > mrq->data->blksz) + ret = RESULT_FAIL; + } else { + if (!ret && mrq->data->bytes_xfered > 0) + ret = RESULT_FAIL; + } + + if (ret == -EINVAL) + ret = RESULT_UNSUP_HOST; + + return ret; } -static int mmc_test_verified_transfer(struct mmc_test_card *test, int write, - u8 *buffer, unsigned addr, unsigned blocks, unsigned blksz) +/* + * Tests a basic transfer with certain parameters + */ +static int mmc_test_simple_transfer(struct mmc_test_card *test, + struct scatterlist *sg, unsigned sg_len, unsigned dev_addr, + unsigned blocks, unsigned blksz, int write) { - int ret, i, sectors; + struct mmc_request mrq; + struct mmc_command cmd; + struct mmc_command stop; + struct mmc_data data; - /* - * It is assumed that the above preparation has been done. - */ + memset(&mrq, 0, sizeof(struct mmc_request)); + memset(&cmd, 0, sizeof(struct mmc_command)); + memset(&data, 0, sizeof(struct mmc_data)); + memset(&stop, 0, sizeof(struct mmc_command)); + + mrq.cmd = &cmd; + mrq.data = &data; + mrq.stop = &stop; + + mmc_test_prepare_mrq(test, &mrq, sg, sg_len, dev_addr, + blocks, blksz, write); + + mmc_wait_for_req(test->card->host, &mrq); - memset(test->buffer, 0, BUFFER_SIZE); + mmc_test_wait_busy(test); + + return mmc_test_check_result(test, &mrq); +} + +/* + * Tests a transfer where the card will fail completely or partly + */ +static int mmc_test_broken_transfer(struct mmc_test_card *test, + unsigned blocks, unsigned blksz, int write) +{ + struct mmc_request mrq; + struct mmc_command cmd; + struct mmc_command stop; + struct mmc_data data; + + struct scatterlist sg; + + memset(&mrq, 0, sizeof(struct mmc_request)); + memset(&cmd, 0, sizeof(struct mmc_command)); + memset(&data, 0, sizeof(struct mmc_data)); + memset(&stop, 0, sizeof(struct mmc_command)); + + mrq.cmd = &cmd; + mrq.data = &data; + mrq.stop = &stop; + + sg_init_one(&sg, test->buffer, blocks * blksz); + + mmc_test_prepare_mrq(test, &mrq, &sg, 1, 0, blocks, blksz, write); + mmc_test_prepare_broken_mrq(test, &mrq, write); + + mmc_wait_for_req(test->card->host, &mrq); + + mmc_test_wait_busy(test); + + return mmc_test_check_broken_result(test, &mrq); +} + +/* + * Does a complete transfer test where data is also validated + * + * Note: mmc_test_prepare() must have been done before this call + */ +static int mmc_test_transfer(struct mmc_test_card *test, + struct scatterlist *sg, unsigned sg_len, unsigned dev_addr, + unsigned blocks, unsigned blksz, int write) +{ + int ret, i; + unsigned long flags; if (write) { for (i = 0;i < blocks * blksz;i++) - buffer[i] = i; + test->scratch[i] = i; + } else { + memset(test->scratch, 0, BUFFER_SIZE); } + local_irq_save(flags); + sg_copy_from_buffer(sg, sg_len, test->scratch, BUFFER_SIZE); + local_irq_restore(flags); ret = mmc_test_set_blksize(test, blksz); if (ret) return ret; - ret = mmc_test_transfer(test, write, buffer, addr, blocks, blksz); + ret = mmc_test_simple_transfer(test, sg, sg_len, dev_addr, + blocks, blksz, write); if (ret) return ret; if (write) { + int sectors; + ret = mmc_test_set_blksize(test, 512); if (ret) return ret; @@ -253,9 +420,9 @@ static int mmc_test_verified_transfer(struct mmc_test_card *test, int write, memset(test->buffer, 0, sectors * 512); for (i = 0;i < sectors;i++) { - ret = mmc_test_transfer(test, 0, + ret = mmc_test_buffer_transfer(test, test->buffer + i * 512, - addr + i * 512, 1, 512); + dev_addr + i * 512, 512, 0); if (ret) return ret; } @@ -270,8 +437,11 @@ static int mmc_test_verified_transfer(struct mmc_test_card *test, int write, return RESULT_FAIL; } } else { + local_irq_save(flags); + sg_copy_to_buffer(sg, sg_len, test->scratch, BUFFER_SIZE); + local_irq_restore(flags); for (i = 0;i < blocks * blksz;i++) { - if (buffer[i] != (u8)i) + if (test->scratch[i] != (u8)i) return RESULT_FAIL; } } @@ -279,26 +449,6 @@ static int mmc_test_verified_transfer(struct mmc_test_card *test, int write, return 0; } -static int mmc_test_cleanup_verify(struct mmc_test_card *test) -{ - int ret, i; - - ret = mmc_test_set_blksize(test, 512); - if (ret) - return ret; - - memset(test->buffer, 0, BUFFER_SIZE); - - for (i = 0;i < BUFFER_SIZE / 512;i++) { - ret = mmc_test_transfer(test, 1, test->buffer + i * 512, - i * 512, 1, 512); - if (ret) - return ret; - } - - return 0; -} - /*******************************************************************/ /* Tests */ /*******************************************************************/ @@ -314,12 +464,15 @@ struct mmc_test_case { static int mmc_test_basic_write(struct mmc_test_card *test) { int ret; + struct scatterlist sg; ret = mmc_test_set_blksize(test, 512); if (ret) return ret; - ret = mmc_test_transfer(test, 1, test->buffer, 0, 1, 512); + sg_init_one(&sg, test->buffer, 512); + + ret = mmc_test_simple_transfer(test, &sg, 1, 0, 1, 512, 1); if (ret) return ret; @@ -329,12 +482,15 @@ static int mmc_test_basic_write(struct mmc_test_card *test) static int mmc_test_basic_read(struct mmc_test_card *test) { int ret; + struct scatterlist sg; ret = mmc_test_set_blksize(test, 512); if (ret) return ret; - ret = mmc_test_transfer(test, 0, test->buffer, 0, 1, 512); + sg_init_one(&sg, test->buffer, 512); + + ret = mmc_test_simple_transfer(test, &sg, 1, 0, 1, 512, 1); if (ret) return ret; @@ -344,8 +500,11 @@ static int mmc_test_basic_read(struct mmc_test_card *test) static int mmc_test_verify_write(struct mmc_test_card *test) { int ret; + struct scatterlist sg; + + sg_init_one(&sg, test->buffer, 512); - ret = mmc_test_verified_transfer(test, 1, test->buffer, 0, 1, 512); + ret = mmc_test_transfer(test, &sg, 1, 0, 1, 512, 1); if (ret) return ret; @@ -355,8 +514,11 @@ static int mmc_test_verify_write(struct mmc_test_card *test) static int mmc_test_verify_read(struct mmc_test_card *test) { int ret; + struct scatterlist sg; + + sg_init_one(&sg, test->buffer, 512); - ret = mmc_test_verified_transfer(test, 0, test->buffer, 0, 1, 512); + ret = mmc_test_transfer(test, &sg, 1, 0, 1, 512, 0); if (ret) return ret; @@ -367,6 +529,7 @@ static int mmc_test_multi_write(struct mmc_test_card *test) { int ret; unsigned int size; + struct scatterlist sg; if (test->card->host->max_blk_count == 1) return RESULT_UNSUP_HOST; @@ -379,8 +542,9 @@ static int mmc_test_multi_write(struct mmc_test_card *test) if (size < 1024) return RESULT_UNSUP_HOST; - ret = mmc_test_verified_transfer(test, 1, test->buffer, 0, - size / 512, 512); + sg_init_one(&sg, test->buffer, size); + + ret = mmc_test_transfer(test, &sg, 1, 0, size/512, 512, 1); if (ret) return ret; @@ -391,6 +555,7 @@ static int mmc_test_multi_read(struct mmc_test_card *test) { int ret; unsigned int size; + struct scatterlist sg; if (test->card->host->max_blk_count == 1) return RESULT_UNSUP_HOST; @@ -403,8 +568,9 @@ static int mmc_test_multi_read(struct mmc_test_card *test) if (size < 1024) return RESULT_UNSUP_HOST; - ret = mmc_test_verified_transfer(test, 0, test->buffer, 0, - size / 512, 512); + sg_init_one(&sg, test->buffer, size); + + ret = mmc_test_transfer(test, &sg, 1, 0, size/512, 512, 0); if (ret) return ret; @@ -414,13 +580,14 @@ static int mmc_test_multi_read(struct mmc_test_card *test) static int mmc_test_pow2_write(struct mmc_test_card *test) { int ret, i; + struct scatterlist sg; if (!test->card->csd.write_partial) return RESULT_UNSUP_CARD; for (i = 1; i < 512;i <<= 1) { - ret = mmc_test_verified_transfer(test, 1, - test->buffer, 0, 1, i); + sg_init_one(&sg, test->buffer, i); + ret = mmc_test_transfer(test, &sg, 1, 0, 1, i, 1); if (ret) return ret; } @@ -431,13 +598,14 @@ static int mmc_test_pow2_write(struct mmc_test_card *test) static int mmc_test_pow2_read(struct mmc_test_card *test) { int ret, i; + struct scatterlist sg; if (!test->card->csd.read_partial) return RESULT_UNSUP_CARD; for (i = 1; i < 512;i <<= 1) { - ret = mmc_test_verified_transfer(test, 0, - test->buffer, 0, 1, i); + sg_init_one(&sg, test->buffer, i); + ret = mmc_test_transfer(test, &sg, 1, 0, 1, i, 0); if (ret) return ret; } @@ -448,13 +616,14 @@ static int mmc_test_pow2_read(struct mmc_test_card *test) static int mmc_test_weird_write(struct mmc_test_card *test) { int ret, i; + struct scatterlist sg; if (!test->card->csd.write_partial) return RESULT_UNSUP_CARD; for (i = 3; i < 512;i += 7) { - ret = mmc_test_verified_transfer(test, 1, - test->buffer, 0, 1, i); + sg_init_one(&sg, test->buffer, i); + ret = mmc_test_transfer(test, &sg, 1, 0, 1, i, 1); if (ret) return ret; } @@ -465,13 +634,14 @@ static int mmc_test_weird_write(struct mmc_test_card *test) static int mmc_test_weird_read(struct mmc_test_card *test) { int ret, i; + struct scatterlist sg; if (!test->card->csd.read_partial) return RESULT_UNSUP_CARD; for (i = 3; i < 512;i += 7) { - ret = mmc_test_verified_transfer(test, 0, - test->buffer, 0, 1, i); + sg_init_one(&sg, test->buffer, i); + ret = mmc_test_transfer(test, &sg, 1, 0, 1, i, 0); if (ret) return ret; } @@ -482,10 +652,11 @@ static int mmc_test_weird_read(struct mmc_test_card *test) static int mmc_test_align_write(struct mmc_test_card *test) { int ret, i; + struct scatterlist sg; for (i = 1;i < 4;i++) { - ret = mmc_test_verified_transfer(test, 1, test->buffer + i, - 0, 1, 512); + sg_init_one(&sg, test->buffer + i, 512); + ret = mmc_test_transfer(test, &sg, 1, 0, 1, 512, 1); if (ret) return ret; } @@ -496,10 +667,11 @@ static int mmc_test_align_write(struct mmc_test_card *test) static int mmc_test_align_read(struct mmc_test_card *test) { int ret, i; + struct scatterlist sg; for (i = 1;i < 4;i++) { - ret = mmc_test_verified_transfer(test, 0, test->buffer + i, - 0, 1, 512); + sg_init_one(&sg, test->buffer + i, 512); + ret = mmc_test_transfer(test, &sg, 1, 0, 1, 512, 0); if (ret) return ret; } @@ -511,6 +683,7 @@ static int mmc_test_align_multi_write(struct mmc_test_card *test) { int ret, i; unsigned int size; + struct scatterlist sg; if (test->card->host->max_blk_count == 1) return RESULT_UNSUP_HOST; @@ -524,8 +697,8 @@ static int mmc_test_align_multi_write(struct mmc_test_card *test) return RESULT_UNSUP_HOST; for (i = 1;i < 4;i++) { - ret = mmc_test_verified_transfer(test, 1, test->buffer + i, - 0, size / 512, 512); + sg_init_one(&sg, test->buffer + i, size); + ret = mmc_test_transfer(test, &sg, 1, 0, size/512, 512, 1); if (ret) return ret; } @@ -537,6 +710,7 @@ static int mmc_test_align_multi_read(struct mmc_test_card *test) { int ret, i; unsigned int size; + struct scatterlist sg; if (test->card->host->max_blk_count == 1) return RESULT_UNSUP_HOST; @@ -550,8 +724,8 @@ static int mmc_test_align_multi_read(struct mmc_test_card *test) return RESULT_UNSUP_HOST; for (i = 1;i < 4;i++) { - ret = mmc_test_verified_transfer(test, 0, test->buffer + i, - 0, size / 512, 512); + sg_init_one(&sg, test->buffer + i, size); + ret = mmc_test_transfer(test, &sg, 1, 0, size/512, 512, 0); if (ret) return ret; } @@ -567,7 +741,7 @@ static int mmc_test_xfersize_write(struct mmc_test_card *test) if (ret) return ret; - ret = __mmc_test_transfer(test, 1, 1, test->buffer, 0, 1, 512); + ret = mmc_test_broken_transfer(test, 1, 512, 1); if (ret) return ret; @@ -582,7 +756,7 @@ static int mmc_test_xfersize_read(struct mmc_test_card *test) if (ret) return ret; - ret = __mmc_test_transfer(test, 0, 1, test->buffer, 0, 1, 512); + ret = mmc_test_broken_transfer(test, 1, 512, 0); if (ret) return ret; @@ -600,7 +774,7 @@ static int mmc_test_multi_xfersize_write(struct mmc_test_card *test) if (ret) return ret; - ret = __mmc_test_transfer(test, 1, 1, test->buffer, 0, 2, 512); + ret = mmc_test_broken_transfer(test, 2, 512, 1); if (ret) return ret; @@ -618,7 +792,7 @@ static int mmc_test_multi_xfersize_read(struct mmc_test_card *test) if (ret) return ret; - ret = __mmc_test_transfer(test, 0, 1, test->buffer, 0, 2, 512); + ret = mmc_test_broken_transfer(test, 2, 512, 0); if (ret) return ret; @@ -638,86 +812,86 @@ static const struct mmc_test_case mmc_test_cases[] = { { .name = "Basic write (with data verification)", - .prepare = mmc_test_prepare_verify_write, + .prepare = mmc_test_prepare_write, .run = mmc_test_verify_write, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Basic read (with data verification)", - .prepare = mmc_test_prepare_verify_read, + .prepare = mmc_test_prepare_read, .run = mmc_test_verify_read, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Multi-block write", - .prepare = mmc_test_prepare_verify_write, + .prepare = mmc_test_prepare_write, .run = mmc_test_multi_write, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Multi-block read", - .prepare = mmc_test_prepare_verify_read, + .prepare = mmc_test_prepare_read, .run = mmc_test_multi_read, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Power of two block writes", - .prepare = mmc_test_prepare_verify_write, + .prepare = mmc_test_prepare_write, .run = mmc_test_pow2_write, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Power of two block reads", - .prepare = mmc_test_prepare_verify_read, + .prepare = mmc_test_prepare_read, .run = mmc_test_pow2_read, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Weird sized block writes", - .prepare = mmc_test_prepare_verify_write, + .prepare = mmc_test_prepare_write, .run = mmc_test_weird_write, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Weird sized block reads", - .prepare = mmc_test_prepare_verify_read, + .prepare = mmc_test_prepare_read, .run = mmc_test_weird_read, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Badly aligned write", - .prepare = mmc_test_prepare_verify_write, + .prepare = mmc_test_prepare_write, .run = mmc_test_align_write, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Badly aligned read", - .prepare = mmc_test_prepare_verify_read, + .prepare = mmc_test_prepare_read, .run = mmc_test_align_read, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Badly aligned multi-block write", - .prepare = mmc_test_prepare_verify_write, + .prepare = mmc_test_prepare_write, .run = mmc_test_align_multi_write, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Badly aligned multi-block read", - .prepare = mmc_test_prepare_verify_read, + .prepare = mmc_test_prepare_read, .run = mmc_test_align_multi_read, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { -- cgit v0.10.2 From 8f1934ce784bd8f2eaf06f190526500f7f3f9c74 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Mon, 30 Jun 2008 21:15:49 +0200 Subject: sdhci: graceful handling of bad addresses Be a bit more robust and fall back to PIO if someone is feeding us bogus addresses. Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index b802044..d3e4a39 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -327,7 +327,7 @@ static void sdhci_kunmap_atomic(void *buffer, unsigned long *flags) local_irq_restore(*flags); } -static void sdhci_adma_table_pre(struct sdhci_host *host, +static int sdhci_adma_table_pre(struct sdhci_host *host, struct mmc_data *data) { int direction; @@ -360,10 +360,14 @@ static void sdhci_adma_table_pre(struct sdhci_host *host, host->align_addr = dma_map_single(mmc_dev(host->mmc), host->align_buffer, 128 * 4, direction); + if (dma_mapping_error(host->align_addr)) + goto fail; BUG_ON(host->align_addr & 0x3); host->sg_count = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len, direction); + if (host->sg_count == 0) + goto unmap_align; desc = host->adma_desc; align = host->align_buffer; @@ -457,7 +461,20 @@ static void sdhci_adma_table_pre(struct sdhci_host *host, host->adma_addr = dma_map_single(mmc_dev(host->mmc), host->adma_desc, (128 * 2 + 1) * 4, DMA_TO_DEVICE); + if (dma_mapping_error(host->align_addr)) + goto unmap_entries; BUG_ON(host->adma_addr & 0x3); + + return 0; + +unmap_entries: + dma_unmap_sg(mmc_dev(host->mmc), data->sg, + data->sg_len, direction); +unmap_align: + dma_unmap_single(mmc_dev(host->mmc), host->align_addr, + 128 * 4, direction); +fail: + return -EINVAL; } static void sdhci_adma_table_post(struct sdhci_host *host, @@ -555,6 +572,7 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) { u8 count; u8 ctrl; + int ret; WARN_ON(host->data); @@ -639,6 +657,43 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) } } + if (host->flags & SDHCI_REQ_USE_DMA) { + if (host->flags & SDHCI_USE_ADMA) { + ret = sdhci_adma_table_pre(host, data); + if (ret) { + /* + * This only happens when someone fed + * us an invalid request. + */ + WARN_ON(1); + host->flags &= ~SDHCI_USE_DMA; + } else { + writel(host->adma_addr, + host->ioaddr + SDHCI_ADMA_ADDRESS); + } + } else { + int count; + + count = dma_map_sg(mmc_dev(host->mmc), + data->sg, data->sg_len, + (data->flags & MMC_DATA_READ) ? + DMA_FROM_DEVICE : + DMA_TO_DEVICE); + if (count == 0) { + /* + * This only happens when someone fed + * us an invalid request. + */ + WARN_ON(1); + host->flags &= ~SDHCI_USE_DMA; + } else { + WARN_ON(count != 1); + writel(sg_dma_address(data->sg), + host->ioaddr + SDHCI_DMA_ADDRESS); + } + } + } + /* * Always adjust the DMA selection as some controllers * (e.g. JMicron) can't do PIO properly when the selection @@ -655,25 +710,7 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) writeb(ctrl, host->ioaddr + SDHCI_HOST_CONTROL); } - if (host->flags & SDHCI_REQ_USE_DMA) { - if (host->flags & SDHCI_USE_ADMA) { - sdhci_adma_table_pre(host, data); - writel(host->adma_addr, - host->ioaddr + SDHCI_ADMA_ADDRESS); - } else { - int count; - - count = dma_map_sg(mmc_dev(host->mmc), - data->sg, data->sg_len, - (data->flags & MMC_DATA_READ) ? - DMA_FROM_DEVICE : - DMA_TO_DEVICE); - WARN_ON(count != 1); - - writel(sg_dma_address(data->sg), - host->ioaddr + SDHCI_DMA_ADDRESS); - } - } else { + if (!(host->flags & SDHCI_REQ_USE_DMA)) { host->cur_sg = data->sg; host->num_sg = data->sg_len; -- cgit v0.10.2 From be518018c6b9224c02284fb243207ef741c31ec6 Mon Sep 17 00:00:00 2001 From: Thomas Kleffel Date: Mon, 30 Jun 2008 22:40:24 +0100 Subject: MMC: S3C24XX MMC/SD driver. This is the latest S3C MMC/SD driver by Thomas Kleffel with cleanups as suggested by AKPM done by Ben Dooks. Signed-off-by: Ben Dooks Signed-off-by: Thomas Kleffel Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index f9cbcb8..dc88c03 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -143,3 +143,14 @@ config MMC_SPI If unsure, or if your system has no SPI master driver, say N. +config MMC_S3C + tristate "Samsung S3C SD/MMC Card Interface support" + depends on ARCH_S3C2410 && MMC + help + This selects a driver for the MCI interface found in + Samsung's S3C2410, S3C2412, S3C2440, S3C2442 CPUs. + If you have a board based on one of those and a MMC/SD + slot, say Y or M here. + + If unsure, say N. + diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile index 3027250..b3e023b 100644 --- a/drivers/mmc/host/Makefile +++ b/drivers/mmc/host/Makefile @@ -18,4 +18,4 @@ obj-$(CONFIG_MMC_OMAP) += omap.o obj-$(CONFIG_MMC_AT91) += at91_mci.o obj-$(CONFIG_MMC_TIFM_SD) += tifm_sd.o obj-$(CONFIG_MMC_SPI) += mmc_spi.o - +obj-$(CONFIG_MMC_S3C) += s3cmci.o diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c new file mode 100644 index 0000000..c6a4d3c --- /dev/null +++ b/drivers/mmc/host/s3cmci.c @@ -0,0 +1,1343 @@ +/* + * linux/drivers/mmc/s3cmci.h - Samsung S3C MCI driver + * + * Copyright (C) 2004-2006 maintech GmbH, Thomas Kleffel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include "s3cmci.h" + +#define DRIVER_NAME "s3c-mci" + +enum dbg_channels { + dbg_err = (1 << 0), + dbg_debug = (1 << 1), + dbg_info = (1 << 2), + dbg_irq = (1 << 3), + dbg_sg = (1 << 4), + dbg_dma = (1 << 5), + dbg_pio = (1 << 6), + dbg_fail = (1 << 7), + dbg_conf = (1 << 8), +}; + +static const int dbgmap_err = dbg_err | dbg_fail; +static const int dbgmap_info = dbg_info | dbg_conf; +static const int dbgmap_debug = dbg_debug; + +#define dbg(host, channels, args...) \ + do { \ + if (dbgmap_err & channels) \ + dev_err(&host->pdev->dev, args); \ + else if (dbgmap_info & channels) \ + dev_info(&host->pdev->dev, args); \ + else if (dbgmap_debug & channels) \ + dev_dbg(&host->pdev->dev, args); \ + } while (0) + +#define RESSIZE(ressource) (((ressource)->end - (ressource)->start)+1) + +static struct s3c2410_dma_client s3cmci_dma_client = { + .name = "s3c-mci", +}; + +static void finalize_request(struct s3cmci_host *host); +static void s3cmci_send_request(struct mmc_host *mmc); +static void s3cmci_reset(struct s3cmci_host *host); + +#ifdef CONFIG_MMC_DEBUG + +static void dbg_dumpregs(struct s3cmci_host *host, char *prefix) +{ + u32 con, pre, cmdarg, cmdcon, cmdsta, r0, r1, r2, r3, timer, bsize; + u32 datcon, datcnt, datsta, fsta, imask; + + con = readl(host->base + S3C2410_SDICON); + pre = readl(host->base + S3C2410_SDIPRE); + cmdarg = readl(host->base + S3C2410_SDICMDARG); + cmdcon = readl(host->base + S3C2410_SDICMDCON); + cmdsta = readl(host->base + S3C2410_SDICMDSTAT); + r0 = readl(host->base + S3C2410_SDIRSP0); + r1 = readl(host->base + S3C2410_SDIRSP1); + r2 = readl(host->base + S3C2410_SDIRSP2); + r3 = readl(host->base + S3C2410_SDIRSP3); + timer = readl(host->base + S3C2410_SDITIMER); + bsize = readl(host->base + S3C2410_SDIBSIZE); + datcon = readl(host->base + S3C2410_SDIDCON); + datcnt = readl(host->base + S3C2410_SDIDCNT); + datsta = readl(host->base + S3C2410_SDIDSTA); + fsta = readl(host->base + S3C2410_SDIFSTA); + imask = readl(host->base + host->sdiimsk); + + dbg(host, dbg_debug, "%s CON:[%08x] PRE:[%08x] TMR:[%08x]\n", + prefix, con, pre, timer); + + dbg(host, dbg_debug, "%s CCON:[%08x] CARG:[%08x] CSTA:[%08x]\n", + prefix, cmdcon, cmdarg, cmdsta); + + dbg(host, dbg_debug, "%s DCON:[%08x] FSTA:[%08x]" + " DSTA:[%08x] DCNT:[%08x]\n", + prefix, datcon, fsta, datsta, datcnt); + + dbg(host, dbg_debug, "%s R0:[%08x] R1:[%08x]" + " R2:[%08x] R3:[%08x]\n", + prefix, r0, r1, r2, r3); +} + +static void prepare_dbgmsg(struct s3cmci_host *host, struct mmc_command *cmd, + int stop) +{ + snprintf(host->dbgmsg_cmd, 300, + "#%u%s op:%i arg:0x%08x flags:0x08%x retries:%u", + host->ccnt, (stop ? " (STOP)" : ""), + cmd->opcode, cmd->arg, cmd->flags, cmd->retries); + + if (cmd->data) { + snprintf(host->dbgmsg_dat, 300, + "#%u bsize:%u blocks:%u bytes:%u", + host->dcnt, cmd->data->blksz, + cmd->data->blocks, + cmd->data->blocks * cmd->data->blksz); + } else { + host->dbgmsg_dat[0] = '\0'; + } +} + +static void dbg_dumpcmd(struct s3cmci_host *host, struct mmc_command *cmd, + int fail) +{ + unsigned int dbglvl = fail ? dbg_fail : dbg_debug; + + if (!cmd) + return; + + if (cmd->error == 0) { + dbg(host, dbglvl, "CMD[OK] %s R0:0x%08x\n", + host->dbgmsg_cmd, cmd->resp[0]); + } else { + dbg(host, dbglvl, "CMD[ERR %i] %s Status:%s\n", + cmd->error, host->dbgmsg_cmd, host->status); + } + + if (!cmd->data) + return; + + if (cmd->data->error == 0) { + dbg(host, dbglvl, "DAT[OK] %s\n", host->dbgmsg_dat); + } else { + dbg(host, dbglvl, "DAT[ERR %i] %s DCNT:0x%08x\n", + cmd->data->error, host->dbgmsg_dat, + readl(host->base + S3C2410_SDIDCNT)); + } +} +#else +static void dbg_dumpcmd(struct s3cmci_host *host, + struct mmc_command *cmd, int fail) { } + +static void prepare_dbgmsg(struct s3cmci_host *host, struct mmc_command *cmd, + int stop) { } + +static void dbg_dumpregs(struct s3cmci_host *host, char *prefix) { } + +#endif /* CONFIG_MMC_DEBUG */ + +static inline u32 enable_imask(struct s3cmci_host *host, u32 imask) +{ + u32 newmask; + + newmask = readl(host->base + host->sdiimsk); + newmask |= imask; + + writel(newmask, host->base + host->sdiimsk); + + return newmask; +} + +static inline u32 disable_imask(struct s3cmci_host *host, u32 imask) +{ + u32 newmask; + + newmask = readl(host->base + host->sdiimsk); + newmask &= ~imask; + + writel(newmask, host->base + host->sdiimsk); + + return newmask; +} + +static inline void clear_imask(struct s3cmci_host *host) +{ + writel(0, host->base + host->sdiimsk); +} + +static inline int get_data_buffer(struct s3cmci_host *host, + u32 *words, u32 **pointer) +{ + struct scatterlist *sg; + + if (host->pio_active == XFER_NONE) + return -EINVAL; + + if ((!host->mrq) || (!host->mrq->data)) + return -EINVAL; + + if (host->pio_sgptr >= host->mrq->data->sg_len) { + dbg(host, dbg_debug, "no more buffers (%i/%i)\n", + host->pio_sgptr, host->mrq->data->sg_len); + return -EBUSY; + } + sg = &host->mrq->data->sg[host->pio_sgptr]; + + *words = sg->length >> 2; + *pointer = sg_virt(sg); + + host->pio_sgptr++; + + dbg(host, dbg_sg, "new buffer (%i/%i)\n", + host->pio_sgptr, host->mrq->data->sg_len); + + return 0; +} + +static inline u32 fifo_count(struct s3cmci_host *host) +{ + u32 fifostat = readl(host->base + S3C2410_SDIFSTA); + + fifostat &= S3C2410_SDIFSTA_COUNTMASK; + return fifostat >> 2; +} + +static inline u32 fifo_free(struct s3cmci_host *host) +{ + u32 fifostat = readl(host->base + S3C2410_SDIFSTA); + + fifostat &= S3C2410_SDIFSTA_COUNTMASK; + return (63 - fifostat) >> 2; +} + +static void do_pio_read(struct s3cmci_host *host) +{ + int res; + u32 fifo; + void __iomem *from_ptr; + + /* write real prescaler to host, it might be set slow to fix */ + writel(host->prescaler, host->base + S3C2410_SDIPRE); + + from_ptr = host->base + host->sdidata; + + while ((fifo = fifo_count(host))) { + if (!host->pio_words) { + res = get_data_buffer(host, &host->pio_words, + &host->pio_ptr); + if (res) { + host->pio_active = XFER_NONE; + host->complete_what = COMPLETION_FINALIZE; + + dbg(host, dbg_pio, "pio_read(): " + "complete (no more data).\n"); + return; + } + + dbg(host, dbg_pio, + "pio_read(): new target: [%i]@[%p]\n", + host->pio_words, host->pio_ptr); + } + + dbg(host, dbg_pio, + "pio_read(): fifo:[%02i] buffer:[%03i] dcnt:[%08X]\n", + fifo, host->pio_words, + readl(host->base + S3C2410_SDIDCNT)); + + if (fifo > host->pio_words) + fifo = host->pio_words; + + host->pio_words -= fifo; + host->pio_count += fifo; + + while (fifo--) + *(host->pio_ptr++) = readl(from_ptr); + } + + if (!host->pio_words) { + res = get_data_buffer(host, &host->pio_words, &host->pio_ptr); + if (res) { + dbg(host, dbg_pio, + "pio_read(): complete (no more buffers).\n"); + host->pio_active = XFER_NONE; + host->complete_what = COMPLETION_FINALIZE; + + return; + } + } + + enable_imask(host, + S3C2410_SDIIMSK_RXFIFOHALF | S3C2410_SDIIMSK_RXFIFOLAST); +} + +static void do_pio_write(struct s3cmci_host *host) +{ + void __iomem *to_ptr; + int res; + u32 fifo; + + to_ptr = host->base + host->sdidata; + + while ((fifo = fifo_free(host))) { + if (!host->pio_words) { + res = get_data_buffer(host, &host->pio_words, + &host->pio_ptr); + if (res) { + dbg(host, dbg_pio, + "pio_write(): complete (no more data).\n"); + host->pio_active = XFER_NONE; + + return; + } + + dbg(host, dbg_pio, + "pio_write(): new source: [%i]@[%p]\n", + host->pio_words, host->pio_ptr); + + } + + if (fifo > host->pio_words) + fifo = host->pio_words; + + host->pio_words -= fifo; + host->pio_count += fifo; + + while (fifo--) + writel(*(host->pio_ptr++), to_ptr); + } + + enable_imask(host, S3C2410_SDIIMSK_TXFIFOHALF); +} + +static void pio_tasklet(unsigned long data) +{ + struct s3cmci_host *host = (struct s3cmci_host *) data; + + + if (host->pio_active == XFER_WRITE) + do_pio_write(host); + + if (host->pio_active == XFER_READ) + do_pio_read(host); + + if (host->complete_what == COMPLETION_FINALIZE) { + clear_imask(host); + if (host->pio_active != XFER_NONE) { + dbg(host, dbg_err, "unfinished %s " + "- pio_count:[%u] pio_words:[%u]\n", + (host->pio_active == XFER_READ) ? "read" : "write", + host->pio_count, host->pio_words); + + host->mrq->data->error = -EINVAL; + } + + disable_irq(host->irq); + finalize_request(host); + } +} + +/* + * ISR for SDI Interface IRQ + * Communication between driver and ISR works as follows: + * host->mrq points to current request + * host->complete_what Indicates when the request is considered done + * COMPLETION_CMDSENT when the command was sent + * COMPLETION_RSPFIN when a response was received + * COMPLETION_XFERFINISH when the data transfer is finished + * COMPLETION_XFERFINISH_RSPFIN both of the above. + * host->complete_request is the completion-object the driver waits for + * + * 1) Driver sets up host->mrq and host->complete_what + * 2) Driver prepares the transfer + * 3) Driver enables interrupts + * 4) Driver starts transfer + * 5) Driver waits for host->complete_rquest + * 6) ISR checks for request status (errors and success) + * 6) ISR sets host->mrq->cmd->error and host->mrq->data->error + * 7) ISR completes host->complete_request + * 8) ISR disables interrupts + * 9) Driver wakes up and takes care of the request + * + * Note: "->error"-fields are expected to be set to 0 before the request + * was issued by mmc.c - therefore they are only set, when an error + * contition comes up + */ + +static irqreturn_t s3cmci_irq(int irq, void *dev_id) +{ + struct s3cmci_host *host = dev_id; + struct mmc_command *cmd; + u32 mci_csta, mci_dsta, mci_fsta, mci_dcnt, mci_imsk; + u32 mci_cclear, mci_dclear; + unsigned long iflags; + + spin_lock_irqsave(&host->complete_lock, iflags); + + mci_csta = readl(host->base + S3C2410_SDICMDSTAT); + mci_dsta = readl(host->base + S3C2410_SDIDSTA); + mci_dcnt = readl(host->base + S3C2410_SDIDCNT); + mci_fsta = readl(host->base + S3C2410_SDIFSTA); + mci_imsk = readl(host->base + host->sdiimsk); + mci_cclear = 0; + mci_dclear = 0; + + if ((host->complete_what == COMPLETION_NONE) || + (host->complete_what == COMPLETION_FINALIZE)) { + host->status = "nothing to complete"; + clear_imask(host); + goto irq_out; + } + + if (!host->mrq) { + host->status = "no active mrq"; + clear_imask(host); + goto irq_out; + } + + cmd = host->cmd_is_stop ? host->mrq->stop : host->mrq->cmd; + + if (!cmd) { + host->status = "no active cmd"; + clear_imask(host); + goto irq_out; + } + + if (!host->dodma) { + if ((host->pio_active == XFER_WRITE) && + (mci_fsta & S3C2410_SDIFSTA_TFDET)) { + + disable_imask(host, S3C2410_SDIIMSK_TXFIFOHALF); + tasklet_schedule(&host->pio_tasklet); + host->status = "pio tx"; + } + + if ((host->pio_active == XFER_READ) && + (mci_fsta & S3C2410_SDIFSTA_RFDET)) { + + disable_imask(host, + S3C2410_SDIIMSK_RXFIFOHALF | + S3C2410_SDIIMSK_RXFIFOLAST); + + tasklet_schedule(&host->pio_tasklet); + host->status = "pio rx"; + } + } + + if (mci_csta & S3C2410_SDICMDSTAT_CMDTIMEOUT) { + cmd->error = -ETIMEDOUT; + host->status = "error: command timeout"; + goto fail_transfer; + } + + if (mci_csta & S3C2410_SDICMDSTAT_CMDSENT) { + if (host->complete_what == COMPLETION_CMDSENT) { + host->status = "ok: command sent"; + goto close_transfer; + } + + mci_cclear |= S3C2410_SDICMDSTAT_CMDSENT; + } + + if (mci_csta & S3C2410_SDICMDSTAT_CRCFAIL) { + if (cmd->flags & MMC_RSP_CRC) { + cmd->error = -EILSEQ; + host->status = "error: bad command crc"; + goto fail_transfer; + } + + mci_cclear |= S3C2410_SDICMDSTAT_CRCFAIL; + } + + if (mci_csta & S3C2410_SDICMDSTAT_RSPFIN) { + if (host->complete_what == COMPLETION_RSPFIN) { + host->status = "ok: command response received"; + goto close_transfer; + } + + if (host->complete_what == COMPLETION_XFERFINISH_RSPFIN) + host->complete_what = COMPLETION_XFERFINISH; + + mci_cclear |= S3C2410_SDICMDSTAT_RSPFIN; + } + + /* errors handled after this point are only relevant + when a data transfer is in progress */ + + if (!cmd->data) + goto clear_status_bits; + + /* Check for FIFO failure */ + if (host->is2440) { + if (mci_fsta & S3C2440_SDIFSTA_FIFOFAIL) { + host->mrq->data->error = -EILSEQ; + host->status = "error: 2440 fifo failure"; + goto fail_transfer; + } + } else { + if (mci_dsta & S3C2410_SDIDSTA_FIFOFAIL) { + cmd->data->error = -EILSEQ; + host->status = "error: fifo failure"; + goto fail_transfer; + } + } + + if (mci_dsta & S3C2410_SDIDSTA_RXCRCFAIL) { + cmd->data->error = -EILSEQ; + host->status = "error: bad data crc (outgoing)"; + goto fail_transfer; + } + + if (mci_dsta & S3C2410_SDIDSTA_CRCFAIL) { + cmd->data->error = -EILSEQ; + host->status = "error: bad data crc (incoming)"; + goto fail_transfer; + } + + if (mci_dsta & S3C2410_SDIDSTA_DATATIMEOUT) { + cmd->data->error = -ETIMEDOUT; + host->status = "error: data timeout"; + goto fail_transfer; + } + + if (mci_dsta & S3C2410_SDIDSTA_XFERFINISH) { + if (host->complete_what == COMPLETION_XFERFINISH) { + host->status = "ok: data transfer completed"; + goto close_transfer; + } + + if (host->complete_what == COMPLETION_XFERFINISH_RSPFIN) + host->complete_what = COMPLETION_RSPFIN; + + mci_dclear |= S3C2410_SDIDSTA_XFERFINISH; + } + +clear_status_bits: + writel(mci_cclear, host->base + S3C2410_SDICMDSTAT); + writel(mci_dclear, host->base + S3C2410_SDIDSTA); + + goto irq_out; + +fail_transfer: + host->pio_active = XFER_NONE; + +close_transfer: + host->complete_what = COMPLETION_FINALIZE; + + clear_imask(host); + tasklet_schedule(&host->pio_tasklet); + + goto irq_out; + +irq_out: + dbg(host, dbg_irq, + "csta:0x%08x dsta:0x%08x fsta:0x%08x dcnt:0x%08x status:%s.\n", + mci_csta, mci_dsta, mci_fsta, mci_dcnt, host->status); + + spin_unlock_irqrestore(&host->complete_lock, iflags); + return IRQ_HANDLED; + +} + +/* + * ISR for the CardDetect Pin +*/ + +static irqreturn_t s3cmci_irq_cd(int irq, void *dev_id) +{ + struct s3cmci_host *host = (struct s3cmci_host *)dev_id; + + dbg(host, dbg_irq, "card detect\n"); + + mmc_detect_change(host->mmc, 500); + + return IRQ_HANDLED; +} + +void s3cmci_dma_done_callback(struct s3c2410_dma_chan *dma_ch, void *buf_id, + int size, enum s3c2410_dma_buffresult result) +{ + struct s3cmci_host *host = buf_id; + unsigned long iflags; + u32 mci_csta, mci_dsta, mci_fsta, mci_dcnt; + + mci_csta = readl(host->base + S3C2410_SDICMDSTAT); + mci_dsta = readl(host->base + S3C2410_SDIDSTA); + mci_fsta = readl(host->base + S3C2410_SDIFSTA); + mci_dcnt = readl(host->base + S3C2410_SDIDCNT); + + BUG_ON(!host->mrq); + BUG_ON(!host->mrq->data); + BUG_ON(!host->dmatogo); + + spin_lock_irqsave(&host->complete_lock, iflags); + + if (result != S3C2410_RES_OK) { + dbg(host, dbg_fail, "DMA FAILED: csta=0x%08x dsta=0x%08x " + "fsta=0x%08x dcnt:0x%08x result:0x%08x toGo:%u\n", + mci_csta, mci_dsta, mci_fsta, + mci_dcnt, result, host->dmatogo); + + goto fail_request; + } + + host->dmatogo--; + if (host->dmatogo) { + dbg(host, dbg_dma, "DMA DONE Size:%i DSTA:[%08x] " + "DCNT:[%08x] toGo:%u\n", + size, mci_dsta, mci_dcnt, host->dmatogo); + + goto out; + } + + dbg(host, dbg_dma, "DMA FINISHED Size:%i DSTA:%08x DCNT:%08x\n", + size, mci_dsta, mci_dcnt); + + host->complete_what = COMPLETION_FINALIZE; + +out: + tasklet_schedule(&host->pio_tasklet); + spin_unlock_irqrestore(&host->complete_lock, iflags); + return; + + +fail_request: + host->mrq->data->error = -EINVAL; + host->complete_what = COMPLETION_FINALIZE; + writel(0, host->base + host->sdiimsk); + goto out; + +} + +static void finalize_request(struct s3cmci_host *host) +{ + struct mmc_request *mrq = host->mrq; + struct mmc_command *cmd = host->cmd_is_stop ? mrq->stop : mrq->cmd; + int debug_as_failure = 0; + + if (host->complete_what != COMPLETION_FINALIZE) + return; + + if (!mrq) + return; + + if (cmd->data && (cmd->error == 0) && + (cmd->data->error == 0)) { + if (host->dodma && (!host->dma_complete)) { + dbg(host, dbg_dma, "DMA Missing!\n"); + return; + } + } + + /* Read response from controller. */ + cmd->resp[0] = readl(host->base + S3C2410_SDIRSP0); + cmd->resp[1] = readl(host->base + S3C2410_SDIRSP1); + cmd->resp[2] = readl(host->base + S3C2410_SDIRSP2); + cmd->resp[3] = readl(host->base + S3C2410_SDIRSP3); + + writel(host->prescaler, host->base + S3C2410_SDIPRE); + + if (cmd->error) + debug_as_failure = 1; + + if (cmd->data && cmd->data->error) + debug_as_failure = 1; + + dbg_dumpcmd(host, cmd, debug_as_failure); + + /* Cleanup controller */ + writel(0, host->base + S3C2410_SDICMDARG); + writel(0, host->base + S3C2410_SDIDCON); + writel(0, host->base + S3C2410_SDICMDCON); + writel(0, host->base + host->sdiimsk); + + if (cmd->data && cmd->error) + cmd->data->error = cmd->error; + + if (cmd->data && cmd->data->stop && (!host->cmd_is_stop)) { + host->cmd_is_stop = 1; + s3cmci_send_request(host->mmc); + return; + } + + /* If we have no data transfer we are finished here */ + if (!mrq->data) + goto request_done; + + /* Calulate the amout of bytes transfer if there was no error */ + if (mrq->data->error == 0) { + mrq->data->bytes_xfered = + (mrq->data->blocks * mrq->data->blksz); + } else { + mrq->data->bytes_xfered = 0; + } + + /* If we had an error while transfering data we flush the + * DMA channel and the fifo to clear out any garbage. */ + if (mrq->data->error != 0) { + if (host->dodma) + s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH); + + if (host->is2440) { + /* Clear failure register and reset fifo. */ + writel(S3C2440_SDIFSTA_FIFORESET | + S3C2440_SDIFSTA_FIFOFAIL, + host->base + S3C2410_SDIFSTA); + } else { + u32 mci_con; + + /* reset fifo */ + mci_con = readl(host->base + S3C2410_SDICON); + mci_con |= S3C2410_SDICON_FIFORESET; + + writel(mci_con, host->base + S3C2410_SDICON); + } + } + +request_done: + host->complete_what = COMPLETION_NONE; + host->mrq = NULL; + mmc_request_done(host->mmc, mrq); +} + + +void s3cmci_dma_setup(struct s3cmci_host *host, enum s3c2410_dmasrc source) +{ + static enum s3c2410_dmasrc last_source = -1; + static int setup_ok; + + if (last_source == source) + return; + + last_source = source; + + s3c2410_dma_devconfig(host->dma, source, 3, + host->mem->start + host->sdidata); + + if (!setup_ok) { + s3c2410_dma_config(host->dma, 4, + (S3C2410_DCON_HWTRIG | S3C2410_DCON_CH0_SDI)); + s3c2410_dma_set_buffdone_fn(host->dma, + s3cmci_dma_done_callback); + s3c2410_dma_setflags(host->dma, S3C2410_DMAF_AUTOSTART); + setup_ok = 1; + } +} + +static void s3cmci_send_command(struct s3cmci_host *host, + struct mmc_command *cmd) +{ + u32 ccon, imsk; + + imsk = S3C2410_SDIIMSK_CRCSTATUS | S3C2410_SDIIMSK_CMDTIMEOUT | + S3C2410_SDIIMSK_RESPONSEND | S3C2410_SDIIMSK_CMDSENT | + S3C2410_SDIIMSK_RESPONSECRC; + + enable_imask(host, imsk); + + if (cmd->data) + host->complete_what = COMPLETION_XFERFINISH_RSPFIN; + else if (cmd->flags & MMC_RSP_PRESENT) + host->complete_what = COMPLETION_RSPFIN; + else + host->complete_what = COMPLETION_CMDSENT; + + writel(cmd->arg, host->base + S3C2410_SDICMDARG); + + ccon = cmd->opcode & S3C2410_SDICMDCON_INDEX; + ccon |= S3C2410_SDICMDCON_SENDERHOST | S3C2410_SDICMDCON_CMDSTART; + + if (cmd->flags & MMC_RSP_PRESENT) + ccon |= S3C2410_SDICMDCON_WAITRSP; + + if (cmd->flags & MMC_RSP_136) + ccon |= S3C2410_SDICMDCON_LONGRSP; + + writel(ccon, host->base + S3C2410_SDICMDCON); +} + +static int s3cmci_setup_data(struct s3cmci_host *host, struct mmc_data *data) +{ + u32 dcon, imsk, stoptries = 3; + + /* write DCON register */ + + if (!data) { + writel(0, host->base + S3C2410_SDIDCON); + return 0; + } + + while (readl(host->base + S3C2410_SDIDSTA) & + (S3C2410_SDIDSTA_TXDATAON | S3C2410_SDIDSTA_RXDATAON)) { + + dbg(host, dbg_err, + "mci_setup_data() transfer stillin progress.\n"); + + writel(0, host->base + S3C2410_SDIDCON); + s3cmci_reset(host); + + if ((stoptries--) == 0) { + dbg_dumpregs(host, "DRF"); + return -EINVAL; + } + } + + dcon = data->blocks & S3C2410_SDIDCON_BLKNUM_MASK; + + if (host->dodma) + dcon |= S3C2410_SDIDCON_DMAEN; + + if (host->bus_width == MMC_BUS_WIDTH_4) + dcon |= S3C2410_SDIDCON_WIDEBUS; + + if (!(data->flags & MMC_DATA_STREAM)) + dcon |= S3C2410_SDIDCON_BLOCKMODE; + + if (data->flags & MMC_DATA_WRITE) { + dcon |= S3C2410_SDIDCON_TXAFTERRESP; + dcon |= S3C2410_SDIDCON_XFER_TXSTART; + } + + if (data->flags & MMC_DATA_READ) { + dcon |= S3C2410_SDIDCON_RXAFTERCMD; + dcon |= S3C2410_SDIDCON_XFER_RXSTART; + } + + if (host->is2440) { + dcon |= S3C2440_SDIDCON_DS_WORD; + dcon |= S3C2440_SDIDCON_DATSTART; + } + + writel(dcon, host->base + S3C2410_SDIDCON); + + /* write BSIZE register */ + + writel(data->blksz, host->base + S3C2410_SDIBSIZE); + + /* add to IMASK register */ + imsk = S3C2410_SDIIMSK_FIFOFAIL | S3C2410_SDIIMSK_DATACRC | + S3C2410_SDIIMSK_DATATIMEOUT | S3C2410_SDIIMSK_DATAFINISH; + + enable_imask(host, imsk); + + /* write TIMER register */ + + if (host->is2440) { + writel(0x007FFFFF, host->base + S3C2410_SDITIMER); + } else { + writel(0x0000FFFF, host->base + S3C2410_SDITIMER); + + /* FIX: set slow clock to prevent timeouts on read */ + if (data->flags & MMC_DATA_READ) + writel(0xFF, host->base + S3C2410_SDIPRE); + } + + return 0; +} + +#define BOTH_DIR (MMC_DATA_WRITE | MMC_DATA_READ) + +static int s3cmci_prepare_pio(struct s3cmci_host *host, struct mmc_data *data) +{ + int rw = (data->flags & MMC_DATA_WRITE) ? 1 : 0; + + BUG_ON((data->flags & BOTH_DIR) == BOTH_DIR); + + host->pio_sgptr = 0; + host->pio_words = 0; + host->pio_count = 0; + host->pio_active = rw ? XFER_WRITE : XFER_READ; + + if (rw) { + do_pio_write(host); + enable_imask(host, S3C2410_SDIIMSK_TXFIFOHALF); + } else { + enable_imask(host, S3C2410_SDIIMSK_RXFIFOHALF + | S3C2410_SDIIMSK_RXFIFOLAST); + } + + return 0; +} + +static int s3cmci_prepare_dma(struct s3cmci_host *host, struct mmc_data *data) +{ + int dma_len, i; + int rw = (data->flags & MMC_DATA_WRITE) ? 1 : 0; + + BUG_ON((data->flags & BOTH_DIR) == BOTH_DIR); + + s3cmci_dma_setup(host, rw ? S3C2410_DMASRC_MEM : S3C2410_DMASRC_HW); + s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH); + + dma_len = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + (rw) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); + + if (dma_len == 0) + return -ENOMEM; + + host->dma_complete = 0; + host->dmatogo = dma_len; + + for (i = 0; i < dma_len; i++) { + int res; + + dbg(host, dbg_dma, "enqueue %i:%u@%u\n", i, + sg_dma_address(&data->sg[i]), + sg_dma_len(&data->sg[i])); + + res = s3c2410_dma_enqueue(host->dma, (void *) host, + sg_dma_address(&data->sg[i]), + sg_dma_len(&data->sg[i])); + + if (res) { + s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH); + return -EBUSY; + } + } + + s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_START); + + return 0; +} + +static void s3cmci_send_request(struct mmc_host *mmc) +{ + struct s3cmci_host *host = mmc_priv(mmc); + struct mmc_request *mrq = host->mrq; + struct mmc_command *cmd = host->cmd_is_stop ? mrq->stop : mrq->cmd; + + host->ccnt++; + prepare_dbgmsg(host, cmd, host->cmd_is_stop); + + /* Clear command, data and fifo status registers + Fifo clear only necessary on 2440, but doesn't hurt on 2410 + */ + writel(0xFFFFFFFF, host->base + S3C2410_SDICMDSTAT); + writel(0xFFFFFFFF, host->base + S3C2410_SDIDSTA); + writel(0xFFFFFFFF, host->base + S3C2410_SDIFSTA); + + if (cmd->data) { + int res = s3cmci_setup_data(host, cmd->data); + + host->dcnt++; + + if (res) { + cmd->error = -EINVAL; + cmd->data->error = -EINVAL; + + mmc_request_done(mmc, mrq); + return; + } + + if (host->dodma) + res = s3cmci_prepare_dma(host, cmd->data); + else + res = s3cmci_prepare_pio(host, cmd->data); + + if (res) { + cmd->error = res; + cmd->data->error = res; + + mmc_request_done(mmc, mrq); + return; + } + } + + /* Send command */ + s3cmci_send_command(host, cmd); + + /* Enable Interrupt */ + enable_irq(host->irq); +} + +static void s3cmci_request(struct mmc_host *mmc, struct mmc_request *mrq) +{ + struct s3cmci_host *host = mmc_priv(mmc); + + host->status = "mmc request"; + host->cmd_is_stop = 0; + host->mrq = mrq; + + s3cmci_send_request(mmc); +} + +static void s3cmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct s3cmci_host *host = mmc_priv(mmc); + u32 mci_psc, mci_con; + + /* Set the power state */ + + mci_con = readl(host->base + S3C2410_SDICON); + + switch (ios->power_mode) { + case MMC_POWER_ON: + case MMC_POWER_UP: + s3c2410_gpio_cfgpin(S3C2410_GPE5, S3C2410_GPE5_SDCLK); + s3c2410_gpio_cfgpin(S3C2410_GPE6, S3C2410_GPE6_SDCMD); + s3c2410_gpio_cfgpin(S3C2410_GPE7, S3C2410_GPE7_SDDAT0); + s3c2410_gpio_cfgpin(S3C2410_GPE8, S3C2410_GPE8_SDDAT1); + s3c2410_gpio_cfgpin(S3C2410_GPE9, S3C2410_GPE9_SDDAT2); + s3c2410_gpio_cfgpin(S3C2410_GPE10, S3C2410_GPE10_SDDAT3); + + if (!host->is2440) + mci_con |= S3C2410_SDICON_FIFORESET; + + break; + + case MMC_POWER_OFF: + default: + s3c2410_gpio_setpin(S3C2410_GPE5, 0); + s3c2410_gpio_cfgpin(S3C2410_GPE5, S3C2410_GPE5_OUTP); + + if (host->is2440) + mci_con |= S3C2440_SDICON_SDRESET; + + break; + } + + /* Set clock */ + for (mci_psc = 0; mci_psc < 255; mci_psc++) { + host->real_rate = host->clk_rate / (host->clk_div*(mci_psc+1)); + + if (host->real_rate <= ios->clock) + break; + } + + if (mci_psc > 255) + mci_psc = 255; + + host->prescaler = mci_psc; + writel(host->prescaler, host->base + S3C2410_SDIPRE); + + /* If requested clock is 0, real_rate will be 0, too */ + if (ios->clock == 0) + host->real_rate = 0; + + /* Set CLOCK_ENABLE */ + if (ios->clock) + mci_con |= S3C2410_SDICON_CLOCKTYPE; + else + mci_con &= ~S3C2410_SDICON_CLOCKTYPE; + + writel(mci_con, host->base + S3C2410_SDICON); + + if ((ios->power_mode == MMC_POWER_ON) || + (ios->power_mode == MMC_POWER_UP)) { + dbg(host, dbg_conf, "running at %lukHz (requested: %ukHz).\n", + host->real_rate/1000, ios->clock/1000); + } else { + dbg(host, dbg_conf, "powered down.\n"); + } + + host->bus_width = ios->bus_width; +} + +static void s3cmci_reset(struct s3cmci_host *host) +{ + u32 con = readl(host->base + S3C2410_SDICON); + + con |= S3C2440_SDICON_SDRESET; + writel(con, host->base + S3C2410_SDICON); +} + +static struct mmc_host_ops s3cmci_ops = { + .request = s3cmci_request, + .set_ios = s3cmci_set_ios, +}; + +static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440) +{ + struct s3cmci_host *host; + struct mmc_host *mmc; + int ret; + + mmc = mmc_alloc_host(sizeof(struct s3cmci_host), &pdev->dev); + if (!mmc) { + ret = -ENOMEM; + goto probe_out; + } + + host = mmc_priv(mmc); + host->mmc = mmc; + host->pdev = pdev; + host->is2440 = is2440; + + spin_lock_init(&host->complete_lock); + tasklet_init(&host->pio_tasklet, pio_tasklet, (unsigned long) host); + + if (is2440) { + host->sdiimsk = S3C2440_SDIIMSK; + host->sdidata = S3C2440_SDIDATA; + host->clk_div = 1; + } else { + host->sdiimsk = S3C2410_SDIIMSK; + host->sdidata = S3C2410_SDIDATA; + host->clk_div = 2; + } + + host->dodma = 0; + host->complete_what = COMPLETION_NONE; + host->pio_active = XFER_NONE; + + host->dma = S3CMCI_DMA; + host->irq_cd = IRQ_EINT2; + + host->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!host->mem) { + dev_err(&pdev->dev, + "failed to get io memory region resouce.\n"); + + ret = -ENOENT; + goto probe_free_host; + } + + host->mem = request_mem_region(host->mem->start, + RESSIZE(host->mem), pdev->name); + + if (!host->mem) { + dev_err(&pdev->dev, "failed to request io memory region.\n"); + ret = -ENOENT; + goto probe_free_host; + } + + host->base = ioremap(host->mem->start, RESSIZE(host->mem)); + if (host->base == 0) { + dev_err(&pdev->dev, "failed to ioremap() io memory region.\n"); + ret = -EINVAL; + goto probe_free_mem_region; + } + + host->irq = platform_get_irq(pdev, 0); + if (host->irq == 0) { + dev_err(&pdev->dev, "failed to get interrupt resouce.\n"); + ret = -EINVAL; + goto probe_iounmap; + } + + if (request_irq(host->irq, s3cmci_irq, 0, DRIVER_NAME, host)) { + dev_err(&pdev->dev, "failed to request mci interrupt.\n"); + ret = -ENOENT; + goto probe_iounmap; + } + + /* We get spurious interrupts even when we have set the IMSK + * register to ignore everything, so use disable_irq() to make + * ensure we don't lock the system with un-serviceable requests. */ + + disable_irq(host->irq); + + s3c2410_gpio_cfgpin(S3C2410_GPF2, S3C2410_GPF2_EINT2); + set_irq_type(host->irq_cd, IRQT_BOTHEDGE); + + if (request_irq(host->irq_cd, s3cmci_irq_cd, 0, DRIVER_NAME, host)) { + dev_err(&pdev->dev, + "failed to request card detect interrupt.\n"); + ret = -ENOENT; + goto probe_free_irq; + } + + if (s3c2410_dma_request(S3CMCI_DMA, &s3cmci_dma_client, NULL)) { + dev_err(&pdev->dev, "unable to get DMA channel.\n"); + ret = -EBUSY; + goto probe_free_irq_cd; + } + + host->clk = clk_get(&pdev->dev, "sdi"); + if (IS_ERR(host->clk)) { + dev_err(&pdev->dev, "failed to find clock source.\n"); + ret = PTR_ERR(host->clk); + host->clk = NULL; + goto probe_free_host; + } + + ret = clk_enable(host->clk); + if (ret) { + dev_err(&pdev->dev, "failed to enable clock source.\n"); + goto clk_free; + } + + host->clk_rate = clk_get_rate(host->clk); + + mmc->ops = &s3cmci_ops; + mmc->ocr_avail = MMC_VDD_32_33; + mmc->caps = MMC_CAP_4_BIT_DATA; + mmc->f_min = host->clk_rate / (host->clk_div * 256); + mmc->f_max = host->clk_rate / host->clk_div; + + mmc->max_blk_count = 4095; + mmc->max_blk_size = 4095; + mmc->max_req_size = 4095 * 512; + mmc->max_seg_size = mmc->max_req_size; + + mmc->max_phys_segs = 128; + mmc->max_hw_segs = 128; + + dbg(host, dbg_debug, + "probe: mode:%s mapped mci_base:%p irq:%u irq_cd:%u dma:%u.\n", + (host->is2440?"2440":""), + host->base, host->irq, host->irq_cd, host->dma); + + ret = mmc_add_host(mmc); + if (ret) { + dev_err(&pdev->dev, "failed to add mmc host.\n"); + goto free_dmabuf; + } + + platform_set_drvdata(pdev, mmc); + dev_info(&pdev->dev, "initialisation done.\n"); + + return 0; + + free_dmabuf: + clk_disable(host->clk); + + clk_free: + clk_put(host->clk); + + probe_free_irq_cd: + free_irq(host->irq_cd, host); + + probe_free_irq: + free_irq(host->irq, host); + + probe_iounmap: + iounmap(host->base); + + probe_free_mem_region: + release_mem_region(host->mem->start, RESSIZE(host->mem)); + + probe_free_host: + mmc_free_host(mmc); + probe_out: + return ret; +} + +static int __devexit s3cmci_remove(struct platform_device *pdev) +{ + struct mmc_host *mmc = platform_get_drvdata(pdev); + struct s3cmci_host *host = mmc_priv(mmc); + + mmc_remove_host(mmc); + + clk_disable(host->clk); + clk_put(host->clk); + + tasklet_disable(&host->pio_tasklet); + + free_irq(host->irq_cd, host); + free_irq(host->irq, host); + + iounmap(host->base); + release_mem_region(host->mem->start, RESSIZE(host->mem)); + + mmc_free_host(mmc); + return 0; +} + +static int __devinit s3cmci_probe_2410(struct platform_device *dev) +{ + return s3cmci_probe(dev, 0); +} + +static int __devinit s3cmci_probe_2412(struct platform_device *dev) +{ + return s3cmci_probe(dev, 1); +} + +static int __devinit s3cmci_probe_2440(struct platform_device *dev) +{ + return s3cmci_probe(dev, 1); +} + +#ifdef CONFIG_PM + +static int s3cmci_suspend(struct platform_device *dev, pm_message_t state) +{ + struct mmc_host *mmc = platform_get_drvdata(dev); + + return mmc_suspend_host(mmc, state); +} + +static int s3cmci_resume(struct platform_device *dev) +{ + struct mmc_host *mmc = platform_get_drvdata(dev); + + return mmc_resume_host(mmc); +} + +#else /* CONFIG_PM */ +#define s3cmci_suspend NULL +#define s3cmci_resume NULL +#endif /* CONFIG_PM */ + + +static struct platform_driver s3cmci_driver_2410 = { + .driver.name = "s3c2410-sdi", + .driver.owner = THIS_MODULE, + .probe = s3cmci_probe_2410, + .remove = __devexit_p(s3cmci_remove), + .suspend = s3cmci_suspend, + .resume = s3cmci_resume, +}; + +static struct platform_driver s3cmci_driver_2412 = { + .driver.name = "s3c2412-sdi", + .driver.owner = THIS_MODULE, + .probe = s3cmci_probe_2412, + .remove = __devexit_p(s3cmci_remove), + .suspend = s3cmci_suspend, + .resume = s3cmci_resume, +}; + +static struct platform_driver s3cmci_driver_2440 = { + .driver.name = "s3c2440-sdi", + .driver.owner = THIS_MODULE, + .probe = s3cmci_probe_2440, + .remove = __devexit_p(s3cmci_remove), + .suspend = s3cmci_suspend, + .resume = s3cmci_resume, +}; + + +static int __init s3cmci_init(void) +{ + platform_driver_register(&s3cmci_driver_2410); + platform_driver_register(&s3cmci_driver_2412); + platform_driver_register(&s3cmci_driver_2440); + return 0; +} + +static void __exit s3cmci_exit(void) +{ + platform_driver_unregister(&s3cmci_driver_2410); + platform_driver_unregister(&s3cmci_driver_2412); + platform_driver_unregister(&s3cmci_driver_2440); +} + +module_init(s3cmci_init); +module_exit(s3cmci_exit); + +MODULE_DESCRIPTION("Samsung S3C MMC/SD Card Interface driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Thomas Kleffel "); diff --git a/drivers/mmc/host/s3cmci.h b/drivers/mmc/host/s3cmci.h new file mode 100644 index 0000000..90b8af7 --- /dev/null +++ b/drivers/mmc/host/s3cmci.h @@ -0,0 +1,69 @@ +/* + * linux/drivers/mmc/s3cmci.h - Samsung S3C MCI driver + * + * Copyright (C) 2004-2006 Thomas Kleffel, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* FIXME: DMA Resource management ?! */ +#define S3CMCI_DMA 0 + +enum s3cmci_waitfor { + COMPLETION_NONE, + COMPLETION_FINALIZE, + COMPLETION_CMDSENT, + COMPLETION_RSPFIN, + COMPLETION_XFERFINISH, + COMPLETION_XFERFINISH_RSPFIN, +}; + +struct s3cmci_host { + struct platform_device *pdev; + struct mmc_host *mmc; + struct resource *mem; + struct clk *clk; + void __iomem *base; + int irq; + int irq_cd; + int dma; + + unsigned long clk_rate; + unsigned long clk_div; + unsigned long real_rate; + u8 prescaler; + + int is2440; + unsigned sdiimsk; + unsigned sdidata; + int dodma; + int dmatogo; + + struct mmc_request *mrq; + int cmd_is_stop; + + spinlock_t complete_lock; + enum s3cmci_waitfor complete_what; + + int dma_complete; + + u32 pio_sgptr; + u32 pio_words; + u32 pio_count; + u32 *pio_ptr; +#define XFER_NONE 0 +#define XFER_READ 1 +#define XFER_WRITE 2 + u32 pio_active; + + int bus_width; + + char dbgmsg_cmd[301]; + char dbgmsg_dat[301]; + char *status; + + unsigned int ccnt, dcnt; + struct tasklet_struct pio_tasklet; +}; diff --git a/include/asm-arm/arch-s3c2410/regs-sdi.h b/include/asm-arm/arch-s3c2410/regs-sdi.h index bb9d30b..bfb222f 100644 --- a/include/asm-arm/arch-s3c2410/regs-sdi.h +++ b/include/asm-arm/arch-s3c2410/regs-sdi.h @@ -28,9 +28,15 @@ #define S3C2410_SDIDCNT (0x30) #define S3C2410_SDIDSTA (0x34) #define S3C2410_SDIFSTA (0x38) + #define S3C2410_SDIDATA (0x3C) #define S3C2410_SDIIMSK (0x40) +#define S3C2440_SDIDATA (0x40) +#define S3C2440_SDIIMSK (0x3C) + +#define S3C2440_SDICON_SDRESET (1<<8) +#define S3C2440_SDICON_MMCCLOCK (1<<5) #define S3C2410_SDICON_BYTEORDER (1<<4) #define S3C2410_SDICON_SDIOIRQ (1<<3) #define S3C2410_SDICON_RWAITEN (1<<2) @@ -42,7 +48,8 @@ #define S3C2410_SDICMDCON_LONGRSP (1<<10) #define S3C2410_SDICMDCON_WAITRSP (1<<9) #define S3C2410_SDICMDCON_CMDSTART (1<<8) -#define S3C2410_SDICMDCON_INDEX (0xff) +#define S3C2410_SDICMDCON_SENDERHOST (1<<6) +#define S3C2410_SDICMDCON_INDEX (0x3f) #define S3C2410_SDICMDSTAT_CRCFAIL (1<<12) #define S3C2410_SDICMDSTAT_CMDSENT (1<<11) @@ -51,6 +58,9 @@ #define S3C2410_SDICMDSTAT_XFERING (1<<8) #define S3C2410_SDICMDSTAT_INDEX (0xff) +#define S3C2440_SDIDCON_DS_BYTE (0<<22) +#define S3C2440_SDIDCON_DS_HALFWORD (1<<22) +#define S3C2440_SDIDCON_DS_WORD (2<<22) #define S3C2410_SDIDCON_IRQPERIOD (1<<21) #define S3C2410_SDIDCON_TXAFTERRESP (1<<20) #define S3C2410_SDIDCON_RXAFTERCMD (1<<19) @@ -59,6 +69,7 @@ #define S3C2410_SDIDCON_WIDEBUS (1<<16) #define S3C2410_SDIDCON_DMAEN (1<<15) #define S3C2410_SDIDCON_STOP (1<<14) +#define S3C2440_SDIDCON_DATSTART (1<<14) #define S3C2410_SDIDCON_DATMODE (3<<12) #define S3C2410_SDIDCON_BLKNUM (0x7ff) @@ -68,6 +79,7 @@ #define S3C2410_SDIDCON_XFER_RXSTART (2<<12) #define S3C2410_SDIDCON_XFER_TXSTART (3<<12) +#define S3C2410_SDIDCON_BLKNUM_MASK (0xFFF) #define S3C2410_SDIDCNT_BLKNUM_SHIFT (12) #define S3C2410_SDIDSTA_RDYWAITREQ (1<<10) @@ -82,10 +94,12 @@ #define S3C2410_SDIDSTA_TXDATAON (1<<1) #define S3C2410_SDIDSTA_RXDATAON (1<<0) +#define S3C2440_SDIFSTA_FIFORESET (1<<16) +#define S3C2440_SDIFSTA_FIFOFAIL (3<<14) /* 3 is correct (2 bits) */ #define S3C2410_SDIFSTA_TFDET (1<<13) #define S3C2410_SDIFSTA_RFDET (1<<12) -#define S3C2410_SDIFSTA_TXHALF (1<<11) -#define S3C2410_SDIFSTA_TXEMPTY (1<<10) +#define S3C2410_SDIFSTA_TFHALF (1<<11) +#define S3C2410_SDIFSTA_TFEMPTY (1<<10) #define S3C2410_SDIFSTA_RFLAST (1<<9) #define S3C2410_SDIFSTA_RFFULL (1<<8) #define S3C2410_SDIFSTA_RFHALF (1<<7) -- cgit v0.10.2 From 679f0f8abd7187baaff40a47fe4733ae4c24cc9a Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 30 Jun 2008 22:40:25 +0100 Subject: MMC: S3C24XX MMC/SD driver write fixes This patch is a workaround of some S3C2410 MMC chip bug Signed-off-by: Ben Dooks Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index c6a4d3c..4db5bd7 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -461,9 +461,19 @@ static irqreturn_t s3cmci_irq(int irq, void *dev_id) if (mci_csta & S3C2410_SDICMDSTAT_CRCFAIL) { if (cmd->flags & MMC_RSP_CRC) { - cmd->error = -EILSEQ; - host->status = "error: bad command crc"; - goto fail_transfer; + if (host->mrq->cmd->flags & MMC_RSP_136) { + dbg(host, dbg_irq, + "fixup: ignore CRC fail with long rsp\n"); + } else { + /* note, we used to fail the transfer + * here, but it seems that this is just + * the hardware getting it wrong. + * + * cmd->error = -EILSEQ; + * host->status = "error: bad command crc"; + * goto fail_transfer; + */ + } } mci_cclear |= S3C2410_SDICMDSTAT_CRCFAIL; -- cgit v0.10.2 From ceb3ac252519f9ab318050c8ee842e62820d6731 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 30 Jun 2008 22:40:26 +0100 Subject: MMC: DMA free fix for S3C24XX SD/MMC driver Bugfix to ensure DMA channel allocated is freed on exit. Signed-off-by: Ben Dooks Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index 4db5bd7..6070f36 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -1254,6 +1254,7 @@ static int __devexit s3cmci_remove(struct platform_device *pdev) clk_put(host->clk); tasklet_disable(&host->pio_tasklet); + s3c2410_dma_free(S3CMCI_DMA, &s3cmci_dma_client); free_irq(host->irq_cd, host); free_irq(host->irq, host); -- cgit v0.10.2 From bdbc9c3a8f7a7956611c970e262693faa95081a5 Mon Sep 17 00:00:00 2001 From: Thomas Kleffel Date: Mon, 30 Jun 2008 22:40:27 +0100 Subject: Fix the request finalisation by ensuring the controller is stopped. Signed-off-by: Ben Dooks Signed-off-by: Harald Welte Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index 6070f36..8c68b2e 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -677,7 +677,7 @@ static void finalize_request(struct s3cmci_host *host) /* Cleanup controller */ writel(0, host->base + S3C2410_SDICMDARG); - writel(0, host->base + S3C2410_SDIDCON); + writel(S3C2410_SDIDCON_STOP, host->base + S3C2410_SDIDCON); writel(0, host->base + S3C2410_SDICMDCON); writel(0, host->base + host->sdiimsk); @@ -803,7 +803,7 @@ static int s3cmci_setup_data(struct s3cmci_host *host, struct mmc_data *data) dbg(host, dbg_err, "mci_setup_data() transfer stillin progress.\n"); - writel(0, host->base + S3C2410_SDIDCON); + writel(S3C2410_SDIDCON_STOP, host->base + S3C2410_SDIDCON); s3cmci_reset(host); if ((stoptries--) == 0) { -- cgit v0.10.2 From d643b5f7e0793ef7828a35a5ea049d675ad2ad8c Mon Sep 17 00:00:00 2001 From: Roman Moracik Date: Mon, 30 Jun 2008 22:40:28 +0100 Subject: MMC: Fix S3C24XX IRQ enable during PIO transfers Fix Bug #677 - I/O errors on heavy microSD writes for 2.6.22.x. Signed-off-by: Ben Dooks Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index 8c68b2e..774af3d 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -335,6 +335,8 @@ static void pio_tasklet(unsigned long data) struct s3cmci_host *host = (struct s3cmci_host *) data; + disable_irq(host->irq); + if (host->pio_active == XFER_WRITE) do_pio_write(host); @@ -352,9 +354,9 @@ static void pio_tasklet(unsigned long data) host->mrq->data->error = -EINVAL; } - disable_irq(host->irq); finalize_request(host); - } + } else + enable_irq(host->irq); } /* @@ -630,7 +632,6 @@ out: spin_unlock_irqrestore(&host->complete_lock, iflags); return; - fail_request: host->mrq->data->error = -EINVAL; host->complete_what = COMPLETION_FINALIZE; -- cgit v0.10.2 From edb5a98e43682d66c98ddd1dee863d867807546e Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 30 Jun 2008 22:40:29 +0100 Subject: MMC: S3C24XX: Add platform data for MMC/SD driver This patch adds platform data support to the s3mci driver. This allows flexible board-specific configuration of set_power, card detect and read only pins. Signed-off-by: Ben Dooks Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index 774af3d..684a10c 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -21,6 +21,8 @@ #include #include +#include + #include "s3cmci.h" #define DRIVER_NAME "s3c-mci" @@ -1011,6 +1013,9 @@ static void s3cmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) s3c2410_gpio_cfgpin(S3C2410_GPE9, S3C2410_GPE9_SDDAT2); s3c2410_gpio_cfgpin(S3C2410_GPE10, S3C2410_GPE10_SDDAT3); + if (host->pdata->set_power) + host->pdata->set_power(ios->power_mode, ios->vdd); + if (!host->is2440) mci_con |= S3C2410_SDICON_FIFORESET; @@ -1024,6 +1029,9 @@ static void s3cmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) if (host->is2440) mci_con |= S3C2440_SDICON_SDRESET; + if (host->pdata->set_power) + host->pdata->set_power(ios->power_mode, ios->vdd); + break; } @@ -1072,9 +1080,25 @@ static void s3cmci_reset(struct s3cmci_host *host) writel(con, host->base + S3C2410_SDICON); } +static int s3cmci_get_ro(struct mmc_host *mmc) +{ + struct s3cmci_host *host = mmc_priv(mmc); + + if (host->pdata->gpio_wprotect == 0) + return 0; + + return s3c2410_gpio_getpin(host->pdata->gpio_wprotect); +} + static struct mmc_host_ops s3cmci_ops = { .request = s3cmci_request, .set_ios = s3cmci_set_ios, + .get_ro = s3cmci_get_ro, +}; + +static struct s3c24xx_mci_pdata s3cmci_def_pdata = { + /* This is currently here to avoid a number of if (host->pdata) + * checks. Any zero fields to ensure reaonable defaults are picked. */ }; static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440) @@ -1094,6 +1118,12 @@ static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440) host->pdev = pdev; host->is2440 = is2440; + host->pdata = pdev->dev.platform_data; + if (!host->pdata) { + pdev->dev.platform_data = &s3cmci_def_pdata; + host->pdata = &s3cmci_def_pdata; + } + spin_lock_init(&host->complete_lock); tasklet_init(&host->pio_tasklet, pio_tasklet, (unsigned long) host); @@ -1112,7 +1142,8 @@ static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440) host->pio_active = XFER_NONE; host->dma = S3CMCI_DMA; - host->irq_cd = IRQ_EINT2; + host->irq_cd = s3c2410_gpio_getirq(host->pdata->gpio_detect); + s3c2410_gpio_cfgpin(host->pdata->gpio_detect, S3C2410_GPIO_IRQ); host->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!host->mem) { @@ -1158,7 +1189,7 @@ static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440) disable_irq(host->irq); - s3c2410_gpio_cfgpin(S3C2410_GPF2, S3C2410_GPF2_EINT2); + s3c2410_gpio_cfgpin(host->pdata->gpio_detect, S3C2410_GPIO_IRQ); set_irq_type(host->irq_cd, IRQT_BOTHEDGE); if (request_irq(host->irq_cd, s3cmci_irq_cd, 0, DRIVER_NAME, host)) { @@ -1168,6 +1199,10 @@ static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440) goto probe_free_irq; } + if (host->pdata->gpio_wprotect) + s3c2410_gpio_cfgpin(host->pdata->gpio_wprotect, + S3C2410_GPIO_INPUT); + if (s3c2410_dma_request(S3CMCI_DMA, &s3cmci_dma_client, NULL)) { dev_err(&pdev->dev, "unable to get DMA channel.\n"); ret = -EBUSY; @@ -1191,11 +1226,14 @@ static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440) host->clk_rate = clk_get_rate(host->clk); mmc->ops = &s3cmci_ops; - mmc->ocr_avail = MMC_VDD_32_33; + mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; mmc->caps = MMC_CAP_4_BIT_DATA; mmc->f_min = host->clk_rate / (host->clk_div * 256); mmc->f_max = host->clk_rate / host->clk_div; + if (host->pdata->ocr_avail) + mmc->ocr_avail = host->pdata->ocr_avail; + mmc->max_blk_count = 4095; mmc->max_blk_size = 4095; mmc->max_req_size = 4095 * 512; diff --git a/drivers/mmc/host/s3cmci.h b/drivers/mmc/host/s3cmci.h index 90b8af7..37d9c60 100644 --- a/drivers/mmc/host/s3cmci.h +++ b/drivers/mmc/host/s3cmci.h @@ -22,6 +22,7 @@ enum s3cmci_waitfor { struct s3cmci_host { struct platform_device *pdev; + struct s3c24xx_mci_pdata *pdata; struct mmc_host *mmc; struct resource *mem; struct clk *clk; diff --git a/include/asm-arm/plat-s3c24xx/mci.h b/include/asm-arm/plat-s3c24xx/mci.h new file mode 100644 index 0000000..5be2c14 --- /dev/null +++ b/include/asm-arm/plat-s3c24xx/mci.h @@ -0,0 +1,12 @@ +#ifndef _ARCH_MCI_H +#define _ARCH_MCI_H + +struct s3c24xx_mci_pdata { + unsigned int gpio_detect; + unsigned int gpio_wprotect; + unsigned long ocr_avail; + void (*set_power)(unsigned char power_mode, + unsigned short vdd); +}; + +#endif /* _ARCH_NCI_H */ -- cgit v0.10.2 From cf0984c8edf63017fcc2ead212ca057877e345df Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 30 Jun 2008 22:40:30 +0100 Subject: MMC: S3C24XX: Add support to invert write protect line Support for inverting the sense of the MMC driver's write protect detection line. Signed-off-by: Ben Dooks Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index 684a10c..4fd11d8 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -1083,11 +1083,18 @@ static void s3cmci_reset(struct s3cmci_host *host) static int s3cmci_get_ro(struct mmc_host *mmc) { struct s3cmci_host *host = mmc_priv(mmc); + struct s3c24xx_mci_pdata *pdata = host->pdata; + int ret; - if (host->pdata->gpio_wprotect == 0) + if (pdata->gpio_wprotect == 0) return 0; - return s3c2410_gpio_getpin(host->pdata->gpio_wprotect); + ret = s3c2410_gpio_getpin(pdata->gpio_wprotect); + + if (pdata->wprotect_invert) + ret = !ret; + + return ret; } static struct mmc_host_ops s3cmci_ops = { diff --git a/include/asm-arm/plat-s3c24xx/mci.h b/include/asm-arm/plat-s3c24xx/mci.h index 5be2c14..4f4ccd1 100644 --- a/include/asm-arm/plat-s3c24xx/mci.h +++ b/include/asm-arm/plat-s3c24xx/mci.h @@ -2,6 +2,8 @@ #define _ARCH_MCI_H struct s3c24xx_mci_pdata { + unsigned int wprotect_invert : 1; + unsigned int gpio_detect; unsigned int gpio_wprotect; unsigned long ocr_avail; -- cgit v0.10.2 From 7c14450ed6ab4ed453b2bf216ca3aaa7a5402af3 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 30 Jun 2008 22:40:31 +0100 Subject: MMC: S3C24XX: Ensure host->mrq->data is valid Fix a crash if host->mrq->data is NULL on ending a transfer. Signed-off-by: Ben Dooks Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index 4fd11d8..ffd9269 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -353,7 +353,8 @@ static void pio_tasklet(unsigned long data) (host->pio_active == XFER_READ) ? "read" : "write", host->pio_count, host->pio_words); - host->mrq->data->error = -EINVAL; + if (host->mrq->data) + host->mrq->data->error = -EINVAL; } finalize_request(host); -- cgit v0.10.2 From 55d70f5a7b25800fc8376cdd81d42d6c201fa91d Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 30 Jun 2008 22:40:32 +0100 Subject: MMC: S3C24XX: Allow card-detect on non-IRQ capable pin Add support to the S3C24XX MMC driver to have the card detect be on a pin that is not IRQ capable. Signed-off-by: Ben Dooks Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index ffd9269..aa9a8b4 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -1150,8 +1150,6 @@ static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440) host->pio_active = XFER_NONE; host->dma = S3CMCI_DMA; - host->irq_cd = s3c2410_gpio_getirq(host->pdata->gpio_detect); - s3c2410_gpio_cfgpin(host->pdata->gpio_detect, S3C2410_GPIO_IRQ); host->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!host->mem) { @@ -1197,14 +1195,20 @@ static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440) disable_irq(host->irq); - s3c2410_gpio_cfgpin(host->pdata->gpio_detect, S3C2410_GPIO_IRQ); - set_irq_type(host->irq_cd, IRQT_BOTHEDGE); + host->irq_cd = s3c2410_gpio_getirq(host->pdata->gpio_detect); - if (request_irq(host->irq_cd, s3cmci_irq_cd, 0, DRIVER_NAME, host)) { - dev_err(&pdev->dev, - "failed to request card detect interrupt.\n"); - ret = -ENOENT; - goto probe_free_irq; + if (host->irq_cd >= 0) { + if (request_irq(host->irq_cd, s3cmci_irq_cd, + IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, + DRIVER_NAME, host)) { + dev_err(&pdev->dev, "can't get card detect irq.\n"); + ret = -ENOENT; + goto probe_free_irq; + } + } else { + dev_warn(&pdev->dev, "host detect has no irq available\n"); + s3c2410_gpio_cfgpin(host->pdata->gpio_detect, + S3C2410_GPIO_INPUT); } if (host->pdata->gpio_wprotect) @@ -1273,7 +1277,8 @@ static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440) clk_put(host->clk); probe_free_irq_cd: - free_irq(host->irq_cd, host); + if (host->irq_cd >= 0) + free_irq(host->irq_cd, host); probe_free_irq: free_irq(host->irq, host); @@ -1303,7 +1308,8 @@ static int __devexit s3cmci_remove(struct platform_device *pdev) tasklet_disable(&host->pio_tasklet); s3c2410_dma_free(S3CMCI_DMA, &s3cmci_dma_client); - free_irq(host->irq_cd, host); + if (host->irq_cd >= 0) + free_irq(host->irq_cd, host); free_irq(host->irq, host); iounmap(host->base); -- cgit v0.10.2 From 3886ff5f63f33c801ed3af265ac0df20d3a8dcf5 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 30 Jun 2008 22:40:33 +0100 Subject: MMC: S3C24XX: Fix s3c2410_dma_request() return code check. The driver should be checking for a negative error code from s3c2410_dma_request(), not non-zero. Newer kernels now return the DMA channel number that was allocated by the request. Signed-off-by: Ben Dooks Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index aa9a8b4..8389107 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -1215,7 +1215,7 @@ static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440) s3c2410_gpio_cfgpin(host->pdata->gpio_wprotect, S3C2410_GPIO_INPUT); - if (s3c2410_dma_request(S3CMCI_DMA, &s3cmci_dma_client, NULL)) { + if (s3c2410_dma_request(S3CMCI_DMA, &s3cmci_dma_client, NULL) < 0) { dev_err(&pdev->dev, "unable to get DMA channel.\n"); ret = -EBUSY; goto probe_free_irq_cd; -- cgit v0.10.2 From 318f905f02b427b8df33d724b7392a0597b40bdd Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 30 Jun 2008 22:40:34 +0100 Subject: MMC: S3C24XX: Add MODULE_ALIAS() entries for the platform devices Add MODULE_ALIAS() declerations for all the supported platform devices for this driver. Signed-off-by: Ben Dooks Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index 8389107..a6224f9 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -1405,3 +1405,6 @@ module_exit(s3cmci_exit); MODULE_DESCRIPTION("Samsung S3C MMC/SD Card Interface driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Thomas Kleffel "); +MODULE_ALIAS("platform:s3c2410-sdi"); +MODULE_ALIAS("platform:s3c2412-sdi"); +MODULE_ALIAS("platform:s3c2440-sdi"); -- cgit v0.10.2 From 2de5f79d4dfcb1be16f0b873bc77d6ec74b0426d Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 30 Jun 2008 22:40:35 +0100 Subject: MMC: S3C24XX: Fix use of msecs where jiffies are needed mmc_detect_change() takes jiffies, not msecs. Convert the previous value of msecs into jiffies before calling. Signed-off-by: Ben Dooks Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index a6224f9..6f1b474 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -584,7 +584,7 @@ static irqreturn_t s3cmci_irq_cd(int irq, void *dev_id) dbg(host, dbg_irq, "card detect\n"); - mmc_detect_change(host->mmc, 500); + mmc_detect_change(host->mmc, msecs_to_jiffies(500)); return IRQ_HANDLED; } -- cgit v0.10.2 From 50a845700b3b55f825b0eb901b03d6091f66d9f4 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 30 Jun 2008 22:40:36 +0100 Subject: MMC: S3C24XX: Add media presence test to request handling. Ensure that we have physical media present before attempting to send a request to a card. This ensures that we do not get flooded by errors from commands that can never be completed timing out. Signed-off-by: Ben Dooks Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index 6f1b474..62d73d3 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -984,6 +984,18 @@ static void s3cmci_send_request(struct mmc_host *mmc) enable_irq(host->irq); } +static int s3cmci_card_present(struct s3cmci_host *host) +{ + struct s3c24xx_mci_pdata *pdata = host->pdata; + int ret; + + if (pdata->gpio_detect == 0) + return -ENOSYS; + + ret = s3c2410_gpio_getpin(pdata->gpio_detect) ? 0 : 1; + return ret ^ pdata->detect_invert; +} + static void s3cmci_request(struct mmc_host *mmc, struct mmc_request *mrq) { struct s3cmci_host *host = mmc_priv(mmc); @@ -992,7 +1004,12 @@ static void s3cmci_request(struct mmc_host *mmc, struct mmc_request *mrq) host->cmd_is_stop = 0; host->mrq = mrq; - s3cmci_send_request(mmc); + if (s3cmci_card_present(host) == 0) { + dbg(host, dbg_err, "%s: no medium present\n", __func__); + host->mrq->cmd->error = -ENOMEDIUM; + mmc_request_done(mmc, mrq); + } else + s3cmci_send_request(mmc); } static void s3cmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) diff --git a/include/asm-arm/plat-s3c24xx/mci.h b/include/asm-arm/plat-s3c24xx/mci.h index 4f4ccd1..2d0852a 100644 --- a/include/asm-arm/plat-s3c24xx/mci.h +++ b/include/asm-arm/plat-s3c24xx/mci.h @@ -3,6 +3,7 @@ struct s3c24xx_mci_pdata { unsigned int wprotect_invert : 1; + unsigned int detect_invert : 1; /* set => detect active high. */ unsigned int gpio_detect; unsigned int gpio_wprotect; -- cgit v0.10.2 From ff8c804f1fdecb198c4be57155c61800e0d37bd2 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 30 Jun 2008 22:40:37 +0100 Subject: MMC: S3C24XX: Update error debugging. Add better debugging to show where errors are being generated, as some error codes can come from several different code paths. Also fix the error return path from s3cmci_setup_data() to return the error it returned to the request. Signed-off-by: Ben Dooks Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index 62d73d3..2b48395 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -450,6 +450,7 @@ static irqreturn_t s3cmci_irq(int irq, void *dev_id) } if (mci_csta & S3C2410_SDICMDSTAT_CMDTIMEOUT) { + dbg(host, dbg_err, "CMDSTAT: error CMDTIMEOUT\n"); cmd->error = -ETIMEDOUT; host->status = "error: command timeout"; goto fail_transfer; @@ -505,12 +506,14 @@ static irqreturn_t s3cmci_irq(int irq, void *dev_id) /* Check for FIFO failure */ if (host->is2440) { if (mci_fsta & S3C2440_SDIFSTA_FIFOFAIL) { + dbg(host, dbg_err, "FIFO failure\n"); host->mrq->data->error = -EILSEQ; host->status = "error: 2440 fifo failure"; goto fail_transfer; } } else { if (mci_dsta & S3C2410_SDIDSTA_FIFOFAIL) { + dbg(host, dbg_err, "FIFO failure\n"); cmd->data->error = -EILSEQ; host->status = "error: fifo failure"; goto fail_transfer; @@ -518,18 +521,21 @@ static irqreturn_t s3cmci_irq(int irq, void *dev_id) } if (mci_dsta & S3C2410_SDIDSTA_RXCRCFAIL) { + dbg(host, dbg_err, "bad data crc (outgoing)\n"); cmd->data->error = -EILSEQ; host->status = "error: bad data crc (outgoing)"; goto fail_transfer; } if (mci_dsta & S3C2410_SDIDSTA_CRCFAIL) { + dbg(host, dbg_err, "bad data crc (incoming)\n"); cmd->data->error = -EILSEQ; host->status = "error: bad data crc (incoming)"; goto fail_transfer; } if (mci_dsta & S3C2410_SDIDSTA_DATATIMEOUT) { + dbg(host, dbg_err, "data timeout\n"); cmd->data->error = -ETIMEDOUT; host->status = "error: data timeout"; goto fail_transfer; @@ -956,8 +962,9 @@ static void s3cmci_send_request(struct mmc_host *mmc) host->dcnt++; if (res) { - cmd->error = -EINVAL; - cmd->data->error = -EINVAL; + dbg(host, dbg_err, "setup data error %d\n", res); + cmd->error = res; + cmd->data->error = res; mmc_request_done(mmc, mrq); return; @@ -969,6 +976,7 @@ static void s3cmci_send_request(struct mmc_host *mmc) res = s3cmci_prepare_pio(host, cmd->data); if (res) { + dbg(host, dbg_err, "data prepare error %d\n", res); cmd->error = res; cmd->data->error = res; -- cgit v0.10.2 From 4dde7f755211fd58105c989a99a3a9f2f1238fba Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 30 Jun 2008 22:40:38 +0100 Subject: MMC: S3C24XX: Add maintainer entry Add Ben Dooks as S3C24XX SD/MMC driver maintainer. Signed-off-by: Ben Dooks Signed-off-by: Pierre Ossman diff --git a/MAINTAINERS b/MAINTAINERS index f140449..9def20a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3530,6 +3530,13 @@ L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ S: Supported +S3C24XX SD/MMC Driver +P: Ben Dooks +M: ben-linux@fluff.org +L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) +L: linux-kernel@vger.kernel.org +S: Supported + SAA7146 VIDEO4LINUX-2 DRIVER P: Michael Hunold M: michael@mihu.de -- cgit v0.10.2 From 7e9c7b64022b7faff6022df64baec8ab467d0bfd Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 30 Jun 2008 22:40:39 +0100 Subject: MMC: S3C24XX: Refuse incorrectly aligned transfers The hardware does not support any multi-block transfers with an block-size that is not 32bit aligned. Also the driver itself does not support single block non-32bit transfers either. Ensure that the s3cmci_setup_data() returns the appropriate error if we encounter this. Signed-off-by: Ben Dooks Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index 2b48395..6a1e499 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -807,6 +807,17 @@ static int s3cmci_setup_data(struct s3cmci_host *host, struct mmc_data *data) return 0; } + if ((data->blksz & 3) != 0) { + /* We cannot deal with unaligned blocks with more than + * one block being transfered. */ + + if (data->blocks > 1) + return -EINVAL; + + /* No support yet for non-word block transfers. */ + return -EINVAL; + } + while (readl(host->base + S3C2410_SDIDSTA) & (S3C2410_SDIDSTA_TXDATAON | S3C2410_SDIDSTA_RXDATAON)) { -- cgit v0.10.2 From 9eeebd22ca757fee8dc10ffe6fa6992f33a3c5ec Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Mon, 30 Jun 2008 10:50:23 +0300 Subject: mmc: wbsd.c fix shadowing of 'dma' variable This patch fix warning :shadowing dma variable and made use of module_param_named instead of module_param Signed-off-by: Tomas Winkler Acked-by: Marcel Holtmann Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c index 67e5a9b..c2b6160 100644 --- a/drivers/mmc/host/wbsd.c +++ b/drivers/mmc/host/wbsd.c @@ -68,16 +68,16 @@ static const int unlock_codes[] = { 0x83, 0x87 }; static const int valid_ids[] = { 0x7112, - }; +}; #ifdef CONFIG_PNP -static unsigned int nopnp = 0; +static unsigned int param_nopnp = 0; #else -static const unsigned int nopnp = 1; +static const unsigned int param_nopnp = 1; #endif -static unsigned int io = 0x248; -static unsigned int irq = 6; -static int dma = 2; +static unsigned int param_io = 0x248; +static unsigned int param_irq = 6; +static int param_dma = 2; /* * Basic functions @@ -1765,7 +1765,7 @@ static void __devexit wbsd_shutdown(struct device *dev, int pnp) static int __devinit wbsd_probe(struct platform_device *dev) { /* Use the module parameters for resources */ - return wbsd_init(&dev->dev, io, irq, dma, 0); + return wbsd_init(&dev->dev, param_io, param_irq, param_dma, 0); } static int __devexit wbsd_remove(struct platform_device *dev) @@ -1979,14 +1979,14 @@ static int __init wbsd_drv_init(void) #ifdef CONFIG_PNP - if (!nopnp) { + if (!param_nopnp) { result = pnp_register_driver(&wbsd_pnp_driver); if (result < 0) return result; } #endif /* CONFIG_PNP */ - if (nopnp) { + if (param_nopnp) { result = platform_driver_register(&wbsd_driver); if (result < 0) return result; @@ -2012,12 +2012,12 @@ static void __exit wbsd_drv_exit(void) { #ifdef CONFIG_PNP - if (!nopnp) + if (!param_nopnp) pnp_unregister_driver(&wbsd_pnp_driver); #endif /* CONFIG_PNP */ - if (nopnp) { + if (param_nopnp) { platform_device_unregister(wbsd_device); platform_driver_unregister(&wbsd_driver); @@ -2029,11 +2029,11 @@ static void __exit wbsd_drv_exit(void) module_init(wbsd_drv_init); module_exit(wbsd_drv_exit); #ifdef CONFIG_PNP -module_param(nopnp, uint, 0444); +module_param_named(nopnp, param_nopnp, uint, 0444); #endif -module_param(io, uint, 0444); -module_param(irq, uint, 0444); -module_param(dma, int, 0444); +module_param_named(io, param_io, uint, 0444); +module_param_named(irq, param_irq, uint, 0444); +module_param_named(dma, param_dma, int, 0444); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pierre Ossman "); -- cgit v0.10.2 From 6d37333163025b46afbcad434ec9a5f2e88e7254 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Mon, 30 Jun 2008 10:50:24 +0300 Subject: mmc: fix sdio_io sparse errors This patch fixes sdio_io sparse errors. This fix changes signature of API functions, changing unsigned char -> u8 unsigned short -> u16 unsigned long -> u32 - this was probably a bug in 64 bit platforms Signed-off-by: Tomas Winkler Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/core/sdio_io.c b/drivers/mmc/core/sdio_io.c old mode 100644 new mode 100755 index cc42a41..3ccf691 --- a/drivers/mmc/core/sdio_io.c +++ b/drivers/mmc/core/sdio_io.c @@ -167,10 +167,8 @@ int sdio_set_block_size(struct sdio_func *func, unsigned blksz) return -EINVAL; if (blksz == 0) { - blksz = min(min( - func->max_blksize, - func->card->host->max_blk_size), - 512u); + blksz = min(func->max_blksize, func->card->host->max_blk_size); + blksz = min(blksz, 512u); } ret = mmc_io_rw_direct(func->card, 1, 0, @@ -311,10 +309,9 @@ static int sdio_io_rw_ext_helper(struct sdio_func *func, int write, /* Blocks per command is limited by host count, host transfer * size (we only use a single sg entry) and the maximum for * IO_RW_EXTENDED of 511 blocks. */ - max_blocks = min(min( - func->card->host->max_blk_count, - func->card->host->max_seg_size / func->cur_blksize), - 511u); + max_blocks = min(func->card->host->max_blk_count, + func->card->host->max_seg_size / func->cur_blksize); + max_blocks = min(max_blocks, 511u); while (remainder > func->cur_blksize) { unsigned blocks; @@ -364,11 +361,10 @@ static int sdio_io_rw_ext_helper(struct sdio_func *func, int write, * function. If there is a problem reading the address, 0xff * is returned and @err_ret will contain the error code. */ -unsigned char sdio_readb(struct sdio_func *func, unsigned int addr, - int *err_ret) +u8 sdio_readb(struct sdio_func *func, unsigned int addr, int *err_ret) { int ret; - unsigned char val; + u8 val; BUG_ON(!func); @@ -397,8 +393,7 @@ EXPORT_SYMBOL_GPL(sdio_readb); * function. @err_ret will contain the status of the actual * transfer. */ -void sdio_writeb(struct sdio_func *func, unsigned char b, unsigned int addr, - int *err_ret) +void sdio_writeb(struct sdio_func *func, u8 b, unsigned int addr, int *err_ret) { int ret; @@ -459,7 +454,6 @@ int sdio_readsb(struct sdio_func *func, void *dst, unsigned int addr, { return sdio_io_rw_ext_helper(func, 0, addr, 0, dst, count); } - EXPORT_SYMBOL_GPL(sdio_readsb); /** @@ -489,8 +483,7 @@ EXPORT_SYMBOL_GPL(sdio_writesb); * function. If there is a problem reading the address, 0xffff * is returned and @err_ret will contain the error code. */ -unsigned short sdio_readw(struct sdio_func *func, unsigned int addr, - int *err_ret) +u16 sdio_readw(struct sdio_func *func, unsigned int addr, int *err_ret) { int ret; @@ -504,7 +497,7 @@ unsigned short sdio_readw(struct sdio_func *func, unsigned int addr, return 0xFFFF; } - return le16_to_cpu(*(u16*)func->tmpbuf); + return le16_to_cpup((__le16 *)func->tmpbuf); } EXPORT_SYMBOL_GPL(sdio_readw); @@ -519,12 +512,11 @@ EXPORT_SYMBOL_GPL(sdio_readw); * function. @err_ret will contain the status of the actual * transfer. */ -void sdio_writew(struct sdio_func *func, unsigned short b, unsigned int addr, - int *err_ret) +void sdio_writew(struct sdio_func *func, u16 b, unsigned int addr, int *err_ret) { int ret; - *(u16*)func->tmpbuf = cpu_to_le16(b); + *(__le16 *)func->tmpbuf = cpu_to_le16(b); ret = sdio_memcpy_toio(func, addr, func->tmpbuf, 2); if (err_ret) @@ -543,8 +535,7 @@ EXPORT_SYMBOL_GPL(sdio_writew); * 0xffffffff is returned and @err_ret will contain the error * code. */ -unsigned long sdio_readl(struct sdio_func *func, unsigned int addr, - int *err_ret) +u32 sdio_readl(struct sdio_func *func, unsigned int addr, int *err_ret) { int ret; @@ -558,7 +549,7 @@ unsigned long sdio_readl(struct sdio_func *func, unsigned int addr, return 0xFFFFFFFF; } - return le32_to_cpu(*(u32*)func->tmpbuf); + return le32_to_cpup((__le32 *)func->tmpbuf); } EXPORT_SYMBOL_GPL(sdio_readl); @@ -573,12 +564,11 @@ EXPORT_SYMBOL_GPL(sdio_readl); * function. @err_ret will contain the status of the actual * transfer. */ -void sdio_writel(struct sdio_func *func, unsigned long b, unsigned int addr, - int *err_ret) +void sdio_writel(struct sdio_func *func, u32 b, unsigned int addr, int *err_ret) { int ret; - *(u32*)func->tmpbuf = cpu_to_le32(b); + *(__le32 *)func->tmpbuf = cpu_to_le32(b); ret = sdio_memcpy_toio(func, addr, func->tmpbuf, 4); if (err_ret) diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h old mode 100644 new mode 100755 index f57f22b..28fb0a3 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -122,23 +122,20 @@ extern int sdio_release_irq(struct sdio_func *func); extern unsigned int sdio_align_size(struct sdio_func *func, unsigned int sz); -extern unsigned char sdio_readb(struct sdio_func *func, - unsigned int addr, int *err_ret); -extern unsigned short sdio_readw(struct sdio_func *func, - unsigned int addr, int *err_ret); -extern unsigned long sdio_readl(struct sdio_func *func, - unsigned int addr, int *err_ret); +extern u8 sdio_readb(struct sdio_func *func, unsigned int addr, int *err_ret); +extern u16 sdio_readw(struct sdio_func *func, unsigned int addr, int *err_ret); +extern u32 sdio_readl(struct sdio_func *func, unsigned int addr, int *err_ret); extern int sdio_memcpy_fromio(struct sdio_func *func, void *dst, unsigned int addr, int count); extern int sdio_readsb(struct sdio_func *func, void *dst, unsigned int addr, int count); -extern void sdio_writeb(struct sdio_func *func, unsigned char b, +extern void sdio_writeb(struct sdio_func *func, u8 b, unsigned int addr, int *err_ret); -extern void sdio_writew(struct sdio_func *func, unsigned short b, +extern void sdio_writew(struct sdio_func *func, u16 b, unsigned int addr, int *err_ret); -extern void sdio_writel(struct sdio_func *func, unsigned long b, +extern void sdio_writel(struct sdio_func *func, u32 b, unsigned int addr, int *err_ret); extern int sdio_memcpy_toio(struct sdio_func *func, unsigned int addr, -- cgit v0.10.2 From 7d2be0749a59096a334c94dc48f43294193cb8ed Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Mon, 30 Jun 2008 18:35:03 +0200 Subject: atmel-mci: Driver for Atmel on-chip MMC controllers This is a driver for the MMC controller on the AP7000 chips from Atmel. It should in theory work on AT91 systems too with some tweaking, but since the DMA interface is quite different, it's not entirely clear if it's worth merging this with the at91_mci driver. This driver has been around for a while in BSPs and kernel sources provided by Atmel, but this particular version uses the generic DMA Engine framework (with the slave extensions) instead of an avr32-only DMA controller framework. This driver can also use PIO transfers when no DMA channels are available, and for transfers where using DMA may be difficult or impractical for some reason (e.g. the DMA setup overhead is usually not worth it for very short transfers, and badly aligned buffers or lengths are difficult to handle.) Currently, the driver only support PIO transfers. DMA support has been split out to a separate patch to hopefully make it easier to review. The driver has been tested using mmc-block and ext3fs on several SD, SDHC and MMC+ cards. Reads and writes work fine, with read transfer rates up to 3.5 MiB/s on fast cards with debugging disabled. The driver has also been tested using the mmc_test module on the same cards. All tests except 7, 9, 15 and 17 succeed. The first two are unsupported by all the cards I have, so I don't know if the driver handles this correctly. The last two fail because the hardware flags a Data CRC Error instead of a Data Timeout error. I'm not sure how to deal with that. Documentation for this controller can be found in many data sheets from Atmel, including the AT32AP7000 data sheet which can be found here: http://www.atmel.com/dyn/products/datasheets.asp?family_id=682 Signed-off-by: Haavard Skinnemoen Signed-off-by: Pierre Ossman diff --git a/arch/avr32/boards/atngw100/setup.c b/arch/avr32/boards/atngw100/setup.c index a51bb9f..c7fe94d 100644 --- a/arch/avr32/boards/atngw100/setup.c +++ b/arch/avr32/boards/atngw100/setup.c @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -51,6 +52,11 @@ static struct spi_board_info spi0_board_info[] __initdata = { }, }; +static struct mci_platform_data __initdata mci0_data = { + .detect_pin = GPIO_PIN_PC(25), + .wp_pin = GPIO_PIN_PE(0), +}; + /* * The next two functions should go away as the boot loader is * supposed to initialize the macb address registers with a valid @@ -170,6 +176,7 @@ static int __init atngw100_init(void) set_hw_addr(at32_add_device_eth(1, ð_data[1])); at32_add_device_spi(0, spi0_board_info, ARRAY_SIZE(spi0_board_info)); + at32_add_device_mci(0, &mci0_data); at32_add_device_usba(0, NULL); for (i = 0; i < ARRAY_SIZE(ngw_leds); i++) { diff --git a/arch/avr32/boards/atstk1000/atstk1002.c b/arch/avr32/boards/atstk1000/atstk1002.c index 86b363c..e11659b 100644 --- a/arch/avr32/boards/atstk1000/atstk1002.c +++ b/arch/avr32/boards/atstk1000/atstk1002.c @@ -234,6 +234,9 @@ static int __init atstk1002_init(void) #ifdef CONFIG_BOARD_ATSTK100X_SPI1 at32_add_device_spi(1, spi1_board_info, ARRAY_SIZE(spi1_board_info)); #endif +#ifndef CONFIG_BOARD_ATSTK1002_SW2_CUSTOM + at32_add_device_mci(0, NULL); +#endif #ifdef CONFIG_BOARD_ATSTK1002_SW5_CUSTOM set_hw_addr(at32_add_device_eth(1, ð_data[1])); #else diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index 07b21b1..021d512 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -1278,20 +1279,32 @@ static struct clk atmel_mci0_pclk = { .index = 9, }; -struct platform_device *__init at32_add_device_mci(unsigned int id) +struct platform_device *__init +at32_add_device_mci(unsigned int id, struct mci_platform_data *data) { - struct platform_device *pdev; + struct mci_platform_data _data; + struct platform_device *pdev; + struct dw_dma_slave *dws; if (id != 0) return NULL; pdev = platform_device_alloc("atmel_mci", id); if (!pdev) - return NULL; + goto fail; if (platform_device_add_resources(pdev, atmel_mci0_resource, ARRAY_SIZE(atmel_mci0_resource))) - goto err_add_resources; + goto fail; + + if (!data) { + data = &_data; + memset(data, 0, sizeof(struct mci_platform_data)); + } + + if (platform_device_add_data(pdev, data, + sizeof(struct mci_platform_data))) + goto fail; select_peripheral(PA(10), PERIPH_A, 0); /* CLK */ select_peripheral(PA(11), PERIPH_A, 0); /* CMD */ @@ -1300,12 +1313,19 @@ struct platform_device *__init at32_add_device_mci(unsigned int id) select_peripheral(PA(14), PERIPH_A, 0); /* DATA2 */ select_peripheral(PA(15), PERIPH_A, 0); /* DATA3 */ + if (data) { + if (data->detect_pin != GPIO_PIN_NONE) + at32_select_gpio(data->detect_pin, 0); + if (data->wp_pin != GPIO_PIN_NONE) + at32_select_gpio(data->wp_pin, 0); + } + atmel_mci0_pclk.dev = &pdev->dev; platform_device_add(pdev); return pdev; -err_add_resources: +fail: platform_device_put(pdev); return NULL; } diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index dc88c03..198df42 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -104,6 +104,16 @@ config MMC_AT91 If unsure, say N. +config MMC_ATMELMCI + tristate "Atmel Multimedia Card Interface support" + depends on AVR32 + help + This selects the Atmel Multimedia Card Interface driver. If + you have an AT32 (AVR32) platform with a Multimedia Card + slot, say Y or M here. + + If unsure, say N. + config MMC_IMX tristate "Motorola i.MX Multimedia Card Interface support" depends on ARCH_IMX diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile index b3e023b..2dc9ff2 100644 --- a/drivers/mmc/host/Makefile +++ b/drivers/mmc/host/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_MMC_WBSD) += wbsd.o obj-$(CONFIG_MMC_AU1X) += au1xmmc.o obj-$(CONFIG_MMC_OMAP) += omap.o obj-$(CONFIG_MMC_AT91) += at91_mci.o +obj-$(CONFIG_MMC_ATMELMCI) += atmel-mci.o obj-$(CONFIG_MMC_TIFM_SD) += tifm_sd.o obj-$(CONFIG_MMC_SPI) += mmc_spi.o obj-$(CONFIG_MMC_S3C) += s3cmci.o diff --git a/drivers/mmc/host/atmel-mci-regs.h b/drivers/mmc/host/atmel-mci-regs.h new file mode 100644 index 0000000..a9a5657 --- /dev/null +++ b/drivers/mmc/host/atmel-mci-regs.h @@ -0,0 +1,91 @@ +/* + * Atmel MultiMedia Card Interface driver + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __DRIVERS_MMC_ATMEL_MCI_H__ +#define __DRIVERS_MMC_ATMEL_MCI_H__ + +/* MCI Register Definitions */ +#define MCI_CR 0x0000 /* Control */ +# define MCI_CR_MCIEN ( 1 << 0) /* MCI Enable */ +# define MCI_CR_MCIDIS ( 1 << 1) /* MCI Disable */ +# define MCI_CR_SWRST ( 1 << 7) /* Software Reset */ +#define MCI_MR 0x0004 /* Mode */ +# define MCI_MR_CLKDIV(x) ((x) << 0) /* Clock Divider */ +# define MCI_MR_RDPROOF ( 1 << 11) /* Read Proof */ +# define MCI_MR_WRPROOF ( 1 << 12) /* Write Proof */ +#define MCI_DTOR 0x0008 /* Data Timeout */ +# define MCI_DTOCYC(x) ((x) << 0) /* Data Timeout Cycles */ +# define MCI_DTOMUL(x) ((x) << 4) /* Data Timeout Multiplier */ +#define MCI_SDCR 0x000c /* SD Card / SDIO */ +# define MCI_SDCSEL_SLOT_A ( 0 << 0) /* Select SD slot A */ +# define MCI_SDCSEL_SLOT_B ( 1 << 0) /* Select SD slot A */ +# define MCI_SDCBUS_1BIT ( 0 << 7) /* 1-bit data bus */ +# define MCI_SDCBUS_4BIT ( 1 << 7) /* 4-bit data bus */ +#define MCI_ARGR 0x0010 /* Command Argument */ +#define MCI_CMDR 0x0014 /* Command */ +# define MCI_CMDR_CMDNB(x) ((x) << 0) /* Command Opcode */ +# define MCI_CMDR_RSPTYP_NONE ( 0 << 6) /* No response */ +# define MCI_CMDR_RSPTYP_48BIT ( 1 << 6) /* 48-bit response */ +# define MCI_CMDR_RSPTYP_136BIT ( 2 << 6) /* 136-bit response */ +# define MCI_CMDR_SPCMD_INIT ( 1 << 8) /* Initialization command */ +# define MCI_CMDR_SPCMD_SYNC ( 2 << 8) /* Synchronized command */ +# define MCI_CMDR_SPCMD_INT ( 4 << 8) /* Interrupt command */ +# define MCI_CMDR_SPCMD_INTRESP ( 5 << 8) /* Interrupt response */ +# define MCI_CMDR_OPDCMD ( 1 << 11) /* Open Drain */ +# define MCI_CMDR_MAXLAT_5CYC ( 0 << 12) /* Max latency 5 cycles */ +# define MCI_CMDR_MAXLAT_64CYC ( 1 << 12) /* Max latency 64 cycles */ +# define MCI_CMDR_START_XFER ( 1 << 16) /* Start data transfer */ +# define MCI_CMDR_STOP_XFER ( 2 << 16) /* Stop data transfer */ +# define MCI_CMDR_TRDIR_WRITE ( 0 << 18) /* Write data */ +# define MCI_CMDR_TRDIR_READ ( 1 << 18) /* Read data */ +# define MCI_CMDR_BLOCK ( 0 << 19) /* Single-block transfer */ +# define MCI_CMDR_MULTI_BLOCK ( 1 << 19) /* Multi-block transfer */ +# define MCI_CMDR_STREAM ( 2 << 19) /* MMC Stream transfer */ +# define MCI_CMDR_SDIO_BYTE ( 4 << 19) /* SDIO Byte transfer */ +# define MCI_CMDR_SDIO_BLOCK ( 5 << 19) /* SDIO Block transfer */ +# define MCI_CMDR_SDIO_SUSPEND ( 1 << 24) /* SDIO Suspend Command */ +# define MCI_CMDR_SDIO_RESUME ( 2 << 24) /* SDIO Resume Command */ +#define MCI_BLKR 0x0018 /* Block */ +# define MCI_BCNT(x) ((x) << 0) /* Data Block Count */ +# define MCI_BLKLEN(x) ((x) << 16) /* Data Block Length */ +#define MCI_RSPR 0x0020 /* Response 0 */ +#define MCI_RSPR1 0x0024 /* Response 1 */ +#define MCI_RSPR2 0x0028 /* Response 2 */ +#define MCI_RSPR3 0x002c /* Response 3 */ +#define MCI_RDR 0x0030 /* Receive Data */ +#define MCI_TDR 0x0034 /* Transmit Data */ +#define MCI_SR 0x0040 /* Status */ +#define MCI_IER 0x0044 /* Interrupt Enable */ +#define MCI_IDR 0x0048 /* Interrupt Disable */ +#define MCI_IMR 0x004c /* Interrupt Mask */ +# define MCI_CMDRDY ( 1 << 0) /* Command Ready */ +# define MCI_RXRDY ( 1 << 1) /* Receiver Ready */ +# define MCI_TXRDY ( 1 << 2) /* Transmitter Ready */ +# define MCI_BLKE ( 1 << 3) /* Data Block Ended */ +# define MCI_DTIP ( 1 << 4) /* Data Transfer In Progress */ +# define MCI_NOTBUSY ( 1 << 5) /* Data Not Busy */ +# define MCI_SDIOIRQA ( 1 << 8) /* SDIO IRQ in slot A */ +# define MCI_SDIOIRQB ( 1 << 9) /* SDIO IRQ in slot B */ +# define MCI_RINDE ( 1 << 16) /* Response Index Error */ +# define MCI_RDIRE ( 1 << 17) /* Response Direction Error */ +# define MCI_RCRCE ( 1 << 18) /* Response CRC Error */ +# define MCI_RENDE ( 1 << 19) /* Response End Bit Error */ +# define MCI_RTOE ( 1 << 20) /* Response Time-Out Error */ +# define MCI_DCRCE ( 1 << 21) /* Data CRC Error */ +# define MCI_DTOE ( 1 << 22) /* Data Time-Out Error */ +# define MCI_OVRE ( 1 << 30) /* RX Overrun Error */ +# define MCI_UNRE ( 1 << 31) /* TX Underrun Error */ + +/* Register access macros */ +#define mci_readl(port,reg) \ + __raw_readl((port)->regs + MCI_##reg) +#define mci_writel(port,reg,value) \ + __raw_writel((value), (port)->regs + MCI_##reg) + +#endif /* __DRIVERS_MMC_ATMEL_MCI_H__ */ diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c new file mode 100644 index 0000000..25d5324a --- /dev/null +++ b/drivers/mmc/host/atmel-mci.c @@ -0,0 +1,981 @@ +/* + * Atmel MultiMedia Card Interface driver + * + * Copyright (C) 2004-2008 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include + +#include "atmel-mci-regs.h" + +#define ATMCI_DATA_ERROR_FLAGS (MCI_DCRCE | MCI_DTOE | MCI_OVRE | MCI_UNRE) + +enum { + EVENT_CMD_COMPLETE = 0, + EVENT_DATA_ERROR, + EVENT_DATA_COMPLETE, + EVENT_STOP_SENT, + EVENT_STOP_COMPLETE, + EVENT_XFER_COMPLETE, +}; + +struct atmel_mci { + struct mmc_host *mmc; + void __iomem *regs; + + struct scatterlist *sg; + unsigned int pio_offset; + + struct mmc_request *mrq; + struct mmc_command *cmd; + struct mmc_data *data; + + u32 cmd_status; + u32 data_status; + u32 stop_status; + u32 stop_cmdr; + + u32 mode_reg; + u32 sdc_reg; + + struct tasklet_struct tasklet; + unsigned long pending_events; + unsigned long completed_events; + + int present; + int detect_pin; + int wp_pin; + + /* For detect pin debouncing */ + struct timer_list detect_timer; + + unsigned long bus_hz; + unsigned long mapbase; + struct clk *mck; + struct platform_device *pdev; +}; + +#define atmci_is_completed(host, event) \ + test_bit(event, &host->completed_events) +#define atmci_test_and_clear_pending(host, event) \ + test_and_clear_bit(event, &host->pending_events) +#define atmci_test_and_set_completed(host, event) \ + test_and_set_bit(event, &host->completed_events) +#define atmci_set_completed(host, event) \ + set_bit(event, &host->completed_events) +#define atmci_set_pending(host, event) \ + set_bit(event, &host->pending_events) +#define atmci_clear_pending(host, event) \ + clear_bit(event, &host->pending_events) + + +static void atmci_enable(struct atmel_mci *host) +{ + clk_enable(host->mck); + mci_writel(host, CR, MCI_CR_MCIEN); + mci_writel(host, MR, host->mode_reg); + mci_writel(host, SDCR, host->sdc_reg); +} + +static void atmci_disable(struct atmel_mci *host) +{ + mci_writel(host, CR, MCI_CR_SWRST); + + /* Stall until write is complete, then disable the bus clock */ + mci_readl(host, SR); + clk_disable(host->mck); +} + +static inline unsigned int ns_to_clocks(struct atmel_mci *host, + unsigned int ns) +{ + return (ns * (host->bus_hz / 1000000) + 999) / 1000; +} + +static void atmci_set_timeout(struct atmel_mci *host, + struct mmc_data *data) +{ + static unsigned dtomul_to_shift[] = { + 0, 4, 7, 8, 10, 12, 16, 20 + }; + unsigned timeout; + unsigned dtocyc; + unsigned dtomul; + + timeout = ns_to_clocks(host, data->timeout_ns) + data->timeout_clks; + + for (dtomul = 0; dtomul < 8; dtomul++) { + unsigned shift = dtomul_to_shift[dtomul]; + dtocyc = (timeout + (1 << shift) - 1) >> shift; + if (dtocyc < 15) + break; + } + + if (dtomul >= 8) { + dtomul = 7; + dtocyc = 15; + } + + dev_vdbg(&host->mmc->class_dev, "setting timeout to %u cycles\n", + dtocyc << dtomul_to_shift[dtomul]); + mci_writel(host, DTOR, (MCI_DTOMUL(dtomul) | MCI_DTOCYC(dtocyc))); +} + +/* + * Return mask with command flags to be enabled for this command. + */ +static u32 atmci_prepare_command(struct mmc_host *mmc, + struct mmc_command *cmd) +{ + struct mmc_data *data; + u32 cmdr; + + cmd->error = -EINPROGRESS; + + cmdr = MCI_CMDR_CMDNB(cmd->opcode); + + if (cmd->flags & MMC_RSP_PRESENT) { + if (cmd->flags & MMC_RSP_136) + cmdr |= MCI_CMDR_RSPTYP_136BIT; + else + cmdr |= MCI_CMDR_RSPTYP_48BIT; + } + + /* + * This should really be MAXLAT_5 for CMD2 and ACMD41, but + * it's too difficult to determine whether this is an ACMD or + * not. Better make it 64. + */ + cmdr |= MCI_CMDR_MAXLAT_64CYC; + + if (mmc->ios.bus_mode == MMC_BUSMODE_OPENDRAIN) + cmdr |= MCI_CMDR_OPDCMD; + + data = cmd->data; + if (data) { + cmdr |= MCI_CMDR_START_XFER; + if (data->flags & MMC_DATA_STREAM) + cmdr |= MCI_CMDR_STREAM; + else if (data->blocks > 1) + cmdr |= MCI_CMDR_MULTI_BLOCK; + else + cmdr |= MCI_CMDR_BLOCK; + + if (data->flags & MMC_DATA_READ) + cmdr |= MCI_CMDR_TRDIR_READ; + } + + return cmdr; +} + +static void atmci_start_command(struct atmel_mci *host, + struct mmc_command *cmd, + u32 cmd_flags) +{ + /* Must read host->cmd after testing event flags */ + smp_rmb(); + WARN_ON(host->cmd); + host->cmd = cmd; + + dev_vdbg(&host->mmc->class_dev, + "start command: ARGR=0x%08x CMDR=0x%08x\n", + cmd->arg, cmd_flags); + + mci_writel(host, ARGR, cmd->arg); + mci_writel(host, CMDR, cmd_flags); +} + +static void send_stop_cmd(struct mmc_host *mmc, struct mmc_data *data) +{ + struct atmel_mci *host = mmc_priv(mmc); + + atmci_start_command(host, data->stop, host->stop_cmdr); + mci_writel(host, IER, MCI_CMDRDY); +} + +static void atmci_request_end(struct mmc_host *mmc, struct mmc_request *mrq) +{ + struct atmel_mci *host = mmc_priv(mmc); + + WARN_ON(host->cmd || host->data); + host->mrq = NULL; + + atmci_disable(host); + + mmc_request_done(mmc, mrq); +} + +/* + * Returns a mask of interrupt flags to be enabled after the whole + * request has been prepared. + */ +static u32 atmci_submit_data(struct mmc_host *mmc, struct mmc_data *data) +{ + struct atmel_mci *host = mmc_priv(mmc); + u32 iflags; + + data->error = -EINPROGRESS; + + WARN_ON(host->data); + host->sg = NULL; + host->data = data; + + mci_writel(host, BLKR, MCI_BCNT(data->blocks) + | MCI_BLKLEN(data->blksz)); + dev_vdbg(&mmc->class_dev, "BLKR=0x%08x\n", + MCI_BCNT(data->blocks) | MCI_BLKLEN(data->blksz)); + + iflags = ATMCI_DATA_ERROR_FLAGS; + host->sg = data->sg; + host->pio_offset = 0; + if (data->flags & MMC_DATA_READ) + iflags |= MCI_RXRDY; + else + iflags |= MCI_TXRDY; + + return iflags; +} + +static void atmci_request(struct mmc_host *mmc, struct mmc_request *mrq) +{ + struct atmel_mci *host = mmc_priv(mmc); + struct mmc_data *data; + struct mmc_command *cmd; + u32 iflags; + u32 cmdflags = 0; + + iflags = mci_readl(host, IMR); + if (iflags) + dev_warn(&mmc->class_dev, "WARNING: IMR=0x%08x\n", + mci_readl(host, IMR)); + + WARN_ON(host->mrq != NULL); + + /* + * We may "know" the card is gone even though there's still an + * electrical connection. If so, we really need to communicate + * this to the MMC core since there won't be any more + * interrupts as the card is completely removed. Otherwise, + * the MMC core might believe the card is still there even + * though the card was just removed very slowly. + */ + if (!host->present) { + mrq->cmd->error = -ENOMEDIUM; + mmc_request_done(mmc, mrq); + return; + } + + host->mrq = mrq; + host->pending_events = 0; + host->completed_events = 0; + + atmci_enable(host); + + /* We don't support multiple blocks of weird lengths. */ + data = mrq->data; + if (data) { + if (data->blocks > 1 && data->blksz & 3) + goto fail; + atmci_set_timeout(host, data); + } + + iflags = MCI_CMDRDY; + cmd = mrq->cmd; + cmdflags = atmci_prepare_command(mmc, cmd); + atmci_start_command(host, cmd, cmdflags); + + if (data) + iflags |= atmci_submit_data(mmc, data); + + if (mrq->stop) { + host->stop_cmdr = atmci_prepare_command(mmc, mrq->stop); + host->stop_cmdr |= MCI_CMDR_STOP_XFER; + if (!(data->flags & MMC_DATA_WRITE)) + host->stop_cmdr |= MCI_CMDR_TRDIR_READ; + if (data->flags & MMC_DATA_STREAM) + host->stop_cmdr |= MCI_CMDR_STREAM; + else + host->stop_cmdr |= MCI_CMDR_MULTI_BLOCK; + } + + /* + * We could have enabled interrupts earlier, but I suspect + * that would open up a nice can of interesting race + * conditions (e.g. command and data complete, but stop not + * prepared yet.) + */ + mci_writel(host, IER, iflags); + + return; + +fail: + atmci_disable(host); + host->mrq = NULL; + mrq->cmd->error = -EINVAL; + mmc_request_done(mmc, mrq); +} + +static void atmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct atmel_mci *host = mmc_priv(mmc); + + if (ios->clock) { + u32 clkdiv; + + /* Set clock rate */ + clkdiv = DIV_ROUND_UP(host->bus_hz, 2 * ios->clock) - 1; + if (clkdiv > 255) { + dev_warn(&mmc->class_dev, + "clock %u too slow; using %lu\n", + ios->clock, host->bus_hz / (2 * 256)); + clkdiv = 255; + } + + host->mode_reg = MCI_MR_CLKDIV(clkdiv) | MCI_MR_WRPROOF + | MCI_MR_RDPROOF; + } + + switch (ios->bus_width) { + case MMC_BUS_WIDTH_1: + host->sdc_reg = 0; + break; + case MMC_BUS_WIDTH_4: + host->sdc_reg = MCI_SDCBUS_4BIT; + break; + } + + switch (ios->power_mode) { + case MMC_POWER_ON: + /* Send init sequence (74 clock cycles) */ + atmci_enable(host); + mci_writel(host, CMDR, MCI_CMDR_SPCMD_INIT); + while (!(mci_readl(host, SR) & MCI_CMDRDY)) + cpu_relax(); + atmci_disable(host); + break; + default: + /* + * TODO: None of the currently available AVR32-based + * boards allow MMC power to be turned off. Implement + * power control when this can be tested properly. + */ + break; + } +} + +static int atmci_get_ro(struct mmc_host *mmc) +{ + int read_only = 0; + struct atmel_mci *host = mmc_priv(mmc); + + if (host->wp_pin >= 0) { + read_only = gpio_get_value(host->wp_pin); + dev_dbg(&mmc->class_dev, "card is %s\n", + read_only ? "read-only" : "read-write"); + } else { + dev_dbg(&mmc->class_dev, + "no pin for checking read-only switch." + " Assuming write-enable.\n"); + } + + return read_only; +} + +static struct mmc_host_ops atmci_ops = { + .request = atmci_request, + .set_ios = atmci_set_ios, + .get_ro = atmci_get_ro, +}; + +static void atmci_command_complete(struct atmel_mci *host, + struct mmc_command *cmd, u32 status) +{ + /* Read the response from the card (up to 16 bytes) */ + cmd->resp[0] = mci_readl(host, RSPR); + cmd->resp[1] = mci_readl(host, RSPR); + cmd->resp[2] = mci_readl(host, RSPR); + cmd->resp[3] = mci_readl(host, RSPR); + + if (status & MCI_RTOE) + cmd->error = -ETIMEDOUT; + else if ((cmd->flags & MMC_RSP_CRC) && (status & MCI_RCRCE)) + cmd->error = -EILSEQ; + else if (status & (MCI_RINDE | MCI_RDIRE | MCI_RENDE)) + cmd->error = -EIO; + else + cmd->error = 0; + + if (cmd->error) { + dev_dbg(&host->mmc->class_dev, + "command error: status=0x%08x\n", status); + + if (cmd->data) { + host->data = NULL; + mci_writel(host, IDR, MCI_NOTBUSY + | MCI_TXRDY | MCI_RXRDY + | ATMCI_DATA_ERROR_FLAGS); + } + } +} + +static void atmci_detect_change(unsigned long data) +{ + struct atmel_mci *host = (struct atmel_mci *)data; + struct mmc_request *mrq = host->mrq; + int present; + + /* + * atmci_remove() sets detect_pin to -1 before freeing the + * interrupt. We must not re-enable the interrupt if it has + * been freed. + */ + smp_rmb(); + if (host->detect_pin < 0) + return; + + enable_irq(gpio_to_irq(host->detect_pin)); + present = !gpio_get_value(host->detect_pin); + + dev_vdbg(&host->pdev->dev, "detect change: %d (was %d)\n", + present, host->present); + + if (present != host->present) { + dev_dbg(&host->mmc->class_dev, "card %s\n", + present ? "inserted" : "removed"); + host->present = present; + + /* Reset controller if card is gone */ + if (!present) { + mci_writel(host, CR, MCI_CR_SWRST); + mci_writel(host, IDR, ~0UL); + mci_writel(host, CR, MCI_CR_MCIEN); + } + + /* Clean up queue if present */ + if (mrq) { + /* + * Reset controller to terminate any ongoing + * commands or data transfers. + */ + mci_writel(host, CR, MCI_CR_SWRST); + + if (!atmci_is_completed(host, EVENT_CMD_COMPLETE)) + mrq->cmd->error = -ENOMEDIUM; + + if (mrq->data && !atmci_is_completed(host, + EVENT_DATA_COMPLETE)) { + host->data = NULL; + mrq->data->error = -ENOMEDIUM; + } + if (mrq->stop && !atmci_is_completed(host, + EVENT_STOP_COMPLETE)) + mrq->stop->error = -ENOMEDIUM; + + host->cmd = NULL; + atmci_request_end(host->mmc, mrq); + } + + mmc_detect_change(host->mmc, 0); + } +} + +static void atmci_tasklet_func(unsigned long priv) +{ + struct mmc_host *mmc = (struct mmc_host *)priv; + struct atmel_mci *host = mmc_priv(mmc); + struct mmc_request *mrq = host->mrq; + struct mmc_data *data = host->data; + + dev_vdbg(&mmc->class_dev, + "tasklet: pending/completed/mask %lx/%lx/%x\n", + host->pending_events, host->completed_events, + mci_readl(host, IMR)); + + if (atmci_test_and_clear_pending(host, EVENT_CMD_COMPLETE)) { + /* + * host->cmd must be set to NULL before the interrupt + * handler sees EVENT_CMD_COMPLETE + */ + host->cmd = NULL; + smp_wmb(); + atmci_set_completed(host, EVENT_CMD_COMPLETE); + atmci_command_complete(host, mrq->cmd, host->cmd_status); + + if (!mrq->cmd->error && mrq->stop + && atmci_is_completed(host, EVENT_XFER_COMPLETE) + && !atmci_test_and_set_completed(host, + EVENT_STOP_SENT)) + send_stop_cmd(host->mmc, mrq->data); + } + if (atmci_test_and_clear_pending(host, EVENT_STOP_COMPLETE)) { + /* + * host->cmd must be set to NULL before the interrupt + * handler sees EVENT_STOP_COMPLETE + */ + host->cmd = NULL; + smp_wmb(); + atmci_set_completed(host, EVENT_STOP_COMPLETE); + atmci_command_complete(host, mrq->stop, host->stop_status); + } + if (atmci_test_and_clear_pending(host, EVENT_DATA_ERROR)) { + u32 status = host->data_status; + + dev_vdbg(&mmc->class_dev, "data error: status=%08x\n", status); + + atmci_set_completed(host, EVENT_DATA_ERROR); + atmci_set_completed(host, EVENT_DATA_COMPLETE); + + if (status & MCI_DTOE) { + dev_dbg(&mmc->class_dev, + "data timeout error\n"); + data->error = -ETIMEDOUT; + } else if (status & MCI_DCRCE) { + dev_dbg(&mmc->class_dev, "data CRC error\n"); + data->error = -EILSEQ; + } else { + dev_dbg(&mmc->class_dev, + "data FIFO error (status=%08x)\n", + status); + data->error = -EIO; + } + + if (host->present && data->stop + && atmci_is_completed(host, EVENT_CMD_COMPLETE) + && !atmci_test_and_set_completed( + host, EVENT_STOP_SENT)) + send_stop_cmd(host->mmc, data); + + host->data = NULL; + } + if (atmci_test_and_clear_pending(host, EVENT_DATA_COMPLETE)) { + atmci_set_completed(host, EVENT_DATA_COMPLETE); + + if (!atmci_is_completed(host, EVENT_DATA_ERROR)) { + data->bytes_xfered = data->blocks * data->blksz; + data->error = 0; + } + + host->data = NULL; + } + + if (host->mrq && !host->cmd && !host->data) + atmci_request_end(mmc, host->mrq); +} + +static void atmci_read_data_pio(struct atmel_mci *host) +{ + struct scatterlist *sg = host->sg; + void *buf = sg_virt(sg); + unsigned int offset = host->pio_offset; + struct mmc_data *data = host->data; + u32 value; + u32 status; + unsigned int nbytes = 0; + + do { + value = mci_readl(host, RDR); + if (likely(offset + 4 <= sg->length)) { + put_unaligned(value, (u32 *)(buf + offset)); + + offset += 4; + nbytes += 4; + + if (offset == sg->length) { + host->sg = sg = sg_next(sg); + if (!sg) + goto done; + + offset = 0; + buf = sg_virt(sg); + } + } else { + unsigned int remaining = sg->length - offset; + memcpy(buf + offset, &value, remaining); + nbytes += remaining; + + flush_dcache_page(sg_page(sg)); + host->sg = sg = sg_next(sg); + if (!sg) + goto done; + + offset = 4 - remaining; + buf = sg_virt(sg); + memcpy(buf, (u8 *)&value + remaining, offset); + nbytes += offset; + } + + status = mci_readl(host, SR); + if (status & ATMCI_DATA_ERROR_FLAGS) { + mci_writel(host, IDR, (MCI_NOTBUSY | MCI_RXRDY + | ATMCI_DATA_ERROR_FLAGS)); + host->data_status = status; + atmci_set_pending(host, EVENT_DATA_ERROR); + tasklet_schedule(&host->tasklet); + break; + } + } while (status & MCI_RXRDY); + + host->pio_offset = offset; + data->bytes_xfered += nbytes; + + return; + +done: + mci_writel(host, IDR, MCI_RXRDY); + mci_writel(host, IER, MCI_NOTBUSY); + data->bytes_xfered += nbytes; + atmci_set_completed(host, EVENT_XFER_COMPLETE); + if (data->stop && atmci_is_completed(host, EVENT_CMD_COMPLETE) + && !atmci_test_and_set_completed(host, EVENT_STOP_SENT)) + send_stop_cmd(host->mmc, data); +} + +static void atmci_write_data_pio(struct atmel_mci *host) +{ + struct scatterlist *sg = host->sg; + void *buf = sg_virt(sg); + unsigned int offset = host->pio_offset; + struct mmc_data *data = host->data; + u32 value; + u32 status; + unsigned int nbytes = 0; + + do { + if (likely(offset + 4 <= sg->length)) { + value = get_unaligned((u32 *)(buf + offset)); + mci_writel(host, TDR, value); + + offset += 4; + nbytes += 4; + if (offset == sg->length) { + host->sg = sg = sg_next(sg); + if (!sg) + goto done; + + offset = 0; + buf = sg_virt(sg); + } + } else { + unsigned int remaining = sg->length - offset; + + value = 0; + memcpy(&value, buf + offset, remaining); + nbytes += remaining; + + host->sg = sg = sg_next(sg); + if (!sg) { + mci_writel(host, TDR, value); + goto done; + } + + offset = 4 - remaining; + buf = sg_virt(sg); + memcpy((u8 *)&value + remaining, buf, offset); + mci_writel(host, TDR, value); + nbytes += offset; + } + + status = mci_readl(host, SR); + if (status & ATMCI_DATA_ERROR_FLAGS) { + mci_writel(host, IDR, (MCI_NOTBUSY | MCI_TXRDY + | ATMCI_DATA_ERROR_FLAGS)); + host->data_status = status; + atmci_set_pending(host, EVENT_DATA_ERROR); + tasklet_schedule(&host->tasklet); + break; + } + } while (status & MCI_TXRDY); + + host->pio_offset = offset; + data->bytes_xfered += nbytes; + + return; + +done: + mci_writel(host, IDR, MCI_TXRDY); + mci_writel(host, IER, MCI_NOTBUSY); + data->bytes_xfered += nbytes; + atmci_set_completed(host, EVENT_XFER_COMPLETE); + if (data->stop && atmci_is_completed(host, EVENT_CMD_COMPLETE) + && !atmci_test_and_set_completed(host, EVENT_STOP_SENT)) + send_stop_cmd(host->mmc, data); +} + +static void atmci_cmd_interrupt(struct mmc_host *mmc, u32 status) +{ + struct atmel_mci *host = mmc_priv(mmc); + + mci_writel(host, IDR, MCI_CMDRDY); + + if (atmci_is_completed(host, EVENT_STOP_SENT)) { + host->stop_status = status; + atmci_set_pending(host, EVENT_STOP_COMPLETE); + } else { + host->cmd_status = status; + atmci_set_pending(host, EVENT_CMD_COMPLETE); + } + + tasklet_schedule(&host->tasklet); +} + +static irqreturn_t atmci_interrupt(int irq, void *dev_id) +{ + struct mmc_host *mmc = dev_id; + struct atmel_mci *host = mmc_priv(mmc); + u32 status, mask, pending; + unsigned int pass_count = 0; + + spin_lock(&mmc->lock); + + do { + status = mci_readl(host, SR); + mask = mci_readl(host, IMR); + pending = status & mask; + if (!pending) + break; + + if (pending & ATMCI_DATA_ERROR_FLAGS) { + mci_writel(host, IDR, ATMCI_DATA_ERROR_FLAGS + | MCI_RXRDY | MCI_TXRDY); + pending &= mci_readl(host, IMR); + host->data_status = status; + atmci_set_pending(host, EVENT_DATA_ERROR); + tasklet_schedule(&host->tasklet); + } + if (pending & MCI_NOTBUSY) { + mci_writel(host, IDR, (MCI_NOTBUSY + | ATMCI_DATA_ERROR_FLAGS)); + atmci_set_pending(host, EVENT_DATA_COMPLETE); + tasklet_schedule(&host->tasklet); + } + if (pending & MCI_RXRDY) + atmci_read_data_pio(host); + if (pending & MCI_TXRDY) + atmci_write_data_pio(host); + + if (pending & MCI_CMDRDY) + atmci_cmd_interrupt(mmc, status); + } while (pass_count++ < 5); + + spin_unlock(&mmc->lock); + + return pass_count ? IRQ_HANDLED : IRQ_NONE; +} + +static irqreturn_t atmci_detect_interrupt(int irq, void *dev_id) +{ + struct mmc_host *mmc = dev_id; + struct atmel_mci *host = mmc_priv(mmc); + + /* + * Disable interrupts until the pin has stabilized and check + * the state then. Use mod_timer() since we may be in the + * middle of the timer routine when this interrupt triggers. + */ + disable_irq_nosync(irq); + mod_timer(&host->detect_timer, jiffies + msecs_to_jiffies(20)); + + return IRQ_HANDLED; +} + +static int __init atmci_probe(struct platform_device *pdev) +{ + struct mci_platform_data *pdata; + struct atmel_mci *host; + struct mmc_host *mmc; + struct resource *regs; + int irq; + int ret; + + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!regs) + return -ENXIO; + pdata = pdev->dev.platform_data; + if (!pdata) + return -ENXIO; + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + mmc = mmc_alloc_host(sizeof(struct atmel_mci), &pdev->dev); + if (!mmc) + return -ENOMEM; + + host = mmc_priv(mmc); + host->pdev = pdev; + host->mmc = mmc; + host->detect_pin = pdata->detect_pin; + host->wp_pin = pdata->wp_pin; + + host->mck = clk_get(&pdev->dev, "mci_clk"); + if (IS_ERR(host->mck)) { + ret = PTR_ERR(host->mck); + goto err_clk_get; + } + + ret = -ENOMEM; + host->regs = ioremap(regs->start, regs->end - regs->start + 1); + if (!host->regs) + goto err_ioremap; + + clk_enable(host->mck); + mci_writel(host, CR, MCI_CR_SWRST); + host->bus_hz = clk_get_rate(host->mck); + clk_disable(host->mck); + + host->mapbase = regs->start; + + mmc->ops = &atmci_ops; + mmc->f_min = (host->bus_hz + 511) / 512; + mmc->f_max = host->bus_hz / 2; + mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; + mmc->caps |= MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE; + + mmc->max_hw_segs = 64; + mmc->max_phys_segs = 64; + mmc->max_req_size = 32768 * 512; + mmc->max_blk_size = 32768; + mmc->max_blk_count = 512; + + tasklet_init(&host->tasklet, atmci_tasklet_func, (unsigned long)mmc); + + ret = request_irq(irq, atmci_interrupt, 0, pdev->dev.bus_id, mmc); + if (ret) + goto err_request_irq; + + /* Assume card is present if we don't have a detect pin */ + host->present = 1; + if (host->detect_pin >= 0) { + if (gpio_request(host->detect_pin, "mmc_detect")) { + dev_dbg(&mmc->class_dev, "no detect pin available\n"); + host->detect_pin = -1; + } else { + host->present = !gpio_get_value(host->detect_pin); + } + } + if (host->wp_pin >= 0) { + if (gpio_request(host->wp_pin, "mmc_wp")) { + dev_dbg(&mmc->class_dev, "no WP pin available\n"); + host->wp_pin = -1; + } + } + + platform_set_drvdata(pdev, host); + + mmc_add_host(mmc); + + if (host->detect_pin >= 0) { + setup_timer(&host->detect_timer, atmci_detect_change, + (unsigned long)host); + + ret = request_irq(gpio_to_irq(host->detect_pin), + atmci_detect_interrupt, + IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING, + "mmc-detect", mmc); + if (ret) { + dev_dbg(&mmc->class_dev, + "could not request IRQ %d for detect pin\n", + gpio_to_irq(host->detect_pin)); + gpio_free(host->detect_pin); + host->detect_pin = -1; + } + } + + dev_info(&mmc->class_dev, + "Atmel MCI controller at 0x%08lx irq %d\n", + host->mapbase, irq); + + return 0; + +err_request_irq: + iounmap(host->regs); +err_ioremap: + clk_put(host->mck); +err_clk_get: + mmc_free_host(mmc); + return ret; +} + +static int __exit atmci_remove(struct platform_device *pdev) +{ + struct atmel_mci *host = platform_get_drvdata(pdev); + + platform_set_drvdata(pdev, NULL); + + if (host) { + if (host->detect_pin >= 0) { + int pin = host->detect_pin; + + /* Make sure the timer doesn't enable the interrupt */ + host->detect_pin = -1; + smp_wmb(); + + free_irq(gpio_to_irq(pin), host->mmc); + del_timer_sync(&host->detect_timer); + gpio_free(pin); + } + + mmc_remove_host(host->mmc); + + clk_enable(host->mck); + mci_writel(host, IDR, ~0UL); + mci_writel(host, CR, MCI_CR_MCIDIS); + mci_readl(host, SR); + clk_disable(host->mck); + + if (host->wp_pin >= 0) + gpio_free(host->wp_pin); + + free_irq(platform_get_irq(pdev, 0), host->mmc); + iounmap(host->regs); + + clk_put(host->mck); + + mmc_free_host(host->mmc); + } + return 0; +} + +static struct platform_driver atmci_driver = { + .remove = __exit_p(atmci_remove), + .driver = { + .name = "atmel_mci", + }, +}; + +static int __init atmci_init(void) +{ + return platform_driver_probe(&atmci_driver, atmci_probe); +} + +static void __exit atmci_exit(void) +{ + platform_driver_unregister(&atmci_driver); +} + +module_init(atmci_init); +module_exit(atmci_exit); + +MODULE_DESCRIPTION("Atmel Multimedia Card Interface driver"); +MODULE_AUTHOR("Haavard Skinnemoen "); +MODULE_LICENSE("GPL v2"); diff --git a/include/asm-avr32/arch-at32ap/board.h b/include/asm-avr32/arch-at32ap/board.h index b4cddfa..a378386 100644 --- a/include/asm-avr32/arch-at32ap/board.h +++ b/include/asm-avr32/arch-at32ap/board.h @@ -77,7 +77,11 @@ struct i2c_board_info; struct platform_device *at32_add_device_twi(unsigned int id, struct i2c_board_info *b, unsigned int n); -struct platform_device *at32_add_device_mci(unsigned int id); + +struct mci_platform_data; +struct platform_device * +at32_add_device_mci(unsigned int id, struct mci_platform_data *data); + struct platform_device *at32_add_device_ac97c(unsigned int id); struct platform_device *at32_add_device_abdac(unsigned int id); struct platform_device *at32_add_device_psif(unsigned int id); diff --git a/include/asm-avr32/atmel-mci.h b/include/asm-avr32/atmel-mci.h new file mode 100644 index 0000000..c2ea6e1 --- /dev/null +++ b/include/asm-avr32/atmel-mci.h @@ -0,0 +1,9 @@ +#ifndef __ASM_AVR32_ATMEL_MCI_H +#define __ASM_AVR32_ATMEL_MCI_H + +struct mci_platform_data { + int detect_pin; + int wp_pin; +}; + +#endif /* __ASM_AVR32_ATMEL_MCI_H */ -- cgit v0.10.2 From 97067d5581ec831a75a45a52e417bee0f7943dbf Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Sun, 6 Jul 2008 00:51:07 +0200 Subject: wbsd: fix bad dma_addr_t conversion DMA addresses are not pointers, so don't treat them as such. Avoids compiler warnings when using 64-bit DMA addresses on a 32-bit system. Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c index c2b6160..f7dcd8e 100644 --- a/drivers/mmc/host/wbsd.c +++ b/drivers/mmc/host/wbsd.c @@ -1420,7 +1420,7 @@ kfree: dma_unmap_single(mmc_dev(host->mmc), host->dma_addr, WBSD_DMA_SIZE, DMA_BIDIRECTIONAL); - host->dma_addr = (dma_addr_t)NULL; + host->dma_addr = 0; kfree(host->dma_buffer); host->dma_buffer = NULL; @@ -1445,7 +1445,7 @@ static void wbsd_release_dma(struct wbsd_host *host) host->dma = -1; host->dma_buffer = NULL; - host->dma_addr = (dma_addr_t)NULL; + host->dma_addr = 0; } /* -- cgit v0.10.2 From 23af60398af2f5033e2f53665538a09f498dbc03 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Sun, 6 Jul 2008 01:10:27 +0200 Subject: mmc: remove multiwrite capability Relax requirements on host controllers and only require that they do not report a transfer count than is larger than the actual one (i.e. a lower value is okay). This is how many other parts of the kernel behaves so upper layers should already be prepared to handle that scenario. This gives us a performance boost on MMC cards. Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 4b0f822..66e5a54 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -237,17 +237,6 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) if (brq.data.blocks > card->host->max_blk_count) brq.data.blocks = card->host->max_blk_count; - /* - * If the host doesn't support multiple block writes, force - * block writes to single block. SD cards are excepted from - * this rule as they support querying the number of - * successfully written sectors. - */ - if (rq_data_dir(req) != READ && - !(card->host->caps & MMC_CAP_MULTIWRITE) && - !mmc_card_sd(card)) - brq.data.blocks = 1; - if (brq.data.blocks > 1) { /* SPI multiblock writes terminate using a special * token, not a STOP_TRANSMISSION request. @@ -367,30 +356,32 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) * mark the known good sectors as ok. * * If the card is not SD, we can still ok written sectors - * if the controller can do proper error reporting. + * as reported by the controller (which might be less than + * the real number of written sectors, but never more). * * For reads we just fail the entire chunk as that should * be safe in all cases. */ - if (rq_data_dir(req) != READ && mmc_card_sd(card)) { - u32 blocks; - unsigned int bytes; - - blocks = mmc_sd_num_wr_blocks(card); - if (blocks != (u32)-1) { - if (card->csd.write_partial) - bytes = blocks << md->block_bits; - else - bytes = blocks << 9; + if (rq_data_dir(req) != READ) { + if (mmc_card_sd(card)) { + u32 blocks; + unsigned int bytes; + + blocks = mmc_sd_num_wr_blocks(card); + if (blocks != (u32)-1) { + if (card->csd.write_partial) + bytes = blocks << md->block_bits; + else + bytes = blocks << 9; + spin_lock_irq(&md->lock); + ret = __blk_end_request(req, 0, bytes); + spin_unlock_irq(&md->lock); + } + } else { spin_lock_irq(&md->lock); - ret = __blk_end_request(req, 0, bytes); + ret = __blk_end_request(req, 0, brq.data.bytes_xfered); spin_unlock_irq(&md->lock); } - } else if (rq_data_dir(req) != READ && - (card->host->caps & MMC_CAP_MULTIWRITE)) { - spin_lock_irq(&md->lock); - ret = __blk_end_request(req, 0, brq.data.bytes_xfered); - spin_unlock_irq(&md->lock); } mmc_release_host(card->host); diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index 16df235..f15e206 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -995,7 +995,7 @@ static int __init at91_mci_probe(struct platform_device *pdev) mmc->f_min = 375000; mmc->f_max = 25000000; mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; - mmc->caps = MMC_CAP_MULTIWRITE | MMC_CAP_SDIO_IRQ; + mmc->caps = MMC_CAP_SDIO_IRQ; mmc->max_blk_size = 4095; mmc->max_blk_count = mmc->max_req_size; diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index 25d5324a..cce873c 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -849,7 +849,7 @@ static int __init atmci_probe(struct platform_device *pdev) mmc->f_min = (host->bus_hz + 511) / 512; mmc->f_max = host->bus_hz / 2; mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; - mmc->caps |= MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE; + mmc->caps |= MMC_CAP_4_BIT_DATA; mmc->max_hw_segs = 64; mmc->max_phys_segs = 64; diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 4e82f64..41cc633 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1252,10 +1252,7 @@ static int mmc_spi_probe(struct spi_device *spi) mmc->ops = &mmc_spi_ops; mmc->max_blk_size = MMC_SPI_BLOCKSIZE; - /* As long as we keep track of the number of successfully - * transmitted blocks, we're good for multiwrite. - */ - mmc->caps = MMC_CAP_SPI | MMC_CAP_MULTIWRITE; + mmc->caps = MMC_CAP_SPI; /* SPI doesn't need the lowspeed device identification thing for * MMC or SD cards, since it never comes up in open drain mode. diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index da5feca..696cf36 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -535,7 +535,6 @@ static int mmci_probe(struct amba_device *dev, void *id) mmc->f_min = (host->mclk + 511) / 512; mmc->f_max = min(host->mclk, fmax); mmc->ocr_avail = plat->ocr_mask; - mmc->caps = MMC_CAP_MULTIWRITE; /* * We can do SGIO diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c index 549517c..dbc26eb 100644 --- a/drivers/mmc/host/omap.c +++ b/drivers/mmc/host/omap.c @@ -1317,7 +1317,7 @@ static int __init mmc_omap_new_slot(struct mmc_omap_host *host, int id) host->slots[id] = slot; - mmc->caps = MMC_CAP_MULTIWRITE; + mmc->caps = 0; if (host->pdata->conf.wire4) mmc->caps |= MMC_CAP_4_BIT_DATA; diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c index 1c14a18..1384484 100644 --- a/drivers/mmc/host/tifm_sd.c +++ b/drivers/mmc/host/tifm_sd.c @@ -973,7 +973,7 @@ static int tifm_sd_probe(struct tifm_dev *sock) mmc->ops = &tifm_sd_ops; mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; - mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE; + mmc->caps = MMC_CAP_4_BIT_DATA; mmc->f_min = 20000000 / 60; mmc->f_max = 24000000; diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c index f7dcd8e..adda379 100644 --- a/drivers/mmc/host/wbsd.c +++ b/drivers/mmc/host/wbsd.c @@ -1219,7 +1219,7 @@ static int __devinit wbsd_alloc_mmc(struct device *dev) mmc->f_min = 375000; mmc->f_max = 24000000; mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; - mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE; + mmc->caps = MMC_CAP_4_BIT_DATA; spin_lock_init(&host->lock); diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 753b723..10a2080 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -111,12 +111,11 @@ struct mmc_host { unsigned long caps; /* Host capabilities */ #define MMC_CAP_4_BIT_DATA (1 << 0) /* Can the host do 4 bit transfers */ -#define MMC_CAP_MULTIWRITE (1 << 1) /* Can accurately report bytes sent to card on error */ -#define MMC_CAP_MMC_HIGHSPEED (1 << 2) /* Can do MMC high-speed timing */ -#define MMC_CAP_SD_HIGHSPEED (1 << 3) /* Can do SD high-speed timing */ -#define MMC_CAP_SDIO_IRQ (1 << 4) /* Can signal pending SDIO IRQs */ -#define MMC_CAP_SPI (1 << 5) /* Talks only SPI protocols */ -#define MMC_CAP_NEEDS_POLL (1 << 6) /* Needs polling for card-detection */ +#define MMC_CAP_MMC_HIGHSPEED (1 << 1) /* Can do MMC high-speed timing */ +#define MMC_CAP_SD_HIGHSPEED (1 << 2) /* Can do SD high-speed timing */ +#define MMC_CAP_SDIO_IRQ (1 << 3) /* Can signal pending SDIO IRQs */ +#define MMC_CAP_SPI (1 << 4) /* Talks only SPI protocols */ +#define MMC_CAP_NEEDS_POLL (1 << 5) /* Needs polling for card-detection */ /* host specific block data */ unsigned int max_seg_size; /* see blk_queue_max_segment_size */ -- cgit v0.10.2 From c8b3e02eb250ceb661437e9b198757eff0eb6fd2 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Sat, 5 Jul 2008 19:52:04 +0300 Subject: mmc: fix spares errors of sdhci.c 1. sdhci_prepare_data: fix shadowing of count variable u8 count int count -> sg_cnt; 2. sdhci_add_host: assignment of integer to pointer dma_mask = 0 -> dma_mask = NULL; Signed-off-by: Tomas Winkler Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index d3e4a39..13bd681 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -672,14 +672,14 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) host->ioaddr + SDHCI_ADMA_ADDRESS); } } else { - int count; + int sg_cnt; - count = dma_map_sg(mmc_dev(host->mmc), + sg_cnt = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len, (data->flags & MMC_DATA_READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); - if (count == 0) { + if (sg_cnt == 0) { /* * This only happens when someone fed * us an invalid request. @@ -1583,7 +1583,7 @@ int sdhci_add_host(struct sdhci_host *host) /* XXX: Hack to get MMC layer to avoid highmem */ if (!(host->flags & SDHCI_USE_DMA)) - mmc_dev(host->mmc)->dma_mask = 0; + mmc_dev(host->mmc)->dma_mask = NULL; host->max_clk = (caps & SDHCI_CLOCK_BASE_MASK) >> SDHCI_CLOCK_BASE_SHIFT; -- cgit v0.10.2 From c71f65129a1fb67bc6b9b8d03b493675b5c9302b Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Mon, 7 Jul 2008 17:25:56 -0400 Subject: mmc: OLPC: update vdd/powerup quirk comment This comment update got lost in the great floo^Wmerge. As Pierre pointed out, no one knows what 'CaFe' is. Signed-off-by: Andres Salomon Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 13bd681..17701c3 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -978,7 +978,7 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned short power) } /* - * At least the CaFe chip gets confused if we set the voltage + * At least the Marvell CaFe chip gets confused if we set the voltage * and set turn on power at the same time, so set the voltage first. */ if ((host->quirks & SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER)) -- cgit v0.10.2 From 62a7573ee9f31d4fdb330b3e68ebf6efaba1d57c Mon Sep 17 00:00:00 2001 From: Benzi Zbit Date: Thu, 10 Jul 2008 02:41:43 +0300 Subject: sdio: fix the use of hard coded timeout value. This adds reading and using of enable_timeout from the CIS Signed-off-by: Benzi Zbit Signed-off-by: Tomas Winkler Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/core/sdio_cis.c b/drivers/mmc/core/sdio_cis.c index d5e51b1..956bd76 100644 --- a/drivers/mmc/core/sdio_cis.c +++ b/drivers/mmc/core/sdio_cis.c @@ -129,6 +129,12 @@ static int cistpl_funce_func(struct sdio_func *func, /* TPLFE_MAX_BLK_SIZE */ func->max_blksize = buf[12] | (buf[13] << 8); + /* TPLFE_ENABLE_TIMEOUT_VAL, present in ver 1.1 and above */ + if (vsn > SDIO_SDIO_REV_1_00) + func->enable_timeout = (buf[28] | (buf[29] << 8)) * 10; + else + func->enable_timeout = jiffies_to_msecs(HZ); + return 0; } diff --git a/drivers/mmc/core/sdio_io.c b/drivers/mmc/core/sdio_io.c index 3ccf691..0888df6 100755 --- a/drivers/mmc/core/sdio_io.c +++ b/drivers/mmc/core/sdio_io.c @@ -76,11 +76,7 @@ int sdio_enable_func(struct sdio_func *func) if (ret) goto err; - /* - * FIXME: This should timeout based on information in the CIS, - * but we don't have card to parse that yet. - */ - timeout = jiffies + HZ; + timeout = jiffies + msecs_to_jiffies(func->enable_timeout); while (1) { ret = mmc_io_rw_direct(func->card, 0, 0, SDIO_CCCR_IORx, 0, ®); diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index 28fb0a3..07bee4a 100755 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -46,6 +46,8 @@ struct sdio_func { unsigned max_blksize; /* maximum block size */ unsigned cur_blksize; /* current block size */ + unsigned enable_timeout; /* max enable timeout in msec */ + unsigned int state; /* function state */ #define SDIO_STATE_PRESENT (1<<0) /* present in sysfs */ -- cgit v0.10.2 From ea901300cd8b809285fa5cbced18124f127e0ac6 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 10 Jul 2008 03:01:56 +0300 Subject: sdio: sdio_io.c Fix sparse warnings Unfold nested macros it creates not readable code and sparse warnings sdio_io.c:190:9: warning: symbol '_min1' shadows an earlier one Signed-off-by: Tomas Winkler Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/core/sdio_io.c b/drivers/mmc/core/sdio_io.c index 0888df6..f61fc2d 100755 --- a/drivers/mmc/core/sdio_io.c +++ b/drivers/mmc/core/sdio_io.c @@ -187,11 +187,10 @@ EXPORT_SYMBOL_GPL(sdio_set_block_size); */ static inline unsigned int sdio_max_byte_size(struct sdio_func *func) { - return min(min(min( - func->card->host->max_seg_size, - func->card->host->max_blk_size), - func->max_blksize), - 512u); /* maximum size for byte mode */ + unsigned mval = min(func->card->host->max_seg_size, + func->card->host->max_blk_size); + mval = min(mval, func->max_blksize); + return min(mval, 512u); /* maximum size for byte mode */ } /** -- cgit v0.10.2 From 6a36913a33cf3d366e124f25c486a71212d02bce Mon Sep 17 00:00:00 2001 From: Sascha Sommer Date: Tue, 15 Jul 2008 14:21:29 +0200 Subject: mmc: host driver for Ricoh Bay1Controllers Signed-off-by: Sascha Sommer Signed-off-by: Pierre Ossman diff --git a/MAINTAINERS b/MAINTAINERS index 9def20a..c16b986 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3609,6 +3609,12 @@ P: Jim Cromie M: jim.cromie@gmail.com S: Maintained +SDRICOH_CS MMC/SD HOST CONTROLLER INTERFACE DRIVER +P: Sascha Sommer +M: saschasommer@freenet.de +L: sdricohcs-devel@lists.sourceforge.net (subscribers-only) +S: Maintained + SECURITY CONTACT P: Security Officers M: security@kernel.org diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 198df42..dc6f257 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -164,3 +164,13 @@ config MMC_S3C If unsure, say N. +config MMC_SDRICOH_CS + tristate "MMC/SD driver for Ricoh Bay1Controllers (EXPERIMENTAL)" + depends on EXPERIMENTAL && MMC && PCI && PCMCIA + help + Say Y here if your Notebook reports a Ricoh Bay1Controller PCMCIA + card whenever you insert a MMC or SD card into the card slot. + + To compile this driver as a module, choose M here: the + module will be called sdricoh_cs. + diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile index 2dc9ff2..db52eeb 100644 --- a/drivers/mmc/host/Makefile +++ b/drivers/mmc/host/Makefile @@ -20,3 +20,5 @@ obj-$(CONFIG_MMC_ATMELMCI) += atmel-mci.o obj-$(CONFIG_MMC_TIFM_SD) += tifm_sd.o obj-$(CONFIG_MMC_SPI) += mmc_spi.o obj-$(CONFIG_MMC_S3C) += s3cmci.o +obj-$(CONFIG_MMC_SDRICOH_CS) += sdricoh_cs.o + diff --git a/drivers/mmc/host/sdricoh_cs.c b/drivers/mmc/host/sdricoh_cs.c new file mode 100644 index 0000000..f99e9f7 --- /dev/null +++ b/drivers/mmc/host/sdricoh_cs.c @@ -0,0 +1,575 @@ +/* + * sdricoh_cs.c - driver for Ricoh Secure Digital Card Readers that can be + * found on some Ricoh RL5c476 II cardbus bridge + * + * Copyright (C) 2006 - 2008 Sascha Sommer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +/* +#define DEBUG +#define VERBOSE_DEBUG +*/ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#define DRIVER_NAME "sdricoh_cs" + +static unsigned int switchlocked; + +/* i/o region */ +#define SDRICOH_PCI_REGION 0 +#define SDRICOH_PCI_REGION_SIZE 0x1000 + +/* registers */ +#define R104_VERSION 0x104 +#define R200_CMD 0x200 +#define R204_CMD_ARG 0x204 +#define R208_DATAIO 0x208 +#define R20C_RESP 0x20c +#define R21C_STATUS 0x21c +#define R2E0_INIT 0x2e0 +#define R2E4_STATUS_RESP 0x2e4 +#define R2F0_RESET 0x2f0 +#define R224_MODE 0x224 +#define R226_BLOCKSIZE 0x226 +#define R228_POWER 0x228 +#define R230_DATA 0x230 + +/* flags for the R21C_STATUS register */ +#define STATUS_CMD_FINISHED 0x00000001 +#define STATUS_TRANSFER_FINISHED 0x00000004 +#define STATUS_CARD_INSERTED 0x00000020 +#define STATUS_CARD_LOCKED 0x00000080 +#define STATUS_CMD_TIMEOUT 0x00400000 +#define STATUS_READY_TO_READ 0x01000000 +#define STATUS_READY_TO_WRITE 0x02000000 +#define STATUS_BUSY 0x40000000 + +/* timeouts */ +#define INIT_TIMEOUT 100 +#define CMD_TIMEOUT 100000 +#define TRANSFER_TIMEOUT 100000 +#define BUSY_TIMEOUT 32767 + +/* list of supported pcmcia devices */ +static struct pcmcia_device_id pcmcia_ids[] = { + /* vendor and device strings followed by their crc32 hashes */ + PCMCIA_DEVICE_PROD_ID12("RICOH", "Bay1Controller", 0xd9f522ed, + 0xc3901202), + PCMCIA_DEVICE_NULL, +}; + +MODULE_DEVICE_TABLE(pcmcia, pcmcia_ids); + +/* mmc privdata */ +struct sdricoh_host { + struct device *dev; + struct mmc_host *mmc; /* MMC structure */ + unsigned char __iomem *iobase; + struct pci_dev *pci_dev; + int app_cmd; +}; + +/***************** register i/o helper functions *****************************/ + +static inline unsigned int sdricoh_readl(struct sdricoh_host *host, + unsigned int reg) +{ + unsigned int value = readl(host->iobase + reg); + dev_vdbg(host->dev, "rl %x 0x%x\n", reg, value); + return value; +} + +static inline void sdricoh_writel(struct sdricoh_host *host, unsigned int reg, + unsigned int value) +{ + writel(value, host->iobase + reg); + dev_vdbg(host->dev, "wl %x 0x%x\n", reg, value); + +} + +static inline unsigned int sdricoh_readw(struct sdricoh_host *host, + unsigned int reg) +{ + unsigned int value = readw(host->iobase + reg); + dev_vdbg(host->dev, "rb %x 0x%x\n", reg, value); + return value; +} + +static inline void sdricoh_writew(struct sdricoh_host *host, unsigned int reg, + unsigned short value) +{ + writew(value, host->iobase + reg); + dev_vdbg(host->dev, "ww %x 0x%x\n", reg, value); +} + +static inline unsigned int sdricoh_readb(struct sdricoh_host *host, + unsigned int reg) +{ + unsigned int value = readb(host->iobase + reg); + dev_vdbg(host->dev, "rb %x 0x%x\n", reg, value); + return value; +} + +static int sdricoh_query_status(struct sdricoh_host *host, unsigned int wanted, + unsigned int timeout){ + unsigned int loop; + unsigned int status = 0; + struct device *dev = host->dev; + for (loop = 0; loop < timeout; loop++) { + status = sdricoh_readl(host, R21C_STATUS); + sdricoh_writel(host, R2E4_STATUS_RESP, status); + if (status & wanted) + break; + } + + if (loop == timeout) { + dev_err(dev, "query_status: timeout waiting for %x\n", wanted); + return -ETIMEDOUT; + } + + /* do not do this check in the loop as some commands fail otherwise */ + if (status & 0x7F0000) { + dev_err(dev, "waiting for status bit %x failed\n", wanted); + return -EINVAL; + } + return 0; + +} + +static int sdricoh_mmc_cmd(struct sdricoh_host *host, unsigned char opcode, + unsigned int arg) +{ + unsigned int status; + int result = 0; + unsigned int loop = 0; + /* reset status reg? */ + sdricoh_writel(host, R21C_STATUS, 0x18); + /* fill parameters */ + sdricoh_writel(host, R204_CMD_ARG, arg); + sdricoh_writel(host, R200_CMD, (0x10000 << 8) | opcode); + /* wait for command completion */ + if (opcode) { + for (loop = 0; loop < CMD_TIMEOUT; loop++) { + status = sdricoh_readl(host, R21C_STATUS); + sdricoh_writel(host, R2E4_STATUS_RESP, status); + if (status & STATUS_CMD_FINISHED) + break; + } + /* don't check for timeout in the loop it is not always + reset correctly + */ + if (loop == CMD_TIMEOUT || status & STATUS_CMD_TIMEOUT) + result = -ETIMEDOUT; + + } + + return result; + +} + +static int sdricoh_reset(struct sdricoh_host *host) +{ + dev_dbg(host->dev, "reset\n"); + sdricoh_writel(host, R2F0_RESET, 0x10001); + sdricoh_writel(host, R2E0_INIT, 0x10000); + if (sdricoh_readl(host, R2E0_INIT) != 0x10000) + return -EIO; + sdricoh_writel(host, R2E0_INIT, 0x10007); + + sdricoh_writel(host, R224_MODE, 0x2000000); + sdricoh_writel(host, R228_POWER, 0xe0); + + + /* status register ? */ + sdricoh_writel(host, R21C_STATUS, 0x18); + + return 0; +} + +static int sdricoh_blockio(struct sdricoh_host *host, int read, + u8 *buf, int len) +{ + int size; + u32 data = 0; + /* wait until the data is available */ + if (read) { + if (sdricoh_query_status(host, STATUS_READY_TO_READ, + TRANSFER_TIMEOUT)) + return -ETIMEDOUT; + sdricoh_writel(host, R21C_STATUS, 0x18); + /* read data */ + while (len) { + data = sdricoh_readl(host, R230_DATA); + size = min(len, 4); + len -= size; + while (size) { + *buf = data & 0xFF; + buf++; + data >>= 8; + size--; + } + } + } else { + if (sdricoh_query_status(host, STATUS_READY_TO_WRITE, + TRANSFER_TIMEOUT)) + return -ETIMEDOUT; + sdricoh_writel(host, R21C_STATUS, 0x18); + /* write data */ + while (len) { + size = min(len, 4); + len -= size; + while (size) { + data >>= 8; + data |= (u32)*buf << 24; + buf++; + size--; + } + sdricoh_writel(host, R230_DATA, data); + } + } + + if (len) + return -EIO; + + return 0; +} + +static void sdricoh_request(struct mmc_host *mmc, struct mmc_request *mrq) +{ + struct sdricoh_host *host = mmc_priv(mmc); + struct mmc_command *cmd = mrq->cmd; + struct mmc_data *data = cmd->data; + struct device *dev = host->dev; + unsigned char opcode = cmd->opcode; + int i; + + dev_dbg(dev, "=============================\n"); + dev_dbg(dev, "sdricoh_request opcode=%i\n", opcode); + + sdricoh_writel(host, R21C_STATUS, 0x18); + + /* MMC_APP_CMDs need some special handling */ + if (host->app_cmd) { + opcode |= 64; + host->app_cmd = 0; + } else if (opcode == 55) + host->app_cmd = 1; + + /* read/write commands seem to require this */ + if (data) { + sdricoh_writew(host, R226_BLOCKSIZE, data->blksz); + sdricoh_writel(host, R208_DATAIO, 0); + } + + cmd->error = sdricoh_mmc_cmd(host, opcode, cmd->arg); + + /* read response buffer */ + if (cmd->flags & MMC_RSP_PRESENT) { + if (cmd->flags & MMC_RSP_136) { + /* CRC is stripped so we need to do some shifting. */ + for (i = 0; i < 4; i++) { + cmd->resp[i] = + sdricoh_readl(host, + R20C_RESP + (3 - i) * 4) << 8; + if (i != 3) + cmd->resp[i] |= + sdricoh_readb(host, R20C_RESP + + (3 - i) * 4 - 1); + } + } else + cmd->resp[0] = sdricoh_readl(host, R20C_RESP); + } + + /* transfer data */ + if (data && cmd->error == 0) { + dev_dbg(dev, "transfer: blksz %i blocks %i sg_len %i " + "sg length %i\n", data->blksz, data->blocks, + data->sg_len, data->sg->length); + + /* enter data reading mode */ + sdricoh_writel(host, R21C_STATUS, 0x837f031e); + for (i = 0; i < data->blocks; i++) { + size_t len = data->blksz; + u8 *buf; + struct page *page; + int result; + page = sg_page(data->sg); + + buf = kmap(page) + data->sg->offset + (len * i); + result = + sdricoh_blockio(host, + data->flags & MMC_DATA_READ, buf, len); + kunmap(page); + flush_dcache_page(page); + if (result) { + dev_err(dev, "sdricoh_request: cmd %i " + "block transfer failed\n", cmd->opcode); + cmd->error = result; + break; + } else + data->bytes_xfered += len; + } + + sdricoh_writel(host, R208_DATAIO, 1); + + if (sdricoh_query_status(host, STATUS_TRANSFER_FINISHED, + TRANSFER_TIMEOUT)) { + dev_err(dev, "sdricoh_request: transfer end error\n"); + cmd->error = -EINVAL; + } + } + /* FIXME check busy flag */ + + mmc_request_done(mmc, mrq); + dev_dbg(dev, "=============================\n"); +} + +static void sdricoh_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct sdricoh_host *host = mmc_priv(mmc); + dev_dbg(host->dev, "set_ios\n"); + + if (ios->power_mode == MMC_POWER_ON) { + sdricoh_writel(host, R228_POWER, 0xc0e0); + + if (ios->bus_width == MMC_BUS_WIDTH_4) { + sdricoh_writel(host, R224_MODE, 0x2000300); + sdricoh_writel(host, R228_POWER, 0x40e0); + } else { + sdricoh_writel(host, R224_MODE, 0x2000340); + } + + } else if (ios->power_mode == MMC_POWER_UP) { + sdricoh_writel(host, R224_MODE, 0x2000320); + sdricoh_writel(host, R228_POWER, 0xe0); + } +} + +static int sdricoh_get_ro(struct mmc_host *mmc) +{ + struct sdricoh_host *host = mmc_priv(mmc); + unsigned int status; + + status = sdricoh_readl(host, R21C_STATUS); + sdricoh_writel(host, R2E4_STATUS_RESP, status); + + /* some notebooks seem to have the locked flag switched */ + if (switchlocked) + return !(status & STATUS_CARD_LOCKED); + + return (status & STATUS_CARD_LOCKED); +} + +static struct mmc_host_ops sdricoh_ops = { + .request = sdricoh_request, + .set_ios = sdricoh_set_ios, + .get_ro = sdricoh_get_ro, +}; + +/* initialize the control and register it to the mmc framework */ +static int sdricoh_init_mmc(struct pci_dev *pci_dev, + struct pcmcia_device *pcmcia_dev) +{ + int result = 0; + void __iomem *iobase = NULL; + struct mmc_host *mmc = NULL; + struct sdricoh_host *host = NULL; + struct device *dev = &pcmcia_dev->dev; + /* map iomem */ + if (pci_resource_len(pci_dev, SDRICOH_PCI_REGION) != + SDRICOH_PCI_REGION_SIZE) { + dev_dbg(dev, "unexpected pci resource len\n"); + return -ENODEV; + } + iobase = + pci_iomap(pci_dev, SDRICOH_PCI_REGION, SDRICOH_PCI_REGION_SIZE); + if (!iobase) { + dev_err(dev, "unable to map iobase\n"); + return -ENODEV; + } + /* check version? */ + if (readl(iobase + R104_VERSION) != 0x4000) { + dev_dbg(dev, "no supported mmc controller found\n"); + result = -ENODEV; + goto err; + } + /* allocate privdata */ + mmc = pcmcia_dev->priv = + mmc_alloc_host(sizeof(struct sdricoh_host), &pcmcia_dev->dev); + if (!mmc) { + dev_err(dev, "mmc_alloc_host failed\n"); + result = -ENOMEM; + goto err; + } + host = mmc_priv(mmc); + + host->iobase = iobase; + host->dev = dev; + host->pci_dev = pci_dev; + + mmc->ops = &sdricoh_ops; + + /* FIXME: frequency and voltage handling is done by the controller + */ + mmc->f_min = 450000; + mmc->f_max = 24000000; + mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; + mmc->caps |= MMC_CAP_4_BIT_DATA; + + mmc->max_seg_size = 1024 * 512; + mmc->max_blk_size = 512; + + /* reset the controler */ + if (sdricoh_reset(host)) { + dev_dbg(dev, "could not reset\n"); + result = -EIO; + goto err; + + } + + result = mmc_add_host(mmc); + + if (!result) { + dev_dbg(dev, "mmc host registered\n"); + return 0; + } + +err: + if (iobase) + iounmap(iobase); + if (mmc) + mmc_free_host(mmc); + + return result; +} + +/* search for supported mmc controllers */ +static int sdricoh_pcmcia_probe(struct pcmcia_device *pcmcia_dev) +{ + struct pci_dev *pci_dev = NULL; + + dev_info(&pcmcia_dev->dev, "Searching MMC controller for pcmcia device" + " %s %s ...\n", pcmcia_dev->prod_id[0], pcmcia_dev->prod_id[1]); + + /* search pci cardbus bridge that contains the mmc controler */ + /* the io region is already claimed by yenta_socket... */ + while ((pci_dev = + pci_get_device(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_RL5C476, + pci_dev))) { + /* try to init the device */ + if (!sdricoh_init_mmc(pci_dev, pcmcia_dev)) { + dev_info(&pcmcia_dev->dev, "MMC controller found\n"); + return 0; + } + + } + dev_err(&pcmcia_dev->dev, "No MMC controller was found.\n"); + return -ENODEV; +} + +static void sdricoh_pcmcia_detach(struct pcmcia_device *link) +{ + struct mmc_host *mmc = link->priv; + + dev_dbg(&link->dev, "detach\n"); + + /* remove mmc host */ + if (mmc) { + struct sdricoh_host *host = mmc_priv(mmc); + mmc_remove_host(mmc); + pci_iounmap(host->pci_dev, host->iobase); + pci_dev_put(host->pci_dev); + mmc_free_host(mmc); + } + pcmcia_disable_device(link); + +} + +#ifdef CONFIG_PM +static int sdricoh_pcmcia_suspend(struct pcmcia_device *link) +{ + struct mmc_host *mmc = link->priv; + dev_dbg(&link->dev, "suspend\n"); + mmc_suspend_host(mmc, PMSG_SUSPEND); + return 0; +} + +static int sdricoh_pcmcia_resume(struct pcmcia_device *link) +{ + struct mmc_host *mmc = link->priv; + dev_dbg(&link->dev, "resume\n"); + sdricoh_reset(mmc_priv(mmc)); + mmc_resume_host(mmc); + return 0; +} +#else +#define sdricoh_pcmcia_suspend NULL +#define sdricoh_pcmcia_resume NULL +#endif + +static struct pcmcia_driver sdricoh_driver = { + .drv = { + .name = DRIVER_NAME, + }, + .probe = sdricoh_pcmcia_probe, + .remove = sdricoh_pcmcia_detach, + .id_table = pcmcia_ids, + .suspend = sdricoh_pcmcia_suspend, + .resume = sdricoh_pcmcia_resume, +}; + +/*****************************************************************************\ + * * + * Driver init/exit * + * * +\*****************************************************************************/ + +static int __init sdricoh_drv_init(void) +{ + return pcmcia_register_driver(&sdricoh_driver); +} + +static void __exit sdricoh_drv_exit(void) +{ + pcmcia_unregister_driver(&sdricoh_driver); +} + +module_init(sdricoh_drv_init); +module_exit(sdricoh_drv_exit); + +module_param(switchlocked, uint, 0444); + +MODULE_AUTHOR("Sascha Sommer "); +MODULE_DESCRIPTION("Ricoh PCMCIA Secure Digital Interface driver"); +MODULE_LICENSE("GPL"); + +MODULE_PARM_DESC(switchlocked, "Switch the cards locked status." + "Use this when unlocked cards are shown readonly (default 0)"); -- cgit v0.10.2 From c43d8636971c39da993e94082fd65bfff421618e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 Jul 2008 12:28:48 +0100 Subject: sdio_uart: Fix SDIO break control to now return success or an error This is a consequence of patch 9ea761bfef52c116fed4715d4043392c2503fe6a. Signed-off-by: David Howells Signed-off-by: Pierre Ossman diff --git a/drivers/mmc/card/sdio_uart.c b/drivers/mmc/card/sdio_uart.c index eeea84c..78ad487 100644 --- a/drivers/mmc/card/sdio_uart.c +++ b/drivers/mmc/card/sdio_uart.c @@ -885,12 +885,14 @@ static void sdio_uart_set_termios(struct tty_struct *tty, struct ktermios *old_t sdio_uart_release_func(port); } -static void sdio_uart_break_ctl(struct tty_struct *tty, int break_state) +static int sdio_uart_break_ctl(struct tty_struct *tty, int break_state) { struct sdio_uart_port *port = tty->driver_data; + int result; - if (sdio_uart_claim_func(port) != 0) - return; + result = sdio_uart_claim_func(port); + if (result != 0) + return result; if (break_state == -1) port->lcr |= UART_LCR_SBC; @@ -899,6 +901,7 @@ static void sdio_uart_break_ctl(struct tty_struct *tty, int break_state) sdio_out(port, UART_LCR, port->lcr); sdio_uart_release_func(port); + return 0; } static int sdio_uart_tiocmget(struct tty_struct *tty, struct file *file) -- cgit v0.10.2 From 1e51764a3c2ac05a23a22b2a95ddee4d9bffb16d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 14 Jul 2008 19:08:37 +0300 Subject: UBIFS: add new flash file system This is a new flash file system. See http://www.linux-mtd.infradead.org/doc/ubifs.html Signed-off-by: Artem Bityutskiy Signed-off-by: Adrian Hunter diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c new file mode 100644 index 0000000..d81fb9e --- /dev/null +++ b/fs/ubifs/budget.c @@ -0,0 +1,731 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements the budgeting sub-system which is responsible for UBIFS + * space management. + * + * Factors such as compression, wasted space at the ends of LEBs, space in other + * journal heads, the effect of updates on the index, and so on, make it + * impossible to accurately predict the amount of space needed. Consequently + * approximations are used. + */ + +#include "ubifs.h" +#include +#include + +/* + * When pessimistic budget calculations say that there is no enough space, + * UBIFS starts writing back dirty inodes and pages, doing garbage collection, + * or committing. The below constants define maximum number of times UBIFS + * repeats the operations. + */ +#define MAX_SHRINK_RETRIES 8 +#define MAX_GC_RETRIES 4 +#define MAX_CMT_RETRIES 2 +#define MAX_NOSPC_RETRIES 1 + +/* + * The below constant defines amount of dirty pages which should be written + * back at when trying to shrink the liability. + */ +#define NR_TO_WRITE 16 + +/** + * struct retries_info - information about re-tries while making free space. + * @prev_liability: previous liability + * @shrink_cnt: how many times the liability was shrinked + * @shrink_retries: count of liability shrink re-tries (increased when + * liability does not shrink) + * @try_gc: GC should be tried first + * @gc_retries: how many times GC was run + * @cmt_retries: how many times commit has been done + * @nospc_retries: how many times GC returned %-ENOSPC + * + * Since we consider budgeting to be the fast-path, and this structure has to + * be allocated on stack and zeroed out, we make it smaller using bit-fields. + */ +struct retries_info { + long long prev_liability; + unsigned int shrink_cnt; + unsigned int shrink_retries:5; + unsigned int try_gc:1; + unsigned int gc_retries:4; + unsigned int cmt_retries:3; + unsigned int nospc_retries:1; +}; + +/** + * shrink_liability - write-back some dirty pages/inodes. + * @c: UBIFS file-system description object + * @nr_to_write: how many dirty pages to write-back + * + * This function shrinks UBIFS liability by means of writing back some amount + * of dirty inodes and their pages. Returns the amount of pages which were + * written back. The returned value does not include dirty inodes which were + * synchronized. + * + * Note, this function synchronizes even VFS inodes which are locked + * (@i_mutex) by the caller of the budgeting function, because write-back does + * not touch @i_mutex. + */ +static int shrink_liability(struct ubifs_info *c, int nr_to_write) +{ + int nr_written; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE, + .range_end = LLONG_MAX, + .nr_to_write = nr_to_write, + }; + + generic_sync_sb_inodes(c->vfs_sb, &wbc); + nr_written = nr_to_write - wbc.nr_to_write; + + if (!nr_written) { + /* + * Re-try again but wait on pages/inodes which are being + * written-back concurrently (e.g., by pdflush). + */ + memset(&wbc, 0, sizeof(struct writeback_control)); + wbc.sync_mode = WB_SYNC_ALL; + wbc.range_end = LLONG_MAX; + wbc.nr_to_write = nr_to_write; + generic_sync_sb_inodes(c->vfs_sb, &wbc); + nr_written = nr_to_write - wbc.nr_to_write; + } + + dbg_budg("%d pages were written back", nr_written); + return nr_written; +} + + +/** + * run_gc - run garbage collector. + * @c: UBIFS file-system description object + * + * This function runs garbage collector to make some more free space. Returns + * zero if a free LEB has been produced, %-EAGAIN if commit is required, and a + * negative error code in case of failure. + */ +static int run_gc(struct ubifs_info *c) +{ + int err, lnum; + + /* Make some free space by garbage-collecting dirty space */ + down_read(&c->commit_sem); + lnum = ubifs_garbage_collect(c, 1); + up_read(&c->commit_sem); + if (lnum < 0) + return lnum; + + /* GC freed one LEB, return it to lprops */ + dbg_budg("GC freed LEB %d", lnum); + err = ubifs_return_leb(c, lnum); + if (err) + return err; + return 0; +} + +/** + * make_free_space - make more free space on the file-system. + * @c: UBIFS file-system description object + * @ri: information about previous invocations of this function + * + * This function is called when an operation cannot be budgeted because there + * is supposedly no free space. But in most cases there is some free space: + * o budgeting is pessimistic, so it always budgets more then it is actually + * needed, so shrinking the liability is one way to make free space - the + * cached data will take less space then it was budgeted for; + * o GC may turn some dark space into free space (budgeting treats dark space + * as not available); + * o commit may free some LEB, i.e., turn freeable LEBs into free LEBs. + * + * So this function tries to do the above. Returns %-EAGAIN if some free space + * was presumably made and the caller has to re-try budgeting the operation. + * Returns %-ENOSPC if it couldn't do more free space, and other negative error + * codes on failures. + */ +static int make_free_space(struct ubifs_info *c, struct retries_info *ri) +{ + int err; + + /* + * If we have some dirty pages and inodes (liability), try to write + * them back unless this was tried too many times without effect + * already. + */ + if (ri->shrink_retries < MAX_SHRINK_RETRIES && !ri->try_gc) { + long long liability; + + spin_lock(&c->space_lock); + liability = c->budg_idx_growth + c->budg_data_growth + + c->budg_dd_growth; + spin_unlock(&c->space_lock); + + if (ri->prev_liability >= liability) { + /* Liability does not shrink, next time try GC then */ + ri->shrink_retries += 1; + if (ri->gc_retries < MAX_GC_RETRIES) + ri->try_gc = 1; + dbg_budg("liability did not shrink: retries %d of %d", + ri->shrink_retries, MAX_SHRINK_RETRIES); + } + + dbg_budg("force write-back (count %d)", ri->shrink_cnt); + shrink_liability(c, NR_TO_WRITE + ri->shrink_cnt); + + ri->prev_liability = liability; + ri->shrink_cnt += 1; + return -EAGAIN; + } + + /* + * Try to run garbage collector unless it was already tried too many + * times. + */ + if (ri->gc_retries < MAX_GC_RETRIES) { + ri->gc_retries += 1; + dbg_budg("run GC, retries %d of %d", + ri->gc_retries, MAX_GC_RETRIES); + + ri->try_gc = 0; + err = run_gc(c); + if (!err) + return -EAGAIN; + + if (err == -EAGAIN) { + dbg_budg("GC asked to commit"); + err = ubifs_run_commit(c); + if (err) + return err; + return -EAGAIN; + } + + if (err != -ENOSPC) + return err; + + /* + * GC could not make any progress. If this is the first time, + * then it makes sense to try to commit, because it might make + * some dirty space. + */ + dbg_budg("GC returned -ENOSPC, retries %d", + ri->nospc_retries); + if (ri->nospc_retries >= MAX_NOSPC_RETRIES) + return err; + ri->nospc_retries += 1; + } + + /* Neither GC nor write-back helped, try to commit */ + if (ri->cmt_retries < MAX_CMT_RETRIES) { + ri->cmt_retries += 1; + dbg_budg("run commit, retries %d of %d", + ri->cmt_retries, MAX_CMT_RETRIES); + err = ubifs_run_commit(c); + if (err) + return err; + return -EAGAIN; + } + return -ENOSPC; +} + +/** + * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index. + * @c: UBIFS file-system description object + * + * This function calculates and returns the number of eraseblocks which should + * be kept for index usage. + */ +int ubifs_calc_min_idx_lebs(struct ubifs_info *c) +{ + int ret; + uint64_t idx_size; + + idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; + + /* And make sure we have twice the index size of space reserved */ + idx_size <<= 1; + + /* + * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' + * pair, nor similarly the two variables for the new index size, so we + * have to do this costly 64-bit division on fast-path. + */ + if (do_div(idx_size, c->leb_size - c->max_idx_node_sz)) + ret = idx_size + 1; + else + ret = idx_size; + /* + * The index head is not available for the in-the-gaps method, so add an + * extra LEB to compensate. + */ + ret += 1; + /* + * At present the index needs at least 2 LEBs: one for the index head + * and one for in-the-gaps method (which currently does not cater for + * the index head and so excludes it from consideration). + */ + if (ret < 2) + ret = 2; + return ret; +} + +/** + * ubifs_calc_available - calculate available FS space. + * @c: UBIFS file-system description object + * @min_idx_lebs: minimum number of LEBs reserved for the index + * + * This function calculates and returns amount of FS space available for use. + */ +long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs) +{ + int subtract_lebs; + long long available; + + /* + * Force the amount available to the total size reported if the used + * space is zero. + */ + if (c->lst.total_used <= UBIFS_INO_NODE_SZ && + c->budg_data_growth + c->budg_dd_growth == 0) { + /* Do the same calculation as for c->block_cnt */ + available = c->main_lebs - 2; + available *= c->leb_size - c->dark_wm; + return available; + } + + available = c->main_bytes - c->lst.total_used; + + /* + * Now 'available' contains theoretically available flash space + * assuming there is no index, so we have to subtract the space which + * is reserved for the index. + */ + subtract_lebs = min_idx_lebs; + + /* Take into account that GC reserves one LEB for its own needs */ + subtract_lebs += 1; + + /* + * The GC journal head LEB is not really accessible. And since + * different write types go to different heads, we may count only on + * one head's space. + */ + subtract_lebs += c->jhead_cnt - 1; + + /* We also reserve one LEB for deletions, which bypass budgeting */ + subtract_lebs += 1; + + available -= (long long)subtract_lebs * c->leb_size; + + /* Subtract the dead space which is not available for use */ + available -= c->lst.total_dead; + + /* + * Subtract dark space, which might or might not be usable - it depends + * on the data which we have on the media and which will be written. If + * this is a lot of uncompressed or not-compressible data, the dark + * space cannot be used. + */ + available -= c->lst.total_dark; + + /* + * However, there is more dark space. The index may be bigger than + * @min_idx_lebs. Those extra LEBs are assumed to be available, but + * their dark space is not included in total_dark, so it is subtracted + * here. + */ + if (c->lst.idx_lebs > min_idx_lebs) { + subtract_lebs = c->lst.idx_lebs - min_idx_lebs; + available -= subtract_lebs * c->dark_wm; + } + + /* The calculations are rough and may end up with a negative number */ + return available > 0 ? available : 0; +} + +/** + * can_use_rp - check whether the user is allowed to use reserved pool. + * @c: UBIFS file-system description object + * + * UBIFS has so-called "reserved pool" which is flash space reserved + * for the superuser and for uses whose UID/GID is recorded in UBIFS superblock. + * This function checks whether current user is allowed to use reserved pool. + * Returns %1 current user is allowed to use reserved pool and %0 otherwise. + */ +static int can_use_rp(struct ubifs_info *c) +{ + if (current->fsuid == c->rp_uid || capable(CAP_SYS_RESOURCE) || + (c->rp_gid != 0 && in_group_p(c->rp_gid))) + return 1; + return 0; +} + +/** + * do_budget_space - reserve flash space for index and data growth. + * @c: UBIFS file-system description object + * + * This function makes sure UBIFS has enough free eraseblocks for index growth + * and data. + * + * When budgeting index space, UBIFS reserves twice as more LEBs as the index + * would take if it was consolidated and written to the flash. This guarantees + * that the "in-the-gaps" commit method always succeeds and UBIFS will always + * be able to commit dirty index. So this function basically adds amount of + * budgeted index space to the size of the current index, multiplies this by 2, + * and makes sure this does not exceed the amount of free eraseblocks. + * + * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: + * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might + * be large, because UBIFS does not do any index consolidation as long as + * there is free space. IOW, the index may take a lot of LEBs, but the LEBs + * will contain a lot of dirt. + * o @c->min_idx_lebs is the the index presumably takes. IOW, the index may be + * consolidated to take up to @c->min_idx_lebs LEBs. + * + * This function returns zero in case of success, and %-ENOSPC in case of + * failure. + */ +static int do_budget_space(struct ubifs_info *c) +{ + long long outstanding, available; + int lebs, rsvd_idx_lebs, min_idx_lebs; + + /* First budget index space */ + min_idx_lebs = ubifs_calc_min_idx_lebs(c); + + /* Now 'min_idx_lebs' contains number of LEBs to reserve */ + if (min_idx_lebs > c->lst.idx_lebs) + rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; + else + rsvd_idx_lebs = 0; + + /* + * The number of LEBs that are available to be used by the index is: + * + * @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt - + * @c->lst.taken_empty_lebs + * + * @empty_lebs are available because they are empty. @freeable_cnt are + * available because they contain only free and dirty space and the + * index allocation always occurs after wbufs are synch'ed. + * @idx_gc_cnt are available because they are index LEBs that have been + * garbage collected (including trivial GC) and are awaiting the commit + * before they can be unmapped - note that the in-the-gaps method will + * grab these if it needs them. @taken_empty_lebs are empty_lebs that + * have already been allocated for some purpose (also includes those + * LEBs on the @idx_gc list). + * + * Note, @taken_empty_lebs may temporarily be higher by one because of + * the way we serialize LEB allocations and budgeting. See a comment in + * 'ubifs_find_free_space()'. + */ + lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - + c->lst.taken_empty_lebs; + if (unlikely(rsvd_idx_lebs > lebs)) { + dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " + "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs, + rsvd_idx_lebs); + return -ENOSPC; + } + + available = ubifs_calc_available(c, min_idx_lebs); + outstanding = c->budg_data_growth + c->budg_dd_growth; + + if (unlikely(available < outstanding)) { + dbg_budg("out of data space: available %lld, outstanding %lld", + available, outstanding); + return -ENOSPC; + } + + if (available - outstanding <= c->rp_size && !can_use_rp(c)) + return -ENOSPC; + + c->min_idx_lebs = min_idx_lebs; + return 0; +} + +/** + * calc_idx_growth - calculate approximate index growth from budgeting request. + * @c: UBIFS file-system description object + * @req: budgeting request + * + * For now we assume each new node adds one znode. But this is rather poor + * approximation, though. + */ +static int calc_idx_growth(const struct ubifs_info *c, + const struct ubifs_budget_req *req) +{ + int znodes; + + znodes = req->new_ino + (req->new_page << UBIFS_BLOCKS_PER_PAGE_SHIFT) + + req->new_dent; + return znodes * c->max_idx_node_sz; +} + +/** + * calc_data_growth - calculate approximate amount of new data from budgeting + * request. + * @c: UBIFS file-system description object + * @req: budgeting request + */ +static int calc_data_growth(const struct ubifs_info *c, + const struct ubifs_budget_req *req) +{ + int data_growth; + + data_growth = req->new_ino ? c->inode_budget : 0; + if (req->new_page) + data_growth += c->page_budget; + if (req->new_dent) + data_growth += c->dent_budget; + data_growth += req->new_ino_d; + return data_growth; +} + +/** + * calc_dd_growth - calculate approximate amount of data which makes other data + * dirty from budgeting request. + * @c: UBIFS file-system description object + * @req: budgeting request + */ +static int calc_dd_growth(const struct ubifs_info *c, + const struct ubifs_budget_req *req) +{ + int dd_growth; + + dd_growth = req->dirtied_page ? c->page_budget : 0; + + if (req->dirtied_ino) + dd_growth += c->inode_budget << (req->dirtied_ino - 1); + if (req->mod_dent) + dd_growth += c->dent_budget; + dd_growth += req->dirtied_ino_d; + return dd_growth; +} + +/** + * ubifs_budget_space - ensure there is enough space to complete an operation. + * @c: UBIFS file-system description object + * @req: budget request + * + * This function allocates budget for an operation. It uses pessimistic + * approximation of how much flash space the operation needs. The goal of this + * function is to make sure UBIFS always has flash space to flush all dirty + * pages, dirty inodes, and dirty znodes (liability). This function may force + * commit, garbage-collection or write-back. Returns zero in case of success, + * %-ENOSPC if there is no free space and other negative error codes in case of + * failures. + */ +int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) +{ + int uninitialized_var(cmt_retries), uninitialized_var(wb_retries); + int err, idx_growth, data_growth, dd_growth; + struct retries_info ri; + + ubifs_assert(req->dirtied_ino <= 4); + ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); + + data_growth = calc_data_growth(c, req); + dd_growth = calc_dd_growth(c, req); + if (!data_growth && !dd_growth) + return 0; + idx_growth = calc_idx_growth(c, req); + memset(&ri, 0, sizeof(struct retries_info)); + +again: + spin_lock(&c->space_lock); + ubifs_assert(c->budg_idx_growth >= 0); + ubifs_assert(c->budg_data_growth >= 0); + ubifs_assert(c->budg_dd_growth >= 0); + + if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) { + dbg_budg("no space"); + spin_unlock(&c->space_lock); + return -ENOSPC; + } + + c->budg_idx_growth += idx_growth; + c->budg_data_growth += data_growth; + c->budg_dd_growth += dd_growth; + + err = do_budget_space(c); + if (likely(!err)) { + req->idx_growth = idx_growth; + req->data_growth = data_growth; + req->dd_growth = dd_growth; + spin_unlock(&c->space_lock); + return 0; + } + + /* Restore the old values */ + c->budg_idx_growth -= idx_growth; + c->budg_data_growth -= data_growth; + c->budg_dd_growth -= dd_growth; + spin_unlock(&c->space_lock); + + if (req->fast) { + dbg_budg("no space for fast budgeting"); + return err; + } + + err = make_free_space(c, &ri); + if (err == -EAGAIN) { + dbg_budg("try again"); + cond_resched(); + goto again; + } else if (err == -ENOSPC) { + dbg_budg("FS is full, -ENOSPC"); + c->nospace = 1; + if (can_use_rp(c) || c->rp_size == 0) + c->nospace_rp = 1; + smp_wmb(); + } else + ubifs_err("cannot budget space, error %d", err); + return err; +} + +/** + * ubifs_release_budget - release budgeted free space. + * @c: UBIFS file-system description object + * @req: budget request + * + * This function releases the space budgeted by 'ubifs_budget_space()'. Note, + * since the index changes (which were budgeted for in @req->idx_growth) will + * only be written to the media on commit, this function moves the index budget + * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be + * zeroed by the commit operation. + */ +void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) +{ + ubifs_assert(req->dirtied_ino <= 4); + ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); + if (!req->recalculate) { + ubifs_assert(req->idx_growth >= 0); + ubifs_assert(req->data_growth >= 0); + ubifs_assert(req->dd_growth >= 0); + } + + if (req->recalculate) { + req->data_growth = calc_data_growth(c, req); + req->dd_growth = calc_dd_growth(c, req); + req->idx_growth = calc_idx_growth(c, req); + } + + if (!req->data_growth && !req->dd_growth) + return; + + c->nospace = c->nospace_rp = 0; + smp_wmb(); + + spin_lock(&c->space_lock); + c->budg_idx_growth -= req->idx_growth; + c->budg_uncommitted_idx += req->idx_growth; + c->budg_data_growth -= req->data_growth; + c->budg_dd_growth -= req->dd_growth; + c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + + ubifs_assert(c->budg_idx_growth >= 0); + ubifs_assert(c->budg_data_growth >= 0); + ubifs_assert(c->min_idx_lebs < c->main_lebs); + spin_unlock(&c->space_lock); +} + +/** + * ubifs_convert_page_budget - convert budget of a new page. + * @c: UBIFS file-system description object + * + * This function converts budget which was allocated for a new page of data to + * the budget of changing an existing page of data. The latter is smaller then + * the former, so this function only does simple re-calculation and does not + * involve any write-back. + */ +void ubifs_convert_page_budget(struct ubifs_info *c) +{ + spin_lock(&c->space_lock); + /* Release the index growth reservation */ + c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; + /* Release the data growth reservation */ + c->budg_data_growth -= c->page_budget; + /* Increase the dirty data growth reservation instead */ + c->budg_dd_growth += c->page_budget; + /* And re-calculate the indexing space reservation */ + c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + spin_unlock(&c->space_lock); +} + +/** + * ubifs_release_dirty_inode_budget - release dirty inode budget. + * @c: UBIFS file-system description object + * @ui: UBIFS inode to release the budget for + * + * This function releases budget corresponding to a dirty inode. It is usually + * called when after the inode has been written to the media and marked as + * clean. + */ +void ubifs_release_dirty_inode_budget(struct ubifs_info *c, + struct ubifs_inode *ui) +{ + struct ubifs_budget_req req = {.dd_growth = c->inode_budget, + .dirtied_ino_d = ui->data_len}; + + ubifs_release_budget(c, &req); +} + +/** + * ubifs_budg_get_free_space - return amount of free space. + * @c: UBIFS file-system description object + * + * This function returns amount of free space on the file-system. + */ +long long ubifs_budg_get_free_space(struct ubifs_info *c) +{ + int min_idx_lebs, rsvd_idx_lebs; + long long available, outstanding, free; + + /* Do exactly the same calculations as in 'do_budget_space()' */ + spin_lock(&c->space_lock); + min_idx_lebs = ubifs_calc_min_idx_lebs(c); + + if (min_idx_lebs > c->lst.idx_lebs) + rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; + else + rsvd_idx_lebs = 0; + + if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt + - c->lst.taken_empty_lebs) { + spin_unlock(&c->space_lock); + return 0; + } + + available = ubifs_calc_available(c, min_idx_lebs); + outstanding = c->budg_data_growth + c->budg_dd_growth; + c->min_idx_lebs = min_idx_lebs; + spin_unlock(&c->space_lock); + + if (available > outstanding) + free = ubifs_reported_space(c, available - outstanding); + else + free = 0; + return free; +} diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c new file mode 100644 index 0000000..3b51631 --- /dev/null +++ b/fs/ubifs/commit.c @@ -0,0 +1,677 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements functions that manage the running of the commit process. + * Each affected module has its own functions to accomplish their part in the + * commit and those functions are called here. + * + * The commit is the process whereby all updates to the index and LEB properties + * are written out together and the journal becomes empty. This keeps the + * file system consistent - at all times the state can be recreated by reading + * the index and LEB properties and then replaying the journal. + * + * The commit is split into two parts named "commit start" and "commit end". + * During commit start, the commit process has exclusive access to the journal + * by holding the commit semaphore down for writing. As few I/O operations as + * possible are performed during commit start, instead the nodes that are to be + * written are merely identified. During commit end, the commit semaphore is no + * longer held and the journal is again in operation, allowing users to continue + * to use the file system while the bulk of the commit I/O is performed. The + * purpose of this two-step approach is to prevent the commit from causing any + * latency blips. Note that in any case, the commit does not prevent lookups + * (as permitted by the TNC mutex), or access to VFS data structures e.g. page + * cache. + */ + +#include +#include +#include "ubifs.h" + +/** + * do_commit - commit the journal. + * @c: UBIFS file-system description object + * + * This function implements UBIFS commit. It has to be called with commit lock + * locked. Returns zero in case of success and a negative error code in case of + * failure. + */ +static int do_commit(struct ubifs_info *c) +{ + int err, new_ltail_lnum, old_ltail_lnum, i; + struct ubifs_zbranch zroot; + struct ubifs_lp_stats lst; + + dbg_cmt("start"); + if (c->ro_media) { + err = -EROFS; + goto out_up; + } + + /* Sync all write buffers (necessary for recovery) */ + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + goto out_up; + } + + err = ubifs_gc_start_commit(c); + if (err) + goto out_up; + err = dbg_check_lprops(c); + if (err) + goto out_up; + err = ubifs_log_start_commit(c, &new_ltail_lnum); + if (err) + goto out_up; + err = ubifs_tnc_start_commit(c, &zroot); + if (err) + goto out_up; + err = ubifs_lpt_start_commit(c); + if (err) + goto out_up; + err = ubifs_orphan_start_commit(c); + if (err) + goto out_up; + + ubifs_get_lp_stats(c, &lst); + + up_write(&c->commit_sem); + + err = ubifs_tnc_end_commit(c); + if (err) + goto out; + err = ubifs_lpt_end_commit(c); + if (err) + goto out; + err = ubifs_orphan_end_commit(c); + if (err) + goto out; + old_ltail_lnum = c->ltail_lnum; + err = ubifs_log_end_commit(c, new_ltail_lnum); + if (err) + goto out; + err = dbg_check_old_index(c, &zroot); + if (err) + goto out; + + mutex_lock(&c->mst_mutex); + c->mst_node->cmt_no = cpu_to_le64(++c->cmt_no); + c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum); + c->mst_node->root_lnum = cpu_to_le32(zroot.lnum); + c->mst_node->root_offs = cpu_to_le32(zroot.offs); + c->mst_node->root_len = cpu_to_le32(zroot.len); + c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum); + c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs); + c->mst_node->index_size = cpu_to_le64(c->old_idx_sz); + c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum); + c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs); + c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum); + c->mst_node->nhead_offs = cpu_to_le32(c->nhead_offs); + c->mst_node->ltab_lnum = cpu_to_le32(c->ltab_lnum); + c->mst_node->ltab_offs = cpu_to_le32(c->ltab_offs); + c->mst_node->lsave_lnum = cpu_to_le32(c->lsave_lnum); + c->mst_node->lsave_offs = cpu_to_le32(c->lsave_offs); + c->mst_node->lscan_lnum = cpu_to_le32(c->lscan_lnum); + c->mst_node->empty_lebs = cpu_to_le32(lst.empty_lebs); + c->mst_node->idx_lebs = cpu_to_le32(lst.idx_lebs); + c->mst_node->total_free = cpu_to_le64(lst.total_free); + c->mst_node->total_dirty = cpu_to_le64(lst.total_dirty); + c->mst_node->total_used = cpu_to_le64(lst.total_used); + c->mst_node->total_dead = cpu_to_le64(lst.total_dead); + c->mst_node->total_dark = cpu_to_le64(lst.total_dark); + if (c->no_orphs) + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); + else + c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS); + err = ubifs_write_master(c); + mutex_unlock(&c->mst_mutex); + if (err) + goto out; + + err = ubifs_log_post_commit(c, old_ltail_lnum); + if (err) + goto out; + err = ubifs_gc_end_commit(c); + if (err) + goto out; + err = ubifs_lpt_post_commit(c); + if (err) + goto out; + + spin_lock(&c->cs_lock); + c->cmt_state = COMMIT_RESTING; + wake_up(&c->cmt_wq); + dbg_cmt("commit end"); + spin_unlock(&c->cs_lock); + + return 0; + +out_up: + up_write(&c->commit_sem); +out: + ubifs_err("commit failed, error %d", err); + spin_lock(&c->cs_lock); + c->cmt_state = COMMIT_BROKEN; + wake_up(&c->cmt_wq); + spin_unlock(&c->cs_lock); + ubifs_ro_mode(c, err); + return err; +} + +/** + * run_bg_commit - run background commit if it is needed. + * @c: UBIFS file-system description object + * + * This function runs background commit if it is needed. Returns zero in case + * of success and a negative error code in case of failure. + */ +static int run_bg_commit(struct ubifs_info *c) +{ + spin_lock(&c->cs_lock); + /* + * Run background commit only if background commit was requested or if + * commit is required. + */ + if (c->cmt_state != COMMIT_BACKGROUND && + c->cmt_state != COMMIT_REQUIRED) + goto out; + spin_unlock(&c->cs_lock); + + down_write(&c->commit_sem); + spin_lock(&c->cs_lock); + if (c->cmt_state == COMMIT_REQUIRED) + c->cmt_state = COMMIT_RUNNING_REQUIRED; + else if (c->cmt_state == COMMIT_BACKGROUND) + c->cmt_state = COMMIT_RUNNING_BACKGROUND; + else + goto out_cmt_unlock; + spin_unlock(&c->cs_lock); + + return do_commit(c); + +out_cmt_unlock: + up_write(&c->commit_sem); +out: + spin_unlock(&c->cs_lock); + return 0; +} + +/** + * ubifs_bg_thread - UBIFS background thread function. + * @info: points to the file-system description object + * + * This function implements various file-system background activities: + * o when a write-buffer timer expires it synchronizes the appropriate + * write-buffer; + * o when the journal is about to be full, it starts in-advance commit. + * + * Note, other stuff like background garbage collection may be added here in + * future. + */ +int ubifs_bg_thread(void *info) +{ + int err; + struct ubifs_info *c = info; + + ubifs_msg("background thread \"%s\" started, PID %d", + c->bgt_name, current->pid); + set_freezable(); + + while (1) { + if (kthread_should_stop()) + break; + + if (try_to_freeze()) + continue; + + set_current_state(TASK_INTERRUPTIBLE); + /* Check if there is something to do */ + if (!c->need_bgt) { + /* + * Nothing prevents us from going sleep now and + * be never woken up and block the task which + * could wait in 'kthread_stop()' forever. + */ + if (kthread_should_stop()) + break; + schedule(); + continue; + } else + __set_current_state(TASK_RUNNING); + + c->need_bgt = 0; + err = ubifs_bg_wbufs_sync(c); + if (err) + ubifs_ro_mode(c, err); + + run_bg_commit(c); + cond_resched(); + } + + dbg_msg("background thread \"%s\" stops", c->bgt_name); + return 0; +} + +/** + * ubifs_commit_required - set commit state to "required". + * @c: UBIFS file-system description object + * + * This function is called if a commit is required but cannot be done from the + * calling function, so it is just flagged instead. + */ +void ubifs_commit_required(struct ubifs_info *c) +{ + spin_lock(&c->cs_lock); + switch (c->cmt_state) { + case COMMIT_RESTING: + case COMMIT_BACKGROUND: + dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state), + dbg_cstate(COMMIT_REQUIRED)); + c->cmt_state = COMMIT_REQUIRED; + break; + case COMMIT_RUNNING_BACKGROUND: + dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state), + dbg_cstate(COMMIT_RUNNING_REQUIRED)); + c->cmt_state = COMMIT_RUNNING_REQUIRED; + break; + case COMMIT_REQUIRED: + case COMMIT_RUNNING_REQUIRED: + case COMMIT_BROKEN: + break; + } + spin_unlock(&c->cs_lock); +} + +/** + * ubifs_request_bg_commit - notify the background thread to do a commit. + * @c: UBIFS file-system description object + * + * This function is called if the journal is full enough to make a commit + * worthwhile, so background thread is kicked to start it. + */ +void ubifs_request_bg_commit(struct ubifs_info *c) +{ + spin_lock(&c->cs_lock); + if (c->cmt_state == COMMIT_RESTING) { + dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state), + dbg_cstate(COMMIT_BACKGROUND)); + c->cmt_state = COMMIT_BACKGROUND; + spin_unlock(&c->cs_lock); + ubifs_wake_up_bgt(c); + } else + spin_unlock(&c->cs_lock); +} + +/** + * wait_for_commit - wait for commit. + * @c: UBIFS file-system description object + * + * This function sleeps until the commit operation is no longer running. + */ +static int wait_for_commit(struct ubifs_info *c) +{ + dbg_cmt("pid %d goes sleep", current->pid); + + /* + * The following sleeps if the condition is false, and will be woken + * when the commit ends. It is possible, although very unlikely, that we + * will wake up and see the subsequent commit running, rather than the + * one we were waiting for, and go back to sleep. However, we will be + * woken again, so there is no danger of sleeping forever. + */ + wait_event(c->cmt_wq, c->cmt_state != COMMIT_RUNNING_BACKGROUND && + c->cmt_state != COMMIT_RUNNING_REQUIRED); + dbg_cmt("commit finished, pid %d woke up", current->pid); + return 0; +} + +/** + * ubifs_run_commit - run or wait for commit. + * @c: UBIFS file-system description object + * + * This function runs commit and returns zero in case of success and a negative + * error code in case of failure. + */ +int ubifs_run_commit(struct ubifs_info *c) +{ + int err = 0; + + spin_lock(&c->cs_lock); + if (c->cmt_state == COMMIT_BROKEN) { + err = -EINVAL; + goto out; + } + + if (c->cmt_state == COMMIT_RUNNING_BACKGROUND) + /* + * We set the commit state to 'running required' to indicate + * that we want it to complete as quickly as possible. + */ + c->cmt_state = COMMIT_RUNNING_REQUIRED; + + if (c->cmt_state == COMMIT_RUNNING_REQUIRED) { + spin_unlock(&c->cs_lock); + return wait_for_commit(c); + } + spin_unlock(&c->cs_lock); + + /* Ok, the commit is indeed needed */ + + down_write(&c->commit_sem); + spin_lock(&c->cs_lock); + /* + * Since we unlocked 'c->cs_lock', the state may have changed, so + * re-check it. + */ + if (c->cmt_state == COMMIT_BROKEN) { + err = -EINVAL; + goto out_cmt_unlock; + } + + if (c->cmt_state == COMMIT_RUNNING_BACKGROUND) + c->cmt_state = COMMIT_RUNNING_REQUIRED; + + if (c->cmt_state == COMMIT_RUNNING_REQUIRED) { + up_write(&c->commit_sem); + spin_unlock(&c->cs_lock); + return wait_for_commit(c); + } + c->cmt_state = COMMIT_RUNNING_REQUIRED; + spin_unlock(&c->cs_lock); + + err = do_commit(c); + return err; + +out_cmt_unlock: + up_write(&c->commit_sem); +out: + spin_unlock(&c->cs_lock); + return err; +} + +/** + * ubifs_gc_should_commit - determine if it is time for GC to run commit. + * @c: UBIFS file-system description object + * + * This function is called by garbage collection to determine if commit should + * be run. If commit state is @COMMIT_BACKGROUND, which means that the journal + * is full enough to start commit, this function returns true. It is not + * absolutely necessary to commit yet, but it feels like this should be better + * then to keep doing GC. This function returns %1 if GC has to initiate commit + * and %0 if not. + */ +int ubifs_gc_should_commit(struct ubifs_info *c) +{ + int ret = 0; + + spin_lock(&c->cs_lock); + if (c->cmt_state == COMMIT_BACKGROUND) { + dbg_cmt("commit required now"); + c->cmt_state = COMMIT_REQUIRED; + } else + dbg_cmt("commit not requested"); + if (c->cmt_state == COMMIT_REQUIRED) + ret = 1; + spin_unlock(&c->cs_lock); + return ret; +} + +#ifdef CONFIG_UBIFS_FS_DEBUG + +/** + * struct idx_node - hold index nodes during index tree traversal. + * @list: list + * @iip: index in parent (slot number of this indexing node in the parent + * indexing node) + * @upper_key: all keys in this indexing node have to be less or equivalent to + * this key + * @idx: index node (8-byte aligned because all node structures must be 8-byte + * aligned) + */ +struct idx_node { + struct list_head list; + int iip; + union ubifs_key upper_key; + struct ubifs_idx_node idx __attribute__((aligned(8))); +}; + +/** + * dbg_old_index_check_init - get information for the next old index check. + * @c: UBIFS file-system description object + * @zroot: root of the index + * + * This function records information about the index that will be needed for the + * next old index check i.e. 'dbg_check_old_index()'. + * + * This function returns %0 on success and a negative error code on failure. + */ +int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot) +{ + struct ubifs_idx_node *idx; + int lnum, offs, len, err = 0; + + c->old_zroot = *zroot; + + lnum = c->old_zroot.lnum; + offs = c->old_zroot.offs; + len = c->old_zroot.len; + + idx = kmalloc(c->max_idx_node_sz, GFP_NOFS); + if (!idx) + return -ENOMEM; + + err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs); + if (err) + goto out; + + c->old_zroot_level = le16_to_cpu(idx->level); + c->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum); +out: + kfree(idx); + return err; +} + +/** + * dbg_check_old_index - check the old copy of the index. + * @c: UBIFS file-system description object + * @zroot: root of the new index + * + * In order to be able to recover from an unclean unmount, a complete copy of + * the index must exist on flash. This is the "old" index. The commit process + * must write the "new" index to flash without overwriting or destroying any + * part of the old index. This function is run at commit end in order to check + * that the old index does indeed exist completely intact. + * + * This function returns %0 on success and a negative error code on failure. + */ +int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) +{ + int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt; + int first = 1, iip; + union ubifs_key lower_key, upper_key, l_key, u_key; + unsigned long long uninitialized_var(last_sqnum); + struct ubifs_idx_node *idx; + struct list_head list; + struct idx_node *i; + size_t sz; + + if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX)) + goto out; + + INIT_LIST_HEAD(&list); + + sz = sizeof(struct idx_node) + ubifs_idx_node_sz(c, c->fanout) - + UBIFS_IDX_NODE_SZ; + + /* Start at the old zroot */ + lnum = c->old_zroot.lnum; + offs = c->old_zroot.offs; + len = c->old_zroot.len; + iip = 0; + + /* + * Traverse the index tree preorder depth-first i.e. do a node and then + * its subtrees from left to right. + */ + while (1) { + struct ubifs_branch *br; + + /* Get the next index node */ + i = kmalloc(sz, GFP_NOFS); + if (!i) { + err = -ENOMEM; + goto out_free; + } + i->iip = iip; + /* Keep the index nodes on our path in a linked list */ + list_add_tail(&i->list, &list); + /* Read the index node */ + idx = &i->idx; + err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs); + if (err) + goto out_free; + /* Validate index node */ + child_cnt = le16_to_cpu(idx->child_cnt); + if (child_cnt < 1 || child_cnt > c->fanout) { + err = 1; + goto out_dump; + } + if (first) { + first = 0; + /* Check root level and sqnum */ + if (le16_to_cpu(idx->level) != c->old_zroot_level) { + err = 2; + goto out_dump; + } + if (le64_to_cpu(idx->ch.sqnum) != c->old_zroot_sqnum) { + err = 3; + goto out_dump; + } + /* Set last values as though root had a parent */ + last_level = le16_to_cpu(idx->level) + 1; + last_sqnum = le64_to_cpu(idx->ch.sqnum) + 1; + key_read(c, ubifs_idx_key(c, idx), &lower_key); + highest_ino_key(c, &upper_key, INUM_WATERMARK); + } + key_copy(c, &upper_key, &i->upper_key); + if (le16_to_cpu(idx->level) != last_level - 1) { + err = 3; + goto out_dump; + } + /* + * The index is always written bottom up hence a child's sqnum + * is always less than the parents. + */ + if (le64_to_cpu(idx->ch.sqnum) >= last_sqnum) { + err = 4; + goto out_dump; + } + /* Check key range */ + key_read(c, ubifs_idx_key(c, idx), &l_key); + br = ubifs_idx_branch(c, idx, child_cnt - 1); + key_read(c, &br->key, &u_key); + if (keys_cmp(c, &lower_key, &l_key) > 0) { + err = 5; + goto out_dump; + } + if (keys_cmp(c, &upper_key, &u_key) < 0) { + err = 6; + goto out_dump; + } + if (keys_cmp(c, &upper_key, &u_key) == 0) + if (!is_hash_key(c, &u_key)) { + err = 7; + goto out_dump; + } + /* Go to next index node */ + if (le16_to_cpu(idx->level) == 0) { + /* At the bottom, so go up until can go right */ + while (1) { + /* Drop the bottom of the list */ + list_del(&i->list); + kfree(i); + /* No more list means we are done */ + if (list_empty(&list)) + goto out; + /* Look at the new bottom */ + i = list_entry(list.prev, struct idx_node, + list); + idx = &i->idx; + /* Can we go right */ + if (iip + 1 < le16_to_cpu(idx->child_cnt)) { + iip = iip + 1; + break; + } else + /* Nope, so go up again */ + iip = i->iip; + } + } else + /* Go down left */ + iip = 0; + /* + * We have the parent in 'idx' and now we set up for reading the + * child pointed to by slot 'iip'. + */ + last_level = le16_to_cpu(idx->level); + last_sqnum = le64_to_cpu(idx->ch.sqnum); + br = ubifs_idx_branch(c, idx, iip); + lnum = le32_to_cpu(br->lnum); + offs = le32_to_cpu(br->offs); + len = le32_to_cpu(br->len); + key_read(c, &br->key, &lower_key); + if (iip + 1 < le16_to_cpu(idx->child_cnt)) { + br = ubifs_idx_branch(c, idx, iip + 1); + key_read(c, &br->key, &upper_key); + } else + key_copy(c, &i->upper_key, &upper_key); + } +out: + err = dbg_old_index_check_init(c, zroot); + if (err) + goto out_free; + + return 0; + +out_dump: + dbg_err("dumping index node (iip=%d)", i->iip); + dbg_dump_node(c, idx); + list_del(&i->list); + kfree(i); + if (!list_empty(&list)) { + i = list_entry(list.prev, struct idx_node, list); + dbg_err("dumping parent index node"); + dbg_dump_node(c, &i->idx); + } +out_free: + while (!list_empty(&list)) { + i = list_entry(list.next, struct idx_node, list); + list_del(&i->list); + kfree(i); + } + ubifs_err("failed, error %d", err); + if (err > 0) + err = -EINVAL; + return err; +} + +#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c new file mode 100644 index 0000000..5bb51da --- /dev/null +++ b/fs/ubifs/compress.c @@ -0,0 +1,253 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * Copyright (C) 2006, 2007 University of Szeged, Hungary + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + * Zoltan Sogor + */ + +/* + * This file provides a single place to access to compression and + * decompression. + */ + +#include +#include "ubifs.h" + +/* Fake description object for the "none" compressor */ +static struct ubifs_compressor none_compr = { + .compr_type = UBIFS_COMPR_NONE, + .name = "no compression", + .capi_name = "", +}; + +#ifdef CONFIG_UBIFS_FS_LZO +static DEFINE_MUTEX(lzo_mutex); + +static struct ubifs_compressor lzo_compr = { + .compr_type = UBIFS_COMPR_LZO, + .comp_mutex = &lzo_mutex, + .name = "LZO", + .capi_name = "lzo", +}; +#else +static struct ubifs_compressor lzo_compr = { + .compr_type = UBIFS_COMPR_LZO, + .name = "LZO", +}; +#endif + +#ifdef CONFIG_UBIFS_FS_ZLIB +static DEFINE_MUTEX(deflate_mutex); +static DEFINE_MUTEX(inflate_mutex); + +static struct ubifs_compressor zlib_compr = { + .compr_type = UBIFS_COMPR_ZLIB, + .comp_mutex = &deflate_mutex, + .decomp_mutex = &inflate_mutex, + .name = "zlib", + .capi_name = "deflate", +}; +#else +static struct ubifs_compressor zlib_compr = { + .compr_type = UBIFS_COMPR_ZLIB, + .name = "zlib", +}; +#endif + +/* All UBIFS compressors */ +struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; + +/** + * ubifs_compress - compress data. + * @in_buf: data to compress + * @in_len: length of the data to compress + * @out_buf: output buffer where compressed data should be stored + * @out_len: output buffer length is returned here + * @compr_type: type of compression to use on enter, actually used compression + * type on exit + * + * This function compresses input buffer @in_buf of length @in_len and stores + * the result in the output buffer @out_buf and the resulting length in + * @out_len. If the input buffer does not compress, it is just copied to the + * @out_buf. The same happens if @compr_type is %UBIFS_COMPR_NONE or if + * compression error occurred. + * + * Note, if the input buffer was not compressed, it is copied to the output + * buffer and %UBIFS_COMPR_NONE is returned in @compr_type. + * + * This functions returns %0 on success or a negative error code on failure. + */ +void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, + int *compr_type) +{ + int err; + struct ubifs_compressor *compr = ubifs_compressors[*compr_type]; + + if (*compr_type == UBIFS_COMPR_NONE) + goto no_compr; + + /* If the input data is small, do not even try to compress it */ + if (in_len < UBIFS_MIN_COMPR_LEN) + goto no_compr; + + if (compr->comp_mutex) + mutex_lock(compr->comp_mutex); + err = crypto_comp_compress(compr->cc, in_buf, in_len, out_buf, + out_len); + if (compr->comp_mutex) + mutex_unlock(compr->comp_mutex); + if (unlikely(err)) { + ubifs_warn("cannot compress %d bytes, compressor %s, " + "error %d, leave data uncompressed", + in_len, compr->name, err); + goto no_compr; + } + + /* + * Presently, we just require that compression results in less data, + * rather than any defined minimum compression ratio or amount. + */ + if (ALIGN(*out_len, 8) >= ALIGN(in_len, 8)) + goto no_compr; + + return; + +no_compr: + memcpy(out_buf, in_buf, in_len); + *out_len = in_len; + *compr_type = UBIFS_COMPR_NONE; +} + +/** + * ubifs_decompress - decompress data. + * @in_buf: data to decompress + * @in_len: length of the data to decompress + * @out_buf: output buffer where decompressed data should + * @out_len: output length is returned here + * @compr_type: type of compression + * + * This function decompresses data from buffer @in_buf into buffer @out_buf. + * The length of the uncompressed data is returned in @out_len. This functions + * returns %0 on success or a negative error code on failure. + */ +int ubifs_decompress(const void *in_buf, int in_len, void *out_buf, + int *out_len, int compr_type) +{ + int err; + struct ubifs_compressor *compr; + + if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) { + ubifs_err("invalid compression type %d", compr_type); + return -EINVAL; + } + + compr = ubifs_compressors[compr_type]; + + if (unlikely(!compr->capi_name)) { + ubifs_err("%s compression is not compiled in", compr->name); + return -EINVAL; + } + + if (compr_type == UBIFS_COMPR_NONE) { + memcpy(out_buf, in_buf, in_len); + *out_len = in_len; + return 0; + } + + if (compr->decomp_mutex) + mutex_lock(compr->decomp_mutex); + err = crypto_comp_decompress(compr->cc, in_buf, in_len, out_buf, + out_len); + if (compr->decomp_mutex) + mutex_unlock(compr->decomp_mutex); + if (err) + ubifs_err("cannot decompress %d bytes, compressor %s, " + "error %d", in_len, compr->name, err); + + return err; +} + +/** + * compr_init - initialize a compressor. + * @compr: compressor description object + * + * This function initializes the requested compressor and returns zero in case + * of success or a negative error code in case of failure. + */ +static int __init compr_init(struct ubifs_compressor *compr) +{ + if (compr->capi_name) { + compr->cc = crypto_alloc_comp(compr->capi_name, 0, 0); + if (IS_ERR(compr->cc)) { + ubifs_err("cannot initialize compressor %s, error %ld", + compr->name, PTR_ERR(compr->cc)); + return PTR_ERR(compr->cc); + } + } + + ubifs_compressors[compr->compr_type] = compr; + return 0; +} + +/** + * compr_exit - de-initialize a compressor. + * @compr: compressor description object + */ +static void compr_exit(struct ubifs_compressor *compr) +{ + if (compr->capi_name) + crypto_free_comp(compr->cc); + return; +} + +/** + * ubifs_compressors_init - initialize UBIFS compressors. + * + * This function initializes the compressor which were compiled in. Returns + * zero in case of success and a negative error code in case of failure. + */ +int __init ubifs_compressors_init(void) +{ + int err; + + err = compr_init(&lzo_compr); + if (err) + return err; + + err = compr_init(&zlib_compr); + if (err) + goto out_lzo; + + ubifs_compressors[UBIFS_COMPR_NONE] = &none_compr; + return 0; + +out_lzo: + compr_exit(&lzo_compr); + return err; +} + +/** + * ubifs_compressors_exit - de-initialize UBIFS compressors. + */ +void __exit ubifs_compressors_exit(void) +{ + compr_exit(&lzo_compr); + compr_exit(&zlib_compr); +} diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c new file mode 100644 index 0000000..4e3aaeb --- /dev/null +++ b/fs/ubifs/debug.c @@ -0,0 +1,2289 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file implements most of the debugging stuff which is compiled in only + * when it is enabled. But some debugging check functions are implemented in + * corresponding subsystem, just because they are closely related and utilize + * various local functions of those subsystems. + */ + +#define UBIFS_DBG_PRESERVE_UBI + +#include "ubifs.h" +#include +#include + +#ifdef CONFIG_UBIFS_FS_DEBUG + +DEFINE_SPINLOCK(dbg_lock); + +static char dbg_key_buf0[128]; +static char dbg_key_buf1[128]; + +unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT; +unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT; +unsigned int ubifs_tst_flags; + +module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR); +module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); +module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); + +MODULE_PARM_DESC(debug_msgs, "Debug message type flags"); +MODULE_PARM_DESC(debug_chks, "Debug check flags"); +MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); + +static const char *get_key_fmt(int fmt) +{ + switch (fmt) { + case UBIFS_SIMPLE_KEY_FMT: + return "simple"; + default: + return "unknown/invalid format"; + } +} + +static const char *get_key_hash(int hash) +{ + switch (hash) { + case UBIFS_KEY_HASH_R5: + return "R5"; + case UBIFS_KEY_HASH_TEST: + return "test"; + default: + return "unknown/invalid name hash"; + } +} + +static const char *get_key_type(int type) +{ + switch (type) { + case UBIFS_INO_KEY: + return "inode"; + case UBIFS_DENT_KEY: + return "direntry"; + case UBIFS_XENT_KEY: + return "xentry"; + case UBIFS_DATA_KEY: + return "data"; + case UBIFS_TRUN_KEY: + return "truncate"; + default: + return "unknown/invalid key"; + } +} + +static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, + char *buffer) +{ + char *p = buffer; + int type = key_type(c, key); + + if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) { + switch (type) { + case UBIFS_INO_KEY: + sprintf(p, "(%lu, %s)", key_inum(c, key), + get_key_type(type)); + break; + case UBIFS_DENT_KEY: + case UBIFS_XENT_KEY: + sprintf(p, "(%lu, %s, %#08x)", key_inum(c, key), + get_key_type(type), key_hash(c, key)); + break; + case UBIFS_DATA_KEY: + sprintf(p, "(%lu, %s, %u)", key_inum(c, key), + get_key_type(type), key_block(c, key)); + break; + case UBIFS_TRUN_KEY: + sprintf(p, "(%lu, %s)", + key_inum(c, key), get_key_type(type)); + break; + default: + sprintf(p, "(bad key type: %#08x, %#08x)", + key->u32[0], key->u32[1]); + } + } else + sprintf(p, "bad key format %d", c->key_fmt); +} + +const char *dbg_key_str0(const struct ubifs_info *c, const union ubifs_key *key) +{ + /* dbg_lock must be held */ + sprintf_key(c, key, dbg_key_buf0); + return dbg_key_buf0; +} + +const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key) +{ + /* dbg_lock must be held */ + sprintf_key(c, key, dbg_key_buf1); + return dbg_key_buf1; +} + +const char *dbg_ntype(int type) +{ + switch (type) { + case UBIFS_PAD_NODE: + return "padding node"; + case UBIFS_SB_NODE: + return "superblock node"; + case UBIFS_MST_NODE: + return "master node"; + case UBIFS_REF_NODE: + return "reference node"; + case UBIFS_INO_NODE: + return "inode node"; + case UBIFS_DENT_NODE: + return "direntry node"; + case UBIFS_XENT_NODE: + return "xentry node"; + case UBIFS_DATA_NODE: + return "data node"; + case UBIFS_TRUN_NODE: + return "truncate node"; + case UBIFS_IDX_NODE: + return "indexing node"; + case UBIFS_CS_NODE: + return "commit start node"; + case UBIFS_ORPH_NODE: + return "orphan node"; + default: + return "unknown node"; + } +} + +static const char *dbg_gtype(int type) +{ + switch (type) { + case UBIFS_NO_NODE_GROUP: + return "no node group"; + case UBIFS_IN_NODE_GROUP: + return "in node group"; + case UBIFS_LAST_OF_NODE_GROUP: + return "last of node group"; + default: + return "unknown"; + } +} + +const char *dbg_cstate(int cmt_state) +{ + switch (cmt_state) { + case COMMIT_RESTING: + return "commit resting"; + case COMMIT_BACKGROUND: + return "background commit requested"; + case COMMIT_REQUIRED: + return "commit required"; + case COMMIT_RUNNING_BACKGROUND: + return "BACKGROUND commit running"; + case COMMIT_RUNNING_REQUIRED: + return "commit running and required"; + case COMMIT_BROKEN: + return "broken commit"; + default: + return "unknown commit state"; + } +} + +static void dump_ch(const struct ubifs_ch *ch) +{ + printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic)); + printk(KERN_DEBUG "\tcrc %#x\n", le32_to_cpu(ch->crc)); + printk(KERN_DEBUG "\tnode_type %d (%s)\n", ch->node_type, + dbg_ntype(ch->node_type)); + printk(KERN_DEBUG "\tgroup_type %d (%s)\n", ch->group_type, + dbg_gtype(ch->group_type)); + printk(KERN_DEBUG "\tsqnum %llu\n", + (unsigned long long)le64_to_cpu(ch->sqnum)); + printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len)); +} + +void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode) +{ + const struct ubifs_inode *ui = ubifs_inode(inode); + + printk(KERN_DEBUG "inode %lu\n", inode->i_ino); + printk(KERN_DEBUG "size %llu\n", + (unsigned long long)i_size_read(inode)); + printk(KERN_DEBUG "nlink %u\n", inode->i_nlink); + printk(KERN_DEBUG "uid %u\n", (unsigned int)inode->i_uid); + printk(KERN_DEBUG "gid %u\n", (unsigned int)inode->i_gid); + printk(KERN_DEBUG "atime %u.%u\n", + (unsigned int)inode->i_atime.tv_sec, + (unsigned int)inode->i_atime.tv_nsec); + printk(KERN_DEBUG "mtime %u.%u\n", + (unsigned int)inode->i_mtime.tv_sec, + (unsigned int)inode->i_mtime.tv_nsec); + printk(KERN_DEBUG "ctime %u.%u\n", + (unsigned int)inode->i_ctime.tv_sec, + (unsigned int)inode->i_ctime.tv_nsec); + printk(KERN_DEBUG "creat_sqnum %llu\n", ui->creat_sqnum); + printk(KERN_DEBUG "xattr_size %u\n", ui->xattr_size); + printk(KERN_DEBUG "xattr_cnt %u\n", ui->xattr_cnt); + printk(KERN_DEBUG "xattr_names %u\n", ui->xattr_names); + printk(KERN_DEBUG "dirty %u\n", ui->dirty); + printk(KERN_DEBUG "xattr %u\n", ui->xattr); + printk(KERN_DEBUG "flags %d\n", ui->flags); + printk(KERN_DEBUG "compr_type %d\n", ui->compr_type); + printk(KERN_DEBUG "data_len %d\n", ui->data_len); +} + +void dbg_dump_node(const struct ubifs_info *c, const void *node) +{ + int i, n; + union ubifs_key key; + const struct ubifs_ch *ch = node; + + if (dbg_failure_mode) + return; + + /* If the magic is incorrect, just hexdump the first bytes */ + if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) { + printk(KERN_DEBUG "Not a node, first %zu bytes:", UBIFS_CH_SZ); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, + (void *)node, UBIFS_CH_SZ, 1); + return; + } + + spin_lock(&dbg_lock); + dump_ch(node); + + switch (ch->node_type) { + case UBIFS_PAD_NODE: + { + const struct ubifs_pad_node *pad = node; + + printk(KERN_DEBUG "\tpad_len %u\n", + le32_to_cpu(pad->pad_len)); + break; + } + case UBIFS_SB_NODE: + { + const struct ubifs_sb_node *sup = node; + unsigned int sup_flags = le32_to_cpu(sup->flags); + + printk(KERN_DEBUG "\tkey_hash %d (%s)\n", + (int)sup->key_hash, get_key_hash(sup->key_hash)); + printk(KERN_DEBUG "\tkey_fmt %d (%s)\n", + (int)sup->key_fmt, get_key_fmt(sup->key_fmt)); + printk(KERN_DEBUG "\tflags %#x\n", sup_flags); + printk(KERN_DEBUG "\t big_lpt %u\n", + !!(sup_flags & UBIFS_FLG_BIGLPT)); + printk(KERN_DEBUG "\tmin_io_size %u\n", + le32_to_cpu(sup->min_io_size)); + printk(KERN_DEBUG "\tleb_size %u\n", + le32_to_cpu(sup->leb_size)); + printk(KERN_DEBUG "\tleb_cnt %u\n", + le32_to_cpu(sup->leb_cnt)); + printk(KERN_DEBUG "\tmax_leb_cnt %u\n", + le32_to_cpu(sup->max_leb_cnt)); + printk(KERN_DEBUG "\tmax_bud_bytes %llu\n", + (unsigned long long)le64_to_cpu(sup->max_bud_bytes)); + printk(KERN_DEBUG "\tlog_lebs %u\n", + le32_to_cpu(sup->log_lebs)); + printk(KERN_DEBUG "\tlpt_lebs %u\n", + le32_to_cpu(sup->lpt_lebs)); + printk(KERN_DEBUG "\torph_lebs %u\n", + le32_to_cpu(sup->orph_lebs)); + printk(KERN_DEBUG "\tjhead_cnt %u\n", + le32_to_cpu(sup->jhead_cnt)); + printk(KERN_DEBUG "\tfanout %u\n", + le32_to_cpu(sup->fanout)); + printk(KERN_DEBUG "\tlsave_cnt %u\n", + le32_to_cpu(sup->lsave_cnt)); + printk(KERN_DEBUG "\tdefault_compr %u\n", + (int)le16_to_cpu(sup->default_compr)); + printk(KERN_DEBUG "\trp_size %llu\n", + (unsigned long long)le64_to_cpu(sup->rp_size)); + printk(KERN_DEBUG "\trp_uid %u\n", + le32_to_cpu(sup->rp_uid)); + printk(KERN_DEBUG "\trp_gid %u\n", + le32_to_cpu(sup->rp_gid)); + printk(KERN_DEBUG "\tfmt_version %u\n", + le32_to_cpu(sup->fmt_version)); + printk(KERN_DEBUG "\ttime_gran %u\n", + le32_to_cpu(sup->time_gran)); + printk(KERN_DEBUG "\tUUID %02X%02X%02X%02X-%02X%02X" + "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X\n", + sup->uuid[0], sup->uuid[1], sup->uuid[2], sup->uuid[3], + sup->uuid[4], sup->uuid[5], sup->uuid[6], sup->uuid[7], + sup->uuid[8], sup->uuid[9], sup->uuid[10], sup->uuid[11], + sup->uuid[12], sup->uuid[13], sup->uuid[14], + sup->uuid[15]); + break; + } + case UBIFS_MST_NODE: + { + const struct ubifs_mst_node *mst = node; + + printk(KERN_DEBUG "\thighest_inum %llu\n", + (unsigned long long)le64_to_cpu(mst->highest_inum)); + printk(KERN_DEBUG "\tcommit number %llu\n", + (unsigned long long)le64_to_cpu(mst->cmt_no)); + printk(KERN_DEBUG "\tflags %#x\n", + le32_to_cpu(mst->flags)); + printk(KERN_DEBUG "\tlog_lnum %u\n", + le32_to_cpu(mst->log_lnum)); + printk(KERN_DEBUG "\troot_lnum %u\n", + le32_to_cpu(mst->root_lnum)); + printk(KERN_DEBUG "\troot_offs %u\n", + le32_to_cpu(mst->root_offs)); + printk(KERN_DEBUG "\troot_len %u\n", + le32_to_cpu(mst->root_len)); + printk(KERN_DEBUG "\tgc_lnum %u\n", + le32_to_cpu(mst->gc_lnum)); + printk(KERN_DEBUG "\tihead_lnum %u\n", + le32_to_cpu(mst->ihead_lnum)); + printk(KERN_DEBUG "\tihead_offs %u\n", + le32_to_cpu(mst->ihead_offs)); + printk(KERN_DEBUG "\tindex_size %u\n", + le32_to_cpu(mst->index_size)); + printk(KERN_DEBUG "\tlpt_lnum %u\n", + le32_to_cpu(mst->lpt_lnum)); + printk(KERN_DEBUG "\tlpt_offs %u\n", + le32_to_cpu(mst->lpt_offs)); + printk(KERN_DEBUG "\tnhead_lnum %u\n", + le32_to_cpu(mst->nhead_lnum)); + printk(KERN_DEBUG "\tnhead_offs %u\n", + le32_to_cpu(mst->nhead_offs)); + printk(KERN_DEBUG "\tltab_lnum %u\n", + le32_to_cpu(mst->ltab_lnum)); + printk(KERN_DEBUG "\tltab_offs %u\n", + le32_to_cpu(mst->ltab_offs)); + printk(KERN_DEBUG "\tlsave_lnum %u\n", + le32_to_cpu(mst->lsave_lnum)); + printk(KERN_DEBUG "\tlsave_offs %u\n", + le32_to_cpu(mst->lsave_offs)); + printk(KERN_DEBUG "\tlscan_lnum %u\n", + le32_to_cpu(mst->lscan_lnum)); + printk(KERN_DEBUG "\tleb_cnt %u\n", + le32_to_cpu(mst->leb_cnt)); + printk(KERN_DEBUG "\tempty_lebs %u\n", + le32_to_cpu(mst->empty_lebs)); + printk(KERN_DEBUG "\tidx_lebs %u\n", + le32_to_cpu(mst->idx_lebs)); + printk(KERN_DEBUG "\ttotal_free %llu\n", + (unsigned long long)le64_to_cpu(mst->total_free)); + printk(KERN_DEBUG "\ttotal_dirty %llu\n", + (unsigned long long)le64_to_cpu(mst->total_dirty)); + printk(KERN_DEBUG "\ttotal_used %llu\n", + (unsigned long long)le64_to_cpu(mst->total_used)); + printk(KERN_DEBUG "\ttotal_dead %llu\n", + (unsigned long long)le64_to_cpu(mst->total_dead)); + printk(KERN_DEBUG "\ttotal_dark %llu\n", + (unsigned long long)le64_to_cpu(mst->total_dark)); + break; + } + case UBIFS_REF_NODE: + { + const struct ubifs_ref_node *ref = node; + + printk(KERN_DEBUG "\tlnum %u\n", + le32_to_cpu(ref->lnum)); + printk(KERN_DEBUG "\toffs %u\n", + le32_to_cpu(ref->offs)); + printk(KERN_DEBUG "\tjhead %u\n", + le32_to_cpu(ref->jhead)); + break; + } + case UBIFS_INO_NODE: + { + const struct ubifs_ino_node *ino = node; + + key_read(c, &ino->key, &key); + printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); + printk(KERN_DEBUG "\tcreat_sqnum %llu\n", + (unsigned long long)le64_to_cpu(ino->creat_sqnum)); + printk(KERN_DEBUG "\tsize %llu\n", + (unsigned long long)le64_to_cpu(ino->size)); + printk(KERN_DEBUG "\tnlink %u\n", + le32_to_cpu(ino->nlink)); + printk(KERN_DEBUG "\tatime %lld.%u\n", + (long long)le64_to_cpu(ino->atime_sec), + le32_to_cpu(ino->atime_nsec)); + printk(KERN_DEBUG "\tmtime %lld.%u\n", + (long long)le64_to_cpu(ino->mtime_sec), + le32_to_cpu(ino->mtime_nsec)); + printk(KERN_DEBUG "\tctime %lld.%u\n", + (long long)le64_to_cpu(ino->ctime_sec), + le32_to_cpu(ino->ctime_nsec)); + printk(KERN_DEBUG "\tuid %u\n", + le32_to_cpu(ino->uid)); + printk(KERN_DEBUG "\tgid %u\n", + le32_to_cpu(ino->gid)); + printk(KERN_DEBUG "\tmode %u\n", + le32_to_cpu(ino->mode)); + printk(KERN_DEBUG "\tflags %#x\n", + le32_to_cpu(ino->flags)); + printk(KERN_DEBUG "\txattr_cnt %u\n", + le32_to_cpu(ino->xattr_cnt)); + printk(KERN_DEBUG "\txattr_size %u\n", + le32_to_cpu(ino->xattr_size)); + printk(KERN_DEBUG "\txattr_names %u\n", + le32_to_cpu(ino->xattr_names)); + printk(KERN_DEBUG "\tcompr_type %#x\n", + (int)le16_to_cpu(ino->compr_type)); + printk(KERN_DEBUG "\tdata len %u\n", + le32_to_cpu(ino->data_len)); + break; + } + case UBIFS_DENT_NODE: + case UBIFS_XENT_NODE: + { + const struct ubifs_dent_node *dent = node; + int nlen = le16_to_cpu(dent->nlen); + + key_read(c, &dent->key, &key); + printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); + printk(KERN_DEBUG "\tinum %llu\n", + (unsigned long long)le64_to_cpu(dent->inum)); + printk(KERN_DEBUG "\ttype %d\n", (int)dent->type); + printk(KERN_DEBUG "\tnlen %d\n", nlen); + printk(KERN_DEBUG "\tname "); + + if (nlen > UBIFS_MAX_NLEN) + printk(KERN_DEBUG "(bad name length, not printing, " + "bad or corrupted node)"); + else { + for (i = 0; i < nlen && dent->name[i]; i++) + printk("%c", dent->name[i]); + } + printk("\n"); + + break; + } + case UBIFS_DATA_NODE: + { + const struct ubifs_data_node *dn = node; + int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; + + key_read(c, &dn->key, &key); + printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); + printk(KERN_DEBUG "\tsize %u\n", + le32_to_cpu(dn->size)); + printk(KERN_DEBUG "\tcompr_typ %d\n", + (int)le16_to_cpu(dn->compr_type)); + printk(KERN_DEBUG "\tdata size %d\n", + dlen); + printk(KERN_DEBUG "\tdata:\n"); + print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, 32, 1, + (void *)&dn->data, dlen, 0); + break; + } + case UBIFS_TRUN_NODE: + { + const struct ubifs_trun_node *trun = node; + + printk(KERN_DEBUG "\tinum %u\n", + le32_to_cpu(trun->inum)); + printk(KERN_DEBUG "\told_size %llu\n", + (unsigned long long)le64_to_cpu(trun->old_size)); + printk(KERN_DEBUG "\tnew_size %llu\n", + (unsigned long long)le64_to_cpu(trun->new_size)); + break; + } + case UBIFS_IDX_NODE: + { + const struct ubifs_idx_node *idx = node; + + n = le16_to_cpu(idx->child_cnt); + printk(KERN_DEBUG "\tchild_cnt %d\n", n); + printk(KERN_DEBUG "\tlevel %d\n", + (int)le16_to_cpu(idx->level)); + printk(KERN_DEBUG "\tBranches:\n"); + + for (i = 0; i < n && i < c->fanout - 1; i++) { + const struct ubifs_branch *br; + + br = ubifs_idx_branch(c, idx, i); + key_read(c, &br->key, &key); + printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n", + i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), + le32_to_cpu(br->len), DBGKEY(&key)); + } + break; + } + case UBIFS_CS_NODE: + break; + case UBIFS_ORPH_NODE: + { + const struct ubifs_orph_node *orph = node; + + printk(KERN_DEBUG "\tcommit number %llu\n", + (unsigned long long) + le64_to_cpu(orph->cmt_no) & LLONG_MAX); + printk(KERN_DEBUG "\tlast node flag %llu\n", + (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); + n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; + printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); + for (i = 0; i < n; i++) + printk(KERN_DEBUG "\t ino %llu\n", + le64_to_cpu(orph->inos[i])); + break; + } + default: + printk(KERN_DEBUG "node type %d was not recognized\n", + (int)ch->node_type); + } + spin_unlock(&dbg_lock); +} + +void dbg_dump_budget_req(const struct ubifs_budget_req *req) +{ + spin_lock(&dbg_lock); + printk(KERN_DEBUG "Budgeting request: new_ino %d, dirtied_ino %d\n", + req->new_ino, req->dirtied_ino); + printk(KERN_DEBUG "\tnew_ino_d %d, dirtied_ino_d %d\n", + req->new_ino_d, req->dirtied_ino_d); + printk(KERN_DEBUG "\tnew_page %d, dirtied_page %d\n", + req->new_page, req->dirtied_page); + printk(KERN_DEBUG "\tnew_dent %d, mod_dent %d\n", + req->new_dent, req->mod_dent); + printk(KERN_DEBUG "\tidx_growth %d\n", req->idx_growth); + printk(KERN_DEBUG "\tdata_growth %d dd_growth %d\n", + req->data_growth, req->dd_growth); + spin_unlock(&dbg_lock); +} + +void dbg_dump_lstats(const struct ubifs_lp_stats *lst) +{ + spin_lock(&dbg_lock); + printk(KERN_DEBUG "Lprops statistics: empty_lebs %d, idx_lebs %d\n", + lst->empty_lebs, lst->idx_lebs); + printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, " + "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, + lst->total_dirty); + printk(KERN_DEBUG "\ttotal_used %lld, total_dark %lld, " + "total_dead %lld\n", lst->total_used, lst->total_dark, + lst->total_dead); + spin_unlock(&dbg_lock); +} + +void dbg_dump_budg(struct ubifs_info *c) +{ + int i; + struct rb_node *rb; + struct ubifs_bud *bud; + struct ubifs_gced_idx_leb *idx_gc; + + spin_lock(&dbg_lock); + printk(KERN_DEBUG "Budgeting info: budg_data_growth %lld, " + "budg_dd_growth %lld, budg_idx_growth %lld\n", + c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); + printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " + "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, + c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth, + c->freeable_cnt); + printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, " + "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs, + c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt); + printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " + "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), + atomic_long_read(&c->dirty_zn_cnt), + atomic_long_read(&c->clean_zn_cnt)); + printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", + c->dark_wm, c->dead_wm, c->max_idx_node_sz); + printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", + c->gc_lnum, c->ihead_lnum); + for (i = 0; i < c->jhead_cnt; i++) + printk(KERN_DEBUG "\tjhead %d\t LEB %d\n", + c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum); + for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { + bud = rb_entry(rb, struct ubifs_bud, rb); + printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum); + } + list_for_each_entry(bud, &c->old_buds, list) + printk(KERN_DEBUG "\told bud LEB %d\n", bud->lnum); + list_for_each_entry(idx_gc, &c->idx_gc, list) + printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n", + idx_gc->lnum, idx_gc->unmap); + printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); + spin_unlock(&dbg_lock); +} + +void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) +{ + printk(KERN_DEBUG "LEB %d lprops: free %d, dirty %d (used %d), " + "flags %#x\n", lp->lnum, lp->free, lp->dirty, + c->leb_size - lp->free - lp->dirty, lp->flags); +} + +void dbg_dump_lprops(struct ubifs_info *c) +{ + int lnum, err; + struct ubifs_lprops lp; + struct ubifs_lp_stats lst; + + printk(KERN_DEBUG "Dumping LEB properties\n"); + ubifs_get_lp_stats(c, &lst); + dbg_dump_lstats(&lst); + + for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { + err = ubifs_read_one_lp(c, lnum, &lp); + if (err) + ubifs_err("cannot read lprops for LEB %d", lnum); + + dbg_dump_lprop(c, &lp); + } +} + +void dbg_dump_leb(const struct ubifs_info *c, int lnum) +{ + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + + if (dbg_failure_mode) + return; + + printk(KERN_DEBUG "Dumping LEB %d\n", lnum); + + sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); + if (IS_ERR(sleb)) { + ubifs_err("scan error %d", (int)PTR_ERR(sleb)); + return; + } + + printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum, + sleb->nodes_cnt, sleb->endpt); + + list_for_each_entry(snod, &sleb->nodes, list) { + cond_resched(); + printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", lnum, + snod->offs, snod->len); + dbg_dump_node(c, snod->node); + } + + ubifs_scan_destroy(sleb); + return; +} + +void dbg_dump_znode(const struct ubifs_info *c, + const struct ubifs_znode *znode) +{ + int n; + const struct ubifs_zbranch *zbr; + + spin_lock(&dbg_lock); + if (znode->parent) + zbr = &znode->parent->zbranch[znode->iip]; + else + zbr = &c->zroot; + + printk(KERN_DEBUG "znode %p, LEB %d:%d len %d parent %p iip %d level %d" + " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs, + zbr->len, znode->parent, znode->iip, znode->level, + znode->child_cnt, znode->flags); + + if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) { + spin_unlock(&dbg_lock); + return; + } + + printk(KERN_DEBUG "zbranches:\n"); + for (n = 0; n < znode->child_cnt; n++) { + zbr = &znode->zbranch[n]; + if (znode->level > 0) + printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key " + "%s\n", n, zbr->znode, zbr->lnum, + zbr->offs, zbr->len, + DBGKEY(&zbr->key)); + else + printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key " + "%s\n", n, zbr->znode, zbr->lnum, + zbr->offs, zbr->len, + DBGKEY(&zbr->key)); + } + spin_unlock(&dbg_lock); +} + +void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) +{ + int i; + + printk(KERN_DEBUG "Dumping heap cat %d (%d elements)\n", + cat, heap->cnt); + for (i = 0; i < heap->cnt; i++) { + struct ubifs_lprops *lprops = heap->arr[i]; + + printk(KERN_DEBUG "\t%d. LEB %d hpos %d free %d dirty %d " + "flags %d\n", i, lprops->lnum, lprops->hpos, + lprops->free, lprops->dirty, lprops->flags); + } +} + +void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, + struct ubifs_nnode *parent, int iip) +{ + int i; + + printk(KERN_DEBUG "Dumping pnode:\n"); + printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", + (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); + printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", + pnode->flags, iip, pnode->level, pnode->num); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_lprops *lp = &pnode->lprops[i]; + + printk(KERN_DEBUG "\t%d: free %d dirty %d flags %d lnum %d\n", + i, lp->free, lp->dirty, lp->flags, lp->lnum); + } +} + +void dbg_dump_tnc(struct ubifs_info *c) +{ + struct ubifs_znode *znode; + int level; + + printk(KERN_DEBUG "\n"); + printk(KERN_DEBUG "Dumping the TNC tree\n"); + znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); + level = znode->level; + printk(KERN_DEBUG "== Level %d ==\n", level); + while (znode) { + if (level != znode->level) { + level = znode->level; + printk(KERN_DEBUG "== Level %d ==\n", level); + } + dbg_dump_znode(c, znode); + znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); + } + + printk(KERN_DEBUG "\n"); +} + +static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, + void *priv) +{ + dbg_dump_znode(c, znode); + return 0; +} + +/** + * dbg_dump_index - dump the on-flash index. + * @c: UBIFS file-system description object + * + * This function dumps whole UBIFS indexing B-tree, unlike 'dbg_dump_tnc()' + * which dumps only in-memory znodes and does not read znodes which from flash. + */ +void dbg_dump_index(struct ubifs_info *c) +{ + dbg_walk_index(c, NULL, dump_znode, NULL); +} + +/** + * dbg_check_synced_i_size - check synchronized inode size. + * @inode: inode to check + * + * If inode is clean, synchronized inode size has to be equivalent to current + * inode size. This function has to be called only for locked inodes (@i_mutex + * has to be locked). Returns %0 if synchronized inode size if correct, and + * %-EINVAL if not. + */ +int dbg_check_synced_i_size(struct inode *inode) +{ + int err = 0; + struct ubifs_inode *ui = ubifs_inode(inode); + + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + return 0; + if (!S_ISREG(inode->i_mode)) + return 0; + + mutex_lock(&ui->ui_mutex); + spin_lock(&ui->ui_lock); + if (ui->ui_size != ui->synced_i_size && !ui->dirty) { + ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode " + "is clean", ui->ui_size, ui->synced_i_size); + ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino, + inode->i_mode, i_size_read(inode)); + dbg_dump_stack(); + err = -EINVAL; + } + spin_unlock(&ui->ui_lock); + mutex_unlock(&ui->ui_mutex); + return err; +} + +/* + * dbg_check_dir - check directory inode size and link count. + * @c: UBIFS file-system description object + * @dir: the directory to calculate size for + * @size: the result is returned here + * + * This function makes sure that directory size and link count are correct. + * Returns zero in case of success and a negative error code in case of + * failure. + * + * Note, it is good idea to make sure the @dir->i_mutex is locked before + * calling this function. + */ +int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) +{ + unsigned int nlink = 2; + union ubifs_key key; + struct ubifs_dent_node *dent, *pdent = NULL; + struct qstr nm = { .name = NULL }; + loff_t size = UBIFS_INO_NODE_SZ; + + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + return 0; + + if (!S_ISDIR(dir->i_mode)) + return 0; + + lowest_dent_key(c, &key, dir->i_ino); + while (1) { + int err; + + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + if (err == -ENOENT) + break; + return err; + } + + nm.name = dent->name; + nm.len = le16_to_cpu(dent->nlen); + size += CALC_DENT_SIZE(nm.len); + if (dent->type == UBIFS_ITYPE_DIR) + nlink += 1; + kfree(pdent); + pdent = dent; + key_read(c, &dent->key, &key); + } + kfree(pdent); + + if (i_size_read(dir) != size) { + ubifs_err("directory inode %lu has size %llu, " + "but calculated size is %llu", dir->i_ino, + (unsigned long long)i_size_read(dir), + (unsigned long long)size); + dump_stack(); + return -EINVAL; + } + if (dir->i_nlink != nlink) { + ubifs_err("directory inode %lu has nlink %u, but calculated " + "nlink is %u", dir->i_ino, dir->i_nlink, nlink); + dump_stack(); + return -EINVAL; + } + + return 0; +} + +/** + * dbg_check_key_order - make sure that colliding keys are properly ordered. + * @c: UBIFS file-system description object + * @zbr1: first zbranch + * @zbr2: following zbranch + * + * In UBIFS indexing B-tree colliding keys has to be sorted in binary order of + * names of the direntries/xentries which are referred by the keys. This + * function reads direntries/xentries referred by @zbr1 and @zbr2 and makes + * sure the name of direntry/xentry referred by @zbr1 is less than + * direntry/xentry referred by @zbr2. Returns zero if this is true, %1 if not, + * and a negative error code in case of failure. + */ +static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, + struct ubifs_zbranch *zbr2) +{ + int err, nlen1, nlen2, cmp; + struct ubifs_dent_node *dent1, *dent2; + union ubifs_key key; + + ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key)); + dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); + if (!dent1) + return -ENOMEM; + dent2 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); + if (!dent2) { + err = -ENOMEM; + goto out_free; + } + + err = ubifs_tnc_read_node(c, zbr1, dent1); + if (err) + goto out_free; + err = ubifs_validate_entry(c, dent1); + if (err) + goto out_free; + + err = ubifs_tnc_read_node(c, zbr2, dent2); + if (err) + goto out_free; + err = ubifs_validate_entry(c, dent2); + if (err) + goto out_free; + + /* Make sure node keys are the same as in zbranch */ + err = 1; + key_read(c, &dent1->key, &key); + if (keys_cmp(c, &zbr1->key, &key)) { + dbg_err("1st entry at %d:%d has key %s", zbr1->lnum, + zbr1->offs, DBGKEY(&key)); + dbg_err("but it should have key %s according to tnc", + DBGKEY(&zbr1->key)); + dbg_dump_node(c, dent1); + goto out_free; + } + + key_read(c, &dent2->key, &key); + if (keys_cmp(c, &zbr2->key, &key)) { + dbg_err("2nd entry at %d:%d has key %s", zbr1->lnum, + zbr1->offs, DBGKEY(&key)); + dbg_err("but it should have key %s according to tnc", + DBGKEY(&zbr2->key)); + dbg_dump_node(c, dent2); + goto out_free; + } + + nlen1 = le16_to_cpu(dent1->nlen); + nlen2 = le16_to_cpu(dent2->nlen); + + cmp = memcmp(dent1->name, dent2->name, min_t(int, nlen1, nlen2)); + if (cmp < 0 || (cmp == 0 && nlen1 < nlen2)) { + err = 0; + goto out_free; + } + if (cmp == 0 && nlen1 == nlen2) + dbg_err("2 xent/dent nodes with the same name"); + else + dbg_err("bad order of colliding key %s", + DBGKEY(&key)); + + dbg_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); + dbg_dump_node(c, dent1); + dbg_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs); + dbg_dump_node(c, dent2); + +out_free: + kfree(dent2); + kfree(dent1); + return err; +} + +/** + * dbg_check_znode - check if znode is all right. + * @c: UBIFS file-system description object + * @zbr: zbranch which points to this znode + * + * This function makes sure that znode referred to by @zbr is all right. + * Returns zero if it is, and %-EINVAL if it is not. + */ +static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr) +{ + struct ubifs_znode *znode = zbr->znode; + struct ubifs_znode *zp = znode->parent; + int n, err, cmp; + + if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) { + err = 1; + goto out; + } + if (znode->level < 0) { + err = 2; + goto out; + } + if (znode->iip < 0 || znode->iip >= c->fanout) { + err = 3; + goto out; + } + + if (zbr->len == 0) + /* Only dirty zbranch may have no on-flash nodes */ + if (!ubifs_zn_dirty(znode)) { + err = 4; + goto out; + } + + if (ubifs_zn_dirty(znode)) { + /* + * If znode is dirty, its parent has to be dirty as well. The + * order of the operation is important, so we have to have + * memory barriers. + */ + smp_mb(); + if (zp && !ubifs_zn_dirty(zp)) { + /* + * The dirty flag is atomic and is cleared outside the + * TNC mutex, so znode's dirty flag may now have + * been cleared. The child is always cleared before the + * parent, so we just need to check again. + */ + smp_mb(); + if (ubifs_zn_dirty(znode)) { + err = 5; + goto out; + } + } + } + + if (zp) { + const union ubifs_key *min, *max; + + if (znode->level != zp->level - 1) { + err = 6; + goto out; + } + + /* Make sure the 'parent' pointer in our znode is correct */ + err = ubifs_search_zbranch(c, zp, &zbr->key, &n); + if (!err) { + /* This zbranch does not exist in the parent */ + err = 7; + goto out; + } + + if (znode->iip >= zp->child_cnt) { + err = 8; + goto out; + } + + if (znode->iip != n) { + /* This may happen only in case of collisions */ + if (keys_cmp(c, &zp->zbranch[n].key, + &zp->zbranch[znode->iip].key)) { + err = 9; + goto out; + } + n = znode->iip; + } + + /* + * Make sure that the first key in our znode is greater than or + * equal to the key in the pointing zbranch. + */ + min = &zbr->key; + cmp = keys_cmp(c, min, &znode->zbranch[0].key); + if (cmp == 1) { + err = 10; + goto out; + } + + if (n + 1 < zp->child_cnt) { + max = &zp->zbranch[n + 1].key; + + /* + * Make sure the last key in our znode is less or + * equivalent than the the key in zbranch which goes + * after our pointing zbranch. + */ + cmp = keys_cmp(c, max, + &znode->zbranch[znode->child_cnt - 1].key); + if (cmp == -1) { + err = 11; + goto out; + } + } + } else { + /* This may only be root znode */ + if (zbr != &c->zroot) { + err = 12; + goto out; + } + } + + /* + * Make sure that next key is greater or equivalent then the previous + * one. + */ + for (n = 1; n < znode->child_cnt; n++) { + cmp = keys_cmp(c, &znode->zbranch[n - 1].key, + &znode->zbranch[n].key); + if (cmp > 0) { + err = 13; + goto out; + } + if (cmp == 0) { + /* This can only be keys with colliding hash */ + if (!is_hash_key(c, &znode->zbranch[n].key)) { + err = 14; + goto out; + } + + if (znode->level != 0 || c->replaying) + continue; + + /* + * Colliding keys should follow binary order of + * corresponding xentry/dentry names. + */ + err = dbg_check_key_order(c, &znode->zbranch[n - 1], + &znode->zbranch[n]); + if (err < 0) + return err; + if (err) { + err = 15; + goto out; + } + } + } + + for (n = 0; n < znode->child_cnt; n++) { + if (!znode->zbranch[n].znode && + (znode->zbranch[n].lnum == 0 || + znode->zbranch[n].len == 0)) { + err = 16; + goto out; + } + + if (znode->zbranch[n].lnum != 0 && + znode->zbranch[n].len == 0) { + err = 17; + goto out; + } + + if (znode->zbranch[n].lnum == 0 && + znode->zbranch[n].len != 0) { + err = 18; + goto out; + } + + if (znode->zbranch[n].lnum == 0 && + znode->zbranch[n].offs != 0) { + err = 19; + goto out; + } + + if (znode->level != 0 && znode->zbranch[n].znode) + if (znode->zbranch[n].znode->parent != znode) { + err = 20; + goto out; + } + } + + return 0; + +out: + ubifs_err("failed, error %d", err); + ubifs_msg("dump of the znode"); + dbg_dump_znode(c, znode); + if (zp) { + ubifs_msg("dump of the parent znode"); + dbg_dump_znode(c, zp); + } + dump_stack(); + return -EINVAL; +} + +/** + * dbg_check_tnc - check TNC tree. + * @c: UBIFS file-system description object + * @extra: do extra checks that are possible at start commit + * + * This function traverses whole TNC tree and checks every znode. Returns zero + * if everything is all right and %-EINVAL if something is wrong with TNC. + */ +int dbg_check_tnc(struct ubifs_info *c, int extra) +{ + struct ubifs_znode *znode; + long clean_cnt = 0, dirty_cnt = 0; + int err, last; + + if (!(ubifs_chk_flags & UBIFS_CHK_TNC)) + return 0; + + ubifs_assert(mutex_is_locked(&c->tnc_mutex)); + if (!c->zroot.znode) + return 0; + + znode = ubifs_tnc_postorder_first(c->zroot.znode); + while (1) { + struct ubifs_znode *prev; + struct ubifs_zbranch *zbr; + + if (!znode->parent) + zbr = &c->zroot; + else + zbr = &znode->parent->zbranch[znode->iip]; + + err = dbg_check_znode(c, zbr); + if (err) + return err; + + if (extra) { + if (ubifs_zn_dirty(znode)) + dirty_cnt += 1; + else + clean_cnt += 1; + } + + prev = znode; + znode = ubifs_tnc_postorder_next(znode); + if (!znode) + break; + + /* + * If the last key of this znode is equivalent to the first key + * of the next znode (collision), then check order of the keys. + */ + last = prev->child_cnt - 1; + if (prev->level == 0 && znode->level == 0 && !c->replaying && + !keys_cmp(c, &prev->zbranch[last].key, + &znode->zbranch[0].key)) { + err = dbg_check_key_order(c, &prev->zbranch[last], + &znode->zbranch[0]); + if (err < 0) + return err; + if (err) { + ubifs_msg("first znode"); + dbg_dump_znode(c, prev); + ubifs_msg("second znode"); + dbg_dump_znode(c, znode); + return -EINVAL; + } + } + } + + if (extra) { + if (clean_cnt != atomic_long_read(&c->clean_zn_cnt)) { + ubifs_err("incorrect clean_zn_cnt %ld, calculated %ld", + atomic_long_read(&c->clean_zn_cnt), + clean_cnt); + return -EINVAL; + } + if (dirty_cnt != atomic_long_read(&c->dirty_zn_cnt)) { + ubifs_err("incorrect dirty_zn_cnt %ld, calculated %ld", + atomic_long_read(&c->dirty_zn_cnt), + dirty_cnt); + return -EINVAL; + } + } + + return 0; +} + +/** + * dbg_walk_index - walk the on-flash index. + * @c: UBIFS file-system description object + * @leaf_cb: called for each leaf node + * @znode_cb: called for each indexing node + * @priv: private date which is passed to callbacks + * + * This function walks the UBIFS index and calls the @leaf_cb for each leaf + * node and @znode_cb for each indexing node. Returns zero in case of success + * and a negative error code in case of failure. + * + * It would be better if this function removed every znode it pulled to into + * the TNC, so that the behavior more closely matched the non-debugging + * behavior. + */ +int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, + dbg_znode_callback znode_cb, void *priv) +{ + int err; + struct ubifs_zbranch *zbr; + struct ubifs_znode *znode, *child; + + mutex_lock(&c->tnc_mutex); + /* If the root indexing node is not in TNC - pull it */ + if (!c->zroot.znode) { + c->zroot.znode = ubifs_load_znode(c, &c->zroot, NULL, 0); + if (IS_ERR(c->zroot.znode)) { + err = PTR_ERR(c->zroot.znode); + c->zroot.znode = NULL; + goto out_unlock; + } + } + + /* + * We are going to traverse the indexing tree in the postorder manner. + * Go down and find the leftmost indexing node where we are going to + * start from. + */ + znode = c->zroot.znode; + while (znode->level > 0) { + zbr = &znode->zbranch[0]; + child = zbr->znode; + if (!child) { + child = ubifs_load_znode(c, zbr, znode, 0); + if (IS_ERR(child)) { + err = PTR_ERR(child); + goto out_unlock; + } + zbr->znode = child; + } + + znode = child; + } + + /* Iterate over all indexing nodes */ + while (1) { + int idx; + + cond_resched(); + + if (znode_cb) { + err = znode_cb(c, znode, priv); + if (err) { + ubifs_err("znode checking function returned " + "error %d", err); + dbg_dump_znode(c, znode); + goto out_dump; + } + } + if (leaf_cb && znode->level == 0) { + for (idx = 0; idx < znode->child_cnt; idx++) { + zbr = &znode->zbranch[idx]; + err = leaf_cb(c, zbr, priv); + if (err) { + ubifs_err("leaf checking function " + "returned error %d, for leaf " + "at LEB %d:%d", + err, zbr->lnum, zbr->offs); + goto out_dump; + } + } + } + + if (!znode->parent) + break; + + idx = znode->iip + 1; + znode = znode->parent; + if (idx < znode->child_cnt) { + /* Switch to the next index in the parent */ + zbr = &znode->zbranch[idx]; + child = zbr->znode; + if (!child) { + child = ubifs_load_znode(c, zbr, znode, idx); + if (IS_ERR(child)) { + err = PTR_ERR(child); + goto out_unlock; + } + zbr->znode = child; + } + znode = child; + } else + /* + * This is the last child, switch to the parent and + * continue. + */ + continue; + + /* Go to the lowest leftmost znode in the new sub-tree */ + while (znode->level > 0) { + zbr = &znode->zbranch[0]; + child = zbr->znode; + if (!child) { + child = ubifs_load_znode(c, zbr, znode, 0); + if (IS_ERR(child)) { + err = PTR_ERR(child); + goto out_unlock; + } + zbr->znode = child; + } + znode = child; + } + } + + mutex_unlock(&c->tnc_mutex); + return 0; + +out_dump: + if (znode->parent) + zbr = &znode->parent->zbranch[znode->iip]; + else + zbr = &c->zroot; + ubifs_msg("dump of znode at LEB %d:%d", zbr->lnum, zbr->offs); + dbg_dump_znode(c, znode); +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * add_size - add znode size to partially calculated index size. + * @c: UBIFS file-system description object + * @znode: znode to add size for + * @priv: partially calculated index size + * + * This is a helper function for 'dbg_check_idx_size()' which is called for + * every indexing node and adds its size to the 'long long' variable pointed to + * by @priv. + */ +static int add_size(struct ubifs_info *c, struct ubifs_znode *znode, void *priv) +{ + long long *idx_size = priv; + int add; + + add = ubifs_idx_node_sz(c, znode->child_cnt); + add = ALIGN(add, 8); + *idx_size += add; + return 0; +} + +/** + * dbg_check_idx_size - check index size. + * @c: UBIFS file-system description object + * @idx_size: size to check + * + * This function walks the UBIFS index, calculates its size and checks that the + * size is equivalent to @idx_size. Returns zero in case of success and a + * negative error code in case of failure. + */ +int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) +{ + int err; + long long calc = 0; + + if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ)) + return 0; + + err = dbg_walk_index(c, NULL, add_size, &calc); + if (err) { + ubifs_err("error %d while walking the index", err); + return err; + } + + if (calc != idx_size) { + ubifs_err("index size check failed: calculated size is %lld, " + "should be %lld", calc, idx_size); + dump_stack(); + return -EINVAL; + } + + return 0; +} + +/** + * struct fsck_inode - information about an inode used when checking the file-system. + * @rb: link in the RB-tree of inodes + * @inum: inode number + * @mode: inode type, permissions, etc + * @nlink: inode link count + * @xattr_cnt: count of extended attributes + * @references: how many directory/xattr entries refer this inode (calculated + * while walking the index) + * @calc_cnt: for directory inode count of child directories + * @size: inode size (read from on-flash inode) + * @xattr_sz: summary size of all extended attributes (read from on-flash + * inode) + * @calc_sz: for directories calculated directory size + * @calc_xcnt: count of extended attributes + * @calc_xsz: calculated summary size of all extended attributes + * @xattr_nms: sum of lengths of all extended attribute names belonging to this + * inode (read from on-flash inode) + * @calc_xnms: calculated sum of lengths of all extended attribute names + */ +struct fsck_inode { + struct rb_node rb; + ino_t inum; + umode_t mode; + unsigned int nlink; + unsigned int xattr_cnt; + int references; + int calc_cnt; + long long size; + unsigned int xattr_sz; + long long calc_sz; + long long calc_xcnt; + long long calc_xsz; + unsigned int xattr_nms; + long long calc_xnms; +}; + +/** + * struct fsck_data - private FS checking information. + * @inodes: RB-tree of all inodes (contains @struct fsck_inode objects) + */ +struct fsck_data { + struct rb_root inodes; +}; + +/** + * add_inode - add inode information to RB-tree of inodes. + * @c: UBIFS file-system description object + * @fsckd: FS checking information + * @ino: raw UBIFS inode to add + * + * This is a helper function for 'check_leaf()' which adds information about + * inode @ino to the RB-tree of inodes. Returns inode information pointer in + * case of success and a negative error code in case of failure. + */ +static struct fsck_inode *add_inode(struct ubifs_info *c, + struct fsck_data *fsckd, + struct ubifs_ino_node *ino) +{ + struct rb_node **p, *parent = NULL; + struct fsck_inode *fscki; + ino_t inum = key_inum_flash(c, &ino->key); + + p = &fsckd->inodes.rb_node; + while (*p) { + parent = *p; + fscki = rb_entry(parent, struct fsck_inode, rb); + if (inum < fscki->inum) + p = &(*p)->rb_left; + else if (inum > fscki->inum) + p = &(*p)->rb_right; + else + return fscki; + } + + if (inum > c->highest_inum) { + ubifs_err("too high inode number, max. is %lu", + c->highest_inum); + return ERR_PTR(-EINVAL); + } + + fscki = kzalloc(sizeof(struct fsck_inode), GFP_NOFS); + if (!fscki) + return ERR_PTR(-ENOMEM); + + fscki->inum = inum; + fscki->nlink = le32_to_cpu(ino->nlink); + fscki->size = le64_to_cpu(ino->size); + fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); + fscki->xattr_sz = le32_to_cpu(ino->xattr_size); + fscki->xattr_nms = le32_to_cpu(ino->xattr_names); + fscki->mode = le32_to_cpu(ino->mode); + if (S_ISDIR(fscki->mode)) { + fscki->calc_sz = UBIFS_INO_NODE_SZ; + fscki->calc_cnt = 2; + } + rb_link_node(&fscki->rb, parent, p); + rb_insert_color(&fscki->rb, &fsckd->inodes); + return fscki; +} + +/** + * search_inode - search inode in the RB-tree of inodes. + * @fsckd: FS checking information + * @inum: inode number to search + * + * This is a helper function for 'check_leaf()' which searches inode @inum in + * the RB-tree of inodes and returns an inode information pointer or %NULL if + * the inode was not found. + */ +static struct fsck_inode *search_inode(struct fsck_data *fsckd, ino_t inum) +{ + struct rb_node *p; + struct fsck_inode *fscki; + + p = fsckd->inodes.rb_node; + while (p) { + fscki = rb_entry(p, struct fsck_inode, rb); + if (inum < fscki->inum) + p = p->rb_left; + else if (inum > fscki->inum) + p = p->rb_right; + else + return fscki; + } + return NULL; +} + +/** + * read_add_inode - read inode node and add it to RB-tree of inodes. + * @c: UBIFS file-system description object + * @fsckd: FS checking information + * @inum: inode number to read + * + * This is a helper function for 'check_leaf()' which finds inode node @inum in + * the index, reads it, and adds it to the RB-tree of inodes. Returns inode + * information pointer in case of success and a negative error code in case of + * failure. + */ +static struct fsck_inode *read_add_inode(struct ubifs_info *c, + struct fsck_data *fsckd, ino_t inum) +{ + int n, err; + union ubifs_key key; + struct ubifs_znode *znode; + struct ubifs_zbranch *zbr; + struct ubifs_ino_node *ino; + struct fsck_inode *fscki; + + fscki = search_inode(fsckd, inum); + if (fscki) + return fscki; + + ino_key_init(c, &key, inum); + err = ubifs_lookup_level0(c, &key, &znode, &n); + if (!err) { + ubifs_err("inode %lu not found in index", inum); + return ERR_PTR(-ENOENT); + } else if (err < 0) { + ubifs_err("error %d while looking up inode %lu", err, inum); + return ERR_PTR(err); + } + + zbr = &znode->zbranch[n]; + if (zbr->len < UBIFS_INO_NODE_SZ) { + ubifs_err("bad node %lu node length %d", inum, zbr->len); + return ERR_PTR(-EINVAL); + } + + ino = kmalloc(zbr->len, GFP_NOFS); + if (!ino) + return ERR_PTR(-ENOMEM); + + err = ubifs_tnc_read_node(c, zbr, ino); + if (err) { + ubifs_err("cannot read inode node at LEB %d:%d, error %d", + zbr->lnum, zbr->offs, err); + kfree(ino); + return ERR_PTR(err); + } + + fscki = add_inode(c, fsckd, ino); + kfree(ino); + if (IS_ERR(fscki)) { + ubifs_err("error %ld while adding inode %lu node", + PTR_ERR(fscki), inum); + return fscki; + } + + return fscki; +} + +/** + * check_leaf - check leaf node. + * @c: UBIFS file-system description object + * @zbr: zbranch of the leaf node to check + * @priv: FS checking information + * + * This is a helper function for 'dbg_check_filesystem()' which is called for + * every single leaf node while walking the indexing tree. It checks that the + * leaf node referred from the indexing tree exists, has correct CRC, and does + * some other basic validation. This function is also responsible for building + * an RB-tree of inodes - it adds all inodes into the RB-tree. It also + * calculates reference count, size, etc for each inode in order to later + * compare them to the information stored inside the inodes and detect possible + * inconsistencies. Returns zero in case of success and a negative error code + * in case of failure. + */ +static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, + void *priv) +{ + ino_t inum; + void *node; + struct ubifs_ch *ch; + int err, type = key_type(c, &zbr->key); + struct fsck_inode *fscki; + + if (zbr->len < UBIFS_CH_SZ) { + ubifs_err("bad leaf length %d (LEB %d:%d)", + zbr->len, zbr->lnum, zbr->offs); + return -EINVAL; + } + + node = kmalloc(zbr->len, GFP_NOFS); + if (!node) + return -ENOMEM; + + err = ubifs_tnc_read_node(c, zbr, node); + if (err) { + ubifs_err("cannot read leaf node at LEB %d:%d, error %d", + zbr->lnum, zbr->offs, err); + goto out_free; + } + + /* If this is an inode node, add it to RB-tree of inodes */ + if (type == UBIFS_INO_KEY) { + fscki = add_inode(c, priv, node); + if (IS_ERR(fscki)) { + err = PTR_ERR(fscki); + ubifs_err("error %d while adding inode node", err); + goto out_dump; + } + goto out; + } + + if (type != UBIFS_DENT_KEY && type != UBIFS_XENT_KEY && + type != UBIFS_DATA_KEY) { + ubifs_err("unexpected node type %d at LEB %d:%d", + type, zbr->lnum, zbr->offs); + err = -EINVAL; + goto out_free; + } + + ch = node; + if (le64_to_cpu(ch->sqnum) > c->max_sqnum) { + ubifs_err("too high sequence number, max. is %llu", + c->max_sqnum); + err = -EINVAL; + goto out_dump; + } + + if (type == UBIFS_DATA_KEY) { + long long blk_offs; + struct ubifs_data_node *dn = node; + + /* + * Search the inode node this data node belongs to and insert + * it to the RB-tree of inodes. + */ + inum = key_inum_flash(c, &dn->key); + fscki = read_add_inode(c, priv, inum); + if (IS_ERR(fscki)) { + err = PTR_ERR(fscki); + ubifs_err("error %d while processing data node and " + "trying to find inode node %lu", err, inum); + goto out_dump; + } + + /* Make sure the data node is within inode size */ + blk_offs = key_block_flash(c, &dn->key); + blk_offs <<= UBIFS_BLOCK_SHIFT; + blk_offs += le32_to_cpu(dn->size); + if (blk_offs > fscki->size) { + ubifs_err("data node at LEB %d:%d is not within inode " + "size %lld", zbr->lnum, zbr->offs, + fscki->size); + err = -EINVAL; + goto out_dump; + } + } else { + int nlen; + struct ubifs_dent_node *dent = node; + struct fsck_inode *fscki1; + + err = ubifs_validate_entry(c, dent); + if (err) + goto out_dump; + + /* + * Search the inode node this entry refers to and the parent + * inode node and insert them to the RB-tree of inodes. + */ + inum = le64_to_cpu(dent->inum); + fscki = read_add_inode(c, priv, inum); + if (IS_ERR(fscki)) { + err = PTR_ERR(fscki); + ubifs_err("error %d while processing entry node and " + "trying to find inode node %lu", err, inum); + goto out_dump; + } + + /* Count how many direntries or xentries refers this inode */ + fscki->references += 1; + + inum = key_inum_flash(c, &dent->key); + fscki1 = read_add_inode(c, priv, inum); + if (IS_ERR(fscki1)) { + err = PTR_ERR(fscki); + ubifs_err("error %d while processing entry node and " + "trying to find parent inode node %lu", + err, inum); + goto out_dump; + } + + nlen = le16_to_cpu(dent->nlen); + if (type == UBIFS_XENT_KEY) { + fscki1->calc_xcnt += 1; + fscki1->calc_xsz += CALC_DENT_SIZE(nlen); + fscki1->calc_xsz += CALC_XATTR_BYTES(fscki->size); + fscki1->calc_xnms += nlen; + } else { + fscki1->calc_sz += CALC_DENT_SIZE(nlen); + if (dent->type == UBIFS_ITYPE_DIR) + fscki1->calc_cnt += 1; + } + } + +out: + kfree(node); + return 0; + +out_dump: + ubifs_msg("dump of node at LEB %d:%d", zbr->lnum, zbr->offs); + dbg_dump_node(c, node); +out_free: + kfree(node); + return err; +} + +/** + * free_inodes - free RB-tree of inodes. + * @fsckd: FS checking information + */ +static void free_inodes(struct fsck_data *fsckd) +{ + struct rb_node *this = fsckd->inodes.rb_node; + struct fsck_inode *fscki; + + while (this) { + if (this->rb_left) + this = this->rb_left; + else if (this->rb_right) + this = this->rb_right; + else { + fscki = rb_entry(this, struct fsck_inode, rb); + this = rb_parent(this); + if (this) { + if (this->rb_left == &fscki->rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + kfree(fscki); + } + } +} + +/** + * check_inodes - checks all inodes. + * @c: UBIFS file-system description object + * @fsckd: FS checking information + * + * This is a helper function for 'dbg_check_filesystem()' which walks the + * RB-tree of inodes after the index scan has been finished, and checks that + * inode nlink, size, etc are correct. Returns zero if inodes are fine, + * %-EINVAL if not, and a negative error code in case of failure. + */ +static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd) +{ + int n, err; + union ubifs_key key; + struct ubifs_znode *znode; + struct ubifs_zbranch *zbr; + struct ubifs_ino_node *ino; + struct fsck_inode *fscki; + struct rb_node *this = rb_first(&fsckd->inodes); + + while (this) { + fscki = rb_entry(this, struct fsck_inode, rb); + this = rb_next(this); + + if (S_ISDIR(fscki->mode)) { + /* + * Directories have to have exactly one reference (they + * cannot have hardlinks), although root inode is an + * exception. + */ + if (fscki->inum != UBIFS_ROOT_INO && + fscki->references != 1) { + ubifs_err("directory inode %lu has %d " + "direntries which refer it, but " + "should be 1", fscki->inum, + fscki->references); + goto out_dump; + } + if (fscki->inum == UBIFS_ROOT_INO && + fscki->references != 0) { + ubifs_err("root inode %lu has non-zero (%d) " + "direntries which refer it", + fscki->inum, fscki->references); + goto out_dump; + } + if (fscki->calc_sz != fscki->size) { + ubifs_err("directory inode %lu size is %lld, " + "but calculated size is %lld", + fscki->inum, fscki->size, + fscki->calc_sz); + goto out_dump; + } + if (fscki->calc_cnt != fscki->nlink) { + ubifs_err("directory inode %lu nlink is %d, " + "but calculated nlink is %d", + fscki->inum, fscki->nlink, + fscki->calc_cnt); + goto out_dump; + } + } else { + if (fscki->references != fscki->nlink) { + ubifs_err("inode %lu nlink is %d, but " + "calculated nlink is %d", fscki->inum, + fscki->nlink, fscki->references); + goto out_dump; + } + } + if (fscki->xattr_sz != fscki->calc_xsz) { + ubifs_err("inode %lu has xattr size %u, but " + "calculated size is %lld", + fscki->inum, fscki->xattr_sz, + fscki->calc_xsz); + goto out_dump; + } + if (fscki->xattr_cnt != fscki->calc_xcnt) { + ubifs_err("inode %lu has %u xattrs, but " + "calculated count is %lld", fscki->inum, + fscki->xattr_cnt, fscki->calc_xcnt); + goto out_dump; + } + if (fscki->xattr_nms != fscki->calc_xnms) { + ubifs_err("inode %lu has xattr names' size %u, but " + "calculated names' size is %lld", + fscki->inum, fscki->xattr_nms, + fscki->calc_xnms); + goto out_dump; + } + } + + return 0; + +out_dump: + /* Read the bad inode and dump it */ + ino_key_init(c, &key, fscki->inum); + err = ubifs_lookup_level0(c, &key, &znode, &n); + if (!err) { + ubifs_err("inode %lu not found in index", fscki->inum); + return -ENOENT; + } else if (err < 0) { + ubifs_err("error %d while looking up inode %lu", + err, fscki->inum); + return err; + } + + zbr = &znode->zbranch[n]; + ino = kmalloc(zbr->len, GFP_NOFS); + if (!ino) + return -ENOMEM; + + err = ubifs_tnc_read_node(c, zbr, ino); + if (err) { + ubifs_err("cannot read inode node at LEB %d:%d, error %d", + zbr->lnum, zbr->offs, err); + kfree(ino); + return err; + } + + ubifs_msg("dump of the inode %lu sitting in LEB %d:%d", + fscki->inum, zbr->lnum, zbr->offs); + dbg_dump_node(c, ino); + kfree(ino); + return -EINVAL; +} + +/** + * dbg_check_filesystem - check the file-system. + * @c: UBIFS file-system description object + * + * This function checks the file system, namely: + * o makes sure that all leaf nodes exist and their CRCs are correct; + * o makes sure inode nlink, size, xattr size/count are correct (for all + * inodes). + * + * The function reads whole indexing tree and all nodes, so it is pretty + * heavy-weight. Returns zero if the file-system is consistent, %-EINVAL if + * not, and a negative error code in case of failure. + */ +int dbg_check_filesystem(struct ubifs_info *c) +{ + int err; + struct fsck_data fsckd; + + if (!(ubifs_chk_flags & UBIFS_CHK_FS)) + return 0; + + fsckd.inodes = RB_ROOT; + err = dbg_walk_index(c, check_leaf, NULL, &fsckd); + if (err) + goto out_free; + + err = check_inodes(c, &fsckd); + if (err) + goto out_free; + + free_inodes(&fsckd); + return 0; + +out_free: + ubifs_err("file-system check failed with error %d", err); + dump_stack(); + free_inodes(&fsckd); + return err; +} + +static int invocation_cnt; + +int dbg_force_in_the_gaps(void) +{ + if (!dbg_force_in_the_gaps_enabled) + return 0; + /* Force in-the-gaps every 8th commit */ + return !((invocation_cnt++) & 0x7); +} + +/* Failure mode for recovery testing */ + +#define chance(n, d) (simple_rand() <= (n) * 32768LL / (d)) + +struct failure_mode_info { + struct list_head list; + struct ubifs_info *c; +}; + +static LIST_HEAD(fmi_list); +static DEFINE_SPINLOCK(fmi_lock); + +static unsigned int next; + +static int simple_rand(void) +{ + if (next == 0) + next = current->pid; + next = next * 1103515245 + 12345; + return (next >> 16) & 32767; +} + +void dbg_failure_mode_registration(struct ubifs_info *c) +{ + struct failure_mode_info *fmi; + + fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS); + if (!fmi) { + dbg_err("Failed to register failure mode - no memory"); + return; + } + fmi->c = c; + spin_lock(&fmi_lock); + list_add_tail(&fmi->list, &fmi_list); + spin_unlock(&fmi_lock); +} + +void dbg_failure_mode_deregistration(struct ubifs_info *c) +{ + struct failure_mode_info *fmi, *tmp; + + spin_lock(&fmi_lock); + list_for_each_entry_safe(fmi, tmp, &fmi_list, list) + if (fmi->c == c) { + list_del(&fmi->list); + kfree(fmi); + } + spin_unlock(&fmi_lock); +} + +static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc) +{ + struct failure_mode_info *fmi; + + spin_lock(&fmi_lock); + list_for_each_entry(fmi, &fmi_list, list) + if (fmi->c->ubi == desc) { + struct ubifs_info *c = fmi->c; + + spin_unlock(&fmi_lock); + return c; + } + spin_unlock(&fmi_lock); + return NULL; +} + +static int in_failure_mode(struct ubi_volume_desc *desc) +{ + struct ubifs_info *c = dbg_find_info(desc); + + if (c && dbg_failure_mode) + return c->failure_mode; + return 0; +} + +static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) +{ + struct ubifs_info *c = dbg_find_info(desc); + + if (!c || !dbg_failure_mode) + return 0; + if (c->failure_mode) + return 1; + if (!c->fail_cnt) { + /* First call - decide delay to failure */ + if (chance(1, 2)) { + unsigned int delay = 1 << (simple_rand() >> 11); + + if (chance(1, 2)) { + c->fail_delay = 1; + c->fail_timeout = jiffies + + msecs_to_jiffies(delay); + dbg_rcvry("failing after %ums", delay); + } else { + c->fail_delay = 2; + c->fail_cnt_max = delay; + dbg_rcvry("failing after %u calls", delay); + } + } + c->fail_cnt += 1; + } + /* Determine if failure delay has expired */ + if (c->fail_delay == 1) { + if (time_before(jiffies, c->fail_timeout)) + return 0; + } else if (c->fail_delay == 2) + if (c->fail_cnt++ < c->fail_cnt_max) + return 0; + if (lnum == UBIFS_SB_LNUM) { + if (write) { + if (chance(1, 2)) + return 0; + } else if (chance(19, 20)) + return 0; + dbg_rcvry("failing in super block LEB %d", lnum); + } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) { + if (chance(19, 20)) + return 0; + dbg_rcvry("failing in master LEB %d", lnum); + } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) { + if (write) { + if (chance(99, 100)) + return 0; + } else if (chance(399, 400)) + return 0; + dbg_rcvry("failing in log LEB %d", lnum); + } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) { + if (write) { + if (chance(7, 8)) + return 0; + } else if (chance(19, 20)) + return 0; + dbg_rcvry("failing in LPT LEB %d", lnum); + } else if (lnum >= c->orph_first && lnum <= c->orph_last) { + if (write) { + if (chance(1, 2)) + return 0; + } else if (chance(9, 10)) + return 0; + dbg_rcvry("failing in orphan LEB %d", lnum); + } else if (lnum == c->ihead_lnum) { + if (chance(99, 100)) + return 0; + dbg_rcvry("failing in index head LEB %d", lnum); + } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) { + if (chance(9, 10)) + return 0; + dbg_rcvry("failing in GC head LEB %d", lnum); + } else if (write && !RB_EMPTY_ROOT(&c->buds) && + !ubifs_search_bud(c, lnum)) { + if (chance(19, 20)) + return 0; + dbg_rcvry("failing in non-bud LEB %d", lnum); + } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND || + c->cmt_state == COMMIT_RUNNING_REQUIRED) { + if (chance(999, 1000)) + return 0; + dbg_rcvry("failing in bud LEB %d commit running", lnum); + } else { + if (chance(9999, 10000)) + return 0; + dbg_rcvry("failing in bud LEB %d commit not running", lnum); + } + ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum); + c->failure_mode = 1; + dump_stack(); + return 1; +} + +static void cut_data(const void *buf, int len) +{ + int flen, i; + unsigned char *p = (void *)buf; + + flen = (len * (long long)simple_rand()) >> 15; + for (i = flen; i < len; i++) + p[i] = 0xff; +} + +int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, + int len, int check) +{ + if (in_failure_mode(desc)) + return -EIO; + return ubi_leb_read(desc, lnum, buf, offset, len, check); +} + +int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, + int offset, int len, int dtype) +{ + int err; + + if (in_failure_mode(desc)) + return -EIO; + if (do_fail(desc, lnum, 1)) + cut_data(buf, len); + err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); + if (err) + return err; + if (in_failure_mode(desc)) + return -EIO; + return 0; +} + +int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, + int len, int dtype) +{ + int err; + + if (do_fail(desc, lnum, 1)) + return -EIO; + err = ubi_leb_change(desc, lnum, buf, len, dtype); + if (err) + return err; + if (do_fail(desc, lnum, 1)) + return -EIO; + return 0; +} + +int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum) +{ + int err; + + if (do_fail(desc, lnum, 0)) + return -EIO; + err = ubi_leb_erase(desc, lnum); + if (err) + return err; + if (do_fail(desc, lnum, 0)) + return -EIO; + return 0; +} + +int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum) +{ + int err; + + if (do_fail(desc, lnum, 0)) + return -EIO; + err = ubi_leb_unmap(desc, lnum); + if (err) + return err; + if (do_fail(desc, lnum, 0)) + return -EIO; + return 0; +} + +int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) +{ + if (in_failure_mode(desc)) + return -EIO; + return ubi_is_mapped(desc, lnum); +} + +int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) +{ + int err; + + if (do_fail(desc, lnum, 0)) + return -EIO; + err = ubi_leb_map(desc, lnum, dtype); + if (err) + return err; + if (do_fail(desc, lnum, 0)) + return -EIO; + return 0; +} + +#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h new file mode 100644 index 0000000..3c4f1e9 --- /dev/null +++ b/fs/ubifs/debug.h @@ -0,0 +1,403 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +#ifndef __UBIFS_DEBUG_H__ +#define __UBIFS_DEBUG_H__ + +#ifdef CONFIG_UBIFS_FS_DEBUG + +#define UBIFS_DBG(op) op + +#define ubifs_assert(expr) do { \ + if (unlikely(!(expr))) { \ + printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ + __func__, __LINE__, current->pid); \ + dbg_dump_stack(); \ + } \ +} while (0) + +#define ubifs_assert_cmt_locked(c) do { \ + if (unlikely(down_write_trylock(&(c)->commit_sem))) { \ + up_write(&(c)->commit_sem); \ + printk(KERN_CRIT "commit lock is not locked!\n"); \ + ubifs_assert(0); \ + } \ +} while (0) + +#define dbg_dump_stack() do { \ + if (!dbg_failure_mode) \ + dump_stack(); \ +} while (0) + +/* Generic debugging messages */ +#define dbg_msg(fmt, ...) do { \ + spin_lock(&dbg_lock); \ + printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ + __func__, ##__VA_ARGS__); \ + spin_unlock(&dbg_lock); \ +} while (0) + +#define dbg_do_msg(typ, fmt, ...) do { \ + if (ubifs_msg_flags & typ) \ + dbg_msg(fmt, ##__VA_ARGS__); \ +} while (0) + +#define dbg_err(fmt, ...) do { \ + spin_lock(&dbg_lock); \ + ubifs_err(fmt, ##__VA_ARGS__); \ + spin_unlock(&dbg_lock); \ +} while (0) + +const char *dbg_key_str0(const struct ubifs_info *c, + const union ubifs_key *key); +const char *dbg_key_str1(const struct ubifs_info *c, + const union ubifs_key *key); + +/* + * DBGKEY macros require dbg_lock to be held, which it is in the dbg message + * macros. + */ +#define DBGKEY(key) dbg_key_str0(c, (key)) +#define DBGKEY1(key) dbg_key_str1(c, (key)) + +/* General messages */ +#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) + +/* Additional journal messages */ +#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) + +/* Additional TNC messages */ +#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) + +/* Additional lprops messages */ +#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) + +/* Additional LEB find messages */ +#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) + +/* Additional mount messages */ +#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) + +/* Additional I/O messages */ +#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) + +/* Additional commit messages */ +#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) + +/* Additional budgeting messages */ +#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) + +/* Additional log messages */ +#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) + +/* Additional gc messages */ +#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) + +/* Additional scan messages */ +#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) + +/* Additional recovery messages */ +#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) + +/* + * Debugging message type flags (must match msg_type_names in debug.c). + * + * UBIFS_MSG_GEN: general messages + * UBIFS_MSG_JNL: journal messages + * UBIFS_MSG_MNT: mount messages + * UBIFS_MSG_CMT: commit messages + * UBIFS_MSG_FIND: LEB find messages + * UBIFS_MSG_BUDG: budgeting messages + * UBIFS_MSG_GC: garbage collection messages + * UBIFS_MSG_TNC: TNC messages + * UBIFS_MSG_LP: lprops messages + * UBIFS_MSG_IO: I/O messages + * UBIFS_MSG_LOG: log messages + * UBIFS_MSG_SCAN: scan messages + * UBIFS_MSG_RCVRY: recovery messages + */ +enum { + UBIFS_MSG_GEN = 0x1, + UBIFS_MSG_JNL = 0x2, + UBIFS_MSG_MNT = 0x4, + UBIFS_MSG_CMT = 0x8, + UBIFS_MSG_FIND = 0x10, + UBIFS_MSG_BUDG = 0x20, + UBIFS_MSG_GC = 0x40, + UBIFS_MSG_TNC = 0x80, + UBIFS_MSG_LP = 0x100, + UBIFS_MSG_IO = 0x200, + UBIFS_MSG_LOG = 0x400, + UBIFS_MSG_SCAN = 0x800, + UBIFS_MSG_RCVRY = 0x1000, +}; + +/* Debugging message type flags for each default debug message level */ +#define UBIFS_MSG_LVL_0 0 +#define UBIFS_MSG_LVL_1 0x1 +#define UBIFS_MSG_LVL_2 0x7f +#define UBIFS_MSG_LVL_3 0xffff + +/* + * Debugging check flags (must match chk_names in debug.c). + * + * UBIFS_CHK_GEN: general checks + * UBIFS_CHK_TNC: check TNC + * UBIFS_CHK_IDX_SZ: check index size + * UBIFS_CHK_ORPH: check orphans + * UBIFS_CHK_OLD_IDX: check the old index + * UBIFS_CHK_LPROPS: check lprops + * UBIFS_CHK_FS: check the file-system + */ +enum { + UBIFS_CHK_GEN = 0x1, + UBIFS_CHK_TNC = 0x2, + UBIFS_CHK_IDX_SZ = 0x4, + UBIFS_CHK_ORPH = 0x8, + UBIFS_CHK_OLD_IDX = 0x10, + UBIFS_CHK_LPROPS = 0x20, + UBIFS_CHK_FS = 0x40, +}; + +/* + * Special testing flags (must match tst_names in debug.c). + * + * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method + * UBIFS_TST_RCVRY: failure mode for recovery testing + */ +enum { + UBIFS_TST_FORCE_IN_THE_GAPS = 0x2, + UBIFS_TST_RCVRY = 0x4, +}; + +#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1 +#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1 +#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2 +#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2 +#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3 +#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3 +#else +#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0 +#endif + +#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS +#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff +#else +#define UBIFS_CHK_FLAGS_DEFAULT 0 +#endif + +extern spinlock_t dbg_lock; + +extern unsigned int ubifs_msg_flags; +extern unsigned int ubifs_chk_flags; +extern unsigned int ubifs_tst_flags; + +/* Dump functions */ + +const char *dbg_ntype(int type); +const char *dbg_cstate(int cmt_state); +const char *dbg_get_key_dump(const struct ubifs_info *c, + const union ubifs_key *key); +void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); +void dbg_dump_node(const struct ubifs_info *c, const void *node); +void dbg_dump_budget_req(const struct ubifs_budget_req *req); +void dbg_dump_lstats(const struct ubifs_lp_stats *lst); +void dbg_dump_budg(struct ubifs_info *c); +void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); +void dbg_dump_lprops(struct ubifs_info *c); +void dbg_dump_leb(const struct ubifs_info *c, int lnum); +void dbg_dump_znode(const struct ubifs_info *c, + const struct ubifs_znode *znode); +void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat); +void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, + struct ubifs_nnode *parent, int iip); +void dbg_dump_tnc(struct ubifs_info *c); +void dbg_dump_index(struct ubifs_info *c); + +/* Checking helper functions */ + +typedef int (*dbg_leaf_callback)(struct ubifs_info *c, + struct ubifs_zbranch *zbr, void *priv); +typedef int (*dbg_znode_callback)(struct ubifs_info *c, + struct ubifs_znode *znode, void *priv); + +int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, + dbg_znode_callback znode_cb, void *priv); + +/* Checking functions */ + +int dbg_check_lprops(struct ubifs_info *c); + +int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot); +int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); + +int dbg_check_cats(struct ubifs_info *c); + +int dbg_check_ltab(struct ubifs_info *c); + +int dbg_check_synced_i_size(struct inode *inode); + +int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); + +int dbg_check_tnc(struct ubifs_info *c, int extra); + +int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); + +int dbg_check_filesystem(struct ubifs_info *c); + +void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, + int add_pos); + +int dbg_check_lprops(struct ubifs_info *c); +int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, + int row, int col); + +/* Force the use of in-the-gaps method for testing */ + +#define dbg_force_in_the_gaps_enabled \ + (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS) + +int dbg_force_in_the_gaps(void); + +/* Failure mode for recovery testing */ + +#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) + +void dbg_failure_mode_registration(struct ubifs_info *c); +void dbg_failure_mode_deregistration(struct ubifs_info *c); + +#ifndef UBIFS_DBG_PRESERVE_UBI + +#define ubi_leb_read dbg_leb_read +#define ubi_leb_write dbg_leb_write +#define ubi_leb_change dbg_leb_change +#define ubi_leb_erase dbg_leb_erase +#define ubi_leb_unmap dbg_leb_unmap +#define ubi_is_mapped dbg_is_mapped +#define ubi_leb_map dbg_leb_map + +#endif + +int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, + int len, int check); +int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, + int offset, int len, int dtype); +int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, + int len, int dtype); +int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum); +int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum); +int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum); +int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype); + +static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf, + int offset, int len) +{ + return dbg_leb_read(desc, lnum, buf, offset, len, 0); +} + +static inline int dbg_write(struct ubi_volume_desc *desc, int lnum, + const void *buf, int offset, int len) +{ + return dbg_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN); +} + +static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, + const void *buf, int len) +{ + return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN); +} + +#else /* !CONFIG_UBIFS_FS_DEBUG */ + +#define UBIFS_DBG(op) +#define ubifs_assert(expr) ({}) +#define ubifs_assert_cmt_locked(c) +#define dbg_dump_stack() +#define dbg_err(fmt, ...) ({}) +#define dbg_msg(fmt, ...) ({}) +#define dbg_key(c, key, fmt, ...) ({}) + +#define dbg_gen(fmt, ...) ({}) +#define dbg_jnl(fmt, ...) ({}) +#define dbg_tnc(fmt, ...) ({}) +#define dbg_lp(fmt, ...) ({}) +#define dbg_find(fmt, ...) ({}) +#define dbg_mnt(fmt, ...) ({}) +#define dbg_io(fmt, ...) ({}) +#define dbg_cmt(fmt, ...) ({}) +#define dbg_budg(fmt, ...) ({}) +#define dbg_log(fmt, ...) ({}) +#define dbg_gc(fmt, ...) ({}) +#define dbg_scan(fmt, ...) ({}) +#define dbg_rcvry(fmt, ...) ({}) + +#define dbg_ntype(type) "" +#define dbg_cstate(cmt_state) "" +#define dbg_get_key_dump(c, key) ({}) +#define dbg_dump_inode(c, inode) ({}) +#define dbg_dump_node(c, node) ({}) +#define dbg_dump_budget_req(req) ({}) +#define dbg_dump_lstats(lst) ({}) +#define dbg_dump_budg(c) ({}) +#define dbg_dump_lprop(c, lp) ({}) +#define dbg_dump_lprops(c) ({}) +#define dbg_dump_leb(c, lnum) ({}) +#define dbg_dump_znode(c, znode) ({}) +#define dbg_dump_heap(c, heap, cat) ({}) +#define dbg_dump_pnode(c, pnode, parent, iip) ({}) +#define dbg_dump_tnc(c) ({}) +#define dbg_dump_index(c) ({}) + +#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 + +#define dbg_old_index_check_init(c, zroot) 0 +#define dbg_check_old_index(c, zroot) 0 + +#define dbg_check_cats(c) 0 + +#define dbg_check_ltab(c) 0 + +#define dbg_check_synced_i_size(inode) 0 + +#define dbg_check_dir_size(c, dir) 0 + +#define dbg_check_tnc(c, x) 0 + +#define dbg_check_idx_size(c, idx_size) 0 + +#define dbg_check_filesystem(c) 0 + +#define dbg_check_heap(c, heap, cat, add_pos) ({}) + +#define dbg_check_lprops(c) 0 +#define dbg_check_lpt_nodes(c, cnode, row, col) 0 + +#define dbg_force_in_the_gaps_enabled 0 +#define dbg_force_in_the_gaps() 0 + +#define dbg_failure_mode 0 +#define dbg_failure_mode_registration(c) ({}) +#define dbg_failure_mode_deregistration(c) ({}) + +#endif /* !CONFIG_UBIFS_FS_DEBUG */ + +#endif /* !__UBIFS_DEBUG_H__ */ diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c new file mode 100644 index 0000000..e90374b --- /dev/null +++ b/fs/ubifs/dir.c @@ -0,0 +1,1240 @@ +/* * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * Copyright (C) 2006, 2007 University of Szeged, Hungary + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + * Zoltan Sogor + */ + +/* + * This file implements directory operations. + * + * All FS operations in this file allocate budget before writing anything to the + * media. If they fail to allocate it, the error is returned. The only + * exceptions are 'ubifs_unlink()' and 'ubifs_rmdir()' which keep working even + * if they unable to allocate the budget, because deletion %-ENOSPC failure is + * not what users are usually ready to get. UBIFS budgeting subsystem has some + * space reserved for these purposes. + * + * All operations in this file write all inodes which they change straight + * away, instead of marking them dirty. For example, 'ubifs_link()' changes + * @i_size of the parent inode and writes the parent inode together with the + * target inode. This was done to simplify file-system recovery which would + * otherwise be very difficult to do. The only exception is rename which marks + * the re-named inode dirty (because its @i_ctime is updated) but does not + * write it, but just marks it as dirty. + */ + +#include "ubifs.h" + +/** + * inherit_flags - inherit flags of the parent inode. + * @dir: parent inode + * @mode: new inode mode flags + * + * This is a helper function for 'ubifs_new_inode()' which inherits flag of the + * parent directory inode @dir. UBIFS inodes inherit the following flags: + * o %UBIFS_COMPR_FL, which is useful to switch compression on/of on + * sub-directory basis; + * o %UBIFS_SYNC_FL - useful for the same reasons; + * o %UBIFS_DIRSYNC_FL - similar, but relevant only to directories. + * + * This function returns the inherited flags. + */ +static int inherit_flags(const struct inode *dir, int mode) +{ + int flags; + const struct ubifs_inode *ui = ubifs_inode(dir); + + if (!S_ISDIR(dir->i_mode)) + /* + * The parent is not a directory, which means that an extended + * attribute inode is being created. No flags. + */ + return 0; + + flags = ui->flags & (UBIFS_COMPR_FL | UBIFS_SYNC_FL | UBIFS_DIRSYNC_FL); + if (!S_ISDIR(mode)) + /* The "DIRSYNC" flag only applies to directories */ + flags &= ~UBIFS_DIRSYNC_FL; + return flags; +} + +/** + * ubifs_new_inode - allocate new UBIFS inode object. + * @c: UBIFS file-system description object + * @dir: parent directory inode + * @mode: inode mode flags + * + * This function finds an unused inode number, allocates new inode and + * initializes it. Returns new inode in case of success and an error code in + * case of failure. + */ +struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, + int mode) +{ + struct inode *inode; + struct ubifs_inode *ui; + + inode = new_inode(c->vfs_sb); + ui = ubifs_inode(inode); + if (!inode) + return ERR_PTR(-ENOMEM); + + /* + * Set 'S_NOCMTIME' to prevent VFS form updating [mc]time of inodes and + * marking them dirty in file write path (see 'file_update_time()'). + * UBIFS has to fully control "clean <-> dirty" transitions of inodes + * to make budgeting work. + */ + inode->i_flags |= (S_NOCMTIME); + + inode->i_uid = current->fsuid; + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else + inode->i_gid = current->fsgid; + inode->i_mode = mode; + inode->i_mtime = inode->i_atime = inode->i_ctime = + ubifs_current_time(inode); + inode->i_mapping->nrpages = 0; + /* Disable readahead */ + inode->i_mapping->backing_dev_info = &c->bdi; + + switch (mode & S_IFMT) { + case S_IFREG: + inode->i_mapping->a_ops = &ubifs_file_address_operations; + inode->i_op = &ubifs_file_inode_operations; + inode->i_fop = &ubifs_file_operations; + break; + case S_IFDIR: + inode->i_op = &ubifs_dir_inode_operations; + inode->i_fop = &ubifs_dir_operations; + inode->i_size = ui->ui_size = UBIFS_INO_NODE_SZ; + break; + case S_IFLNK: + inode->i_op = &ubifs_symlink_inode_operations; + break; + case S_IFSOCK: + case S_IFIFO: + case S_IFBLK: + case S_IFCHR: + inode->i_op = &ubifs_file_inode_operations; + break; + default: + BUG(); + } + + ui->flags = inherit_flags(dir, mode); + ubifs_set_inode_flags(inode); + if (S_ISREG(mode)) + ui->compr_type = c->default_compr; + else + ui->compr_type = UBIFS_COMPR_NONE; + ui->synced_i_size = 0; + + spin_lock(&c->cnt_lock); + /* Inode number overflow is currently not supported */ + if (c->highest_inum >= INUM_WARN_WATERMARK) { + if (c->highest_inum >= INUM_WATERMARK) { + spin_unlock(&c->cnt_lock); + ubifs_err("out of inode numbers"); + make_bad_inode(inode); + iput(inode); + return ERR_PTR(-EINVAL); + } + ubifs_warn("running out of inode numbers (current %lu, max %d)", + c->highest_inum, INUM_WATERMARK); + } + + inode->i_ino = ++c->highest_inum; + inode->i_generation = ++c->vfs_gen; + /* + * The creation sequence number remains with this inode for its + * lifetime. All nodes for this inode have a greater sequence number, + * and so it is possible to distinguish obsolete nodes belonging to a + * previous incarnation of the same inode number - for example, for the + * purpose of rebuilding the index. + */ + ui->creat_sqnum = ++c->max_sqnum; + spin_unlock(&c->cnt_lock); + return inode; +} + +#ifdef CONFIG_UBIFS_FS_DEBUG + +static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm) +{ + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + return 0; + if (le16_to_cpu(dent->nlen) != nm->len) + return -EINVAL; + if (memcmp(dent->name, nm->name, nm->len)) + return -EINVAL; + return 0; +} + +#else + +#define dbg_check_name(dent, nm) 0 + +#endif + +static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + int err; + union ubifs_key key; + struct inode *inode = NULL; + struct ubifs_dent_node *dent; + struct ubifs_info *c = dir->i_sb->s_fs_info; + + dbg_gen("'%.*s' in dir ino %lu", + dentry->d_name.len, dentry->d_name.name, dir->i_ino); + + if (dentry->d_name.len > UBIFS_MAX_NLEN) + return ERR_PTR(-ENAMETOOLONG); + + dent = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); + if (!dent) + return ERR_PTR(-ENOMEM); + + dent_key_init(c, &key, dir->i_ino, &dentry->d_name); + + err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name); + if (err) { + /* + * Do not hash the direntry if parent 'i_nlink' is zero, because + * this has side-effects - '->delete_inode()' call will not be + * called for the parent orphan inode, because 'd_count' of its + * direntry will stay 1 (it'll be negative direntry I guess) + * and prevent 'iput_final()' until the dentry is destroyed due + * to unmount or memory pressure. + */ + if (err == -ENOENT && dir->i_nlink != 0) { + dbg_gen("not found"); + goto done; + } + goto out; + } + + if (dbg_check_name(dent, &dentry->d_name)) { + err = -EINVAL; + goto out; + } + + inode = ubifs_iget(dir->i_sb, le64_to_cpu(dent->inum)); + if (IS_ERR(inode)) { + /* + * This should not happen. Probably the file-system needs + * checking. + */ + err = PTR_ERR(inode); + ubifs_err("dead directory entry '%.*s', error %d", + dentry->d_name.len, dentry->d_name.name, err); + ubifs_ro_mode(c, err); + goto out; + } + +done: + kfree(dent); + /* + * Note, d_splice_alias() would be required instead if we supported + * NFS. + */ + d_add(dentry, inode); + return NULL; + +out: + kfree(dent); + return ERR_PTR(err); +} + +static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + struct inode *inode; + struct ubifs_info *c = dir->i_sb->s_fs_info; + int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .dirtied_ino = 1 }; + struct ubifs_inode *dir_ui = ubifs_inode(dir); + + /* + * Budget request settings: new inode, new direntry, changing the + * parent directory inode. + */ + + dbg_gen("dent '%.*s', mode %#x in dir ino %lu", + dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + inode = ubifs_new_inode(c, dir, mode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_budg; + } + + mutex_lock(&dir_ui->ui_mutex); + dir->i_size += sz_change; + dir_ui->ui_size = dir->i_size; + dir->i_mtime = dir->i_ctime = inode->i_ctime; + err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); + if (err) + goto out_cancel; + mutex_unlock(&dir_ui->ui_mutex); + + ubifs_release_budget(c, &req); + insert_inode_hash(inode); + d_instantiate(dentry, inode); + return 0; + +out_cancel: + dir->i_size -= sz_change; + dir_ui->ui_size = dir->i_size; + mutex_unlock(&dir_ui->ui_mutex); + make_bad_inode(inode); + iput(inode); +out_budg: + ubifs_release_budget(c, &req); + ubifs_err("cannot create regular file, error %d", err); + return err; +} + +/** + * vfs_dent_type - get VFS directory entry type. + * @type: UBIFS directory entry type + * + * This function converts UBIFS directory entry type into VFS directory entry + * type. + */ +static unsigned int vfs_dent_type(uint8_t type) +{ + switch (type) { + case UBIFS_ITYPE_REG: + return DT_REG; + case UBIFS_ITYPE_DIR: + return DT_DIR; + case UBIFS_ITYPE_LNK: + return DT_LNK; + case UBIFS_ITYPE_BLK: + return DT_BLK; + case UBIFS_ITYPE_CHR: + return DT_CHR; + case UBIFS_ITYPE_FIFO: + return DT_FIFO; + case UBIFS_ITYPE_SOCK: + return DT_SOCK; + default: + BUG(); + } + return 0; +} + +/* + * The classical Unix view for directory is that it is a linear array of + * (name, inode number) entries. Linux/VFS assumes this model as well. + * Particularly, 'readdir()' call wants us to return a directory entry offset + * which later may be used to continue 'readdir()'ing the directory or to + * 'seek()' to that specific direntry. Obviously UBIFS does not really fit this + * model because directory entries are identified by keys, which may collide. + * + * UBIFS uses directory entry hash value for directory offsets, so + * 'seekdir()'/'telldir()' may not always work because of possible key + * collisions. But UBIFS guarantees that consecutive 'readdir()' calls work + * properly by means of saving full directory entry name in the private field + * of the file description object. + * + * This means that UBIFS cannot support NFS which requires full + * 'seekdir()'/'telldir()' support. + */ +static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) +{ + int err, over = 0; + struct qstr nm; + union ubifs_key key; + struct ubifs_dent_node *dent; + struct inode *dir = file->f_path.dentry->d_inode; + struct ubifs_info *c = dir->i_sb->s_fs_info; + + dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos); + + if (file->f_pos > UBIFS_S_KEY_HASH_MASK || file->f_pos == 2) + /* + * The directory was seek'ed to a senseless position or there + * are no more entries. + */ + return 0; + + /* File positions 0 and 1 correspond to "." and ".." */ + if (file->f_pos == 0) { + ubifs_assert(!file->private_data); + over = filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR); + if (over) + return 0; + file->f_pos = 1; + } + + if (file->f_pos == 1) { + ubifs_assert(!file->private_data); + over = filldir(dirent, "..", 2, 1, + parent_ino(file->f_path.dentry), DT_DIR); + if (over) + return 0; + + /* Find the first entry in TNC and save it */ + lowest_dent_key(c, &key, dir->i_ino); + nm.name = NULL; + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + goto out; + } + + file->f_pos = key_hash_flash(c, &dent->key); + file->private_data = dent; + } + + dent = file->private_data; + if (!dent) { + /* + * The directory was seek'ed to and is now readdir'ed. + * Find the entry corresponding to @file->f_pos or the + * closest one. + */ + dent_key_init_hash(c, &key, dir->i_ino, file->f_pos); + nm.name = NULL; + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + goto out; + } + file->f_pos = key_hash_flash(c, &dent->key); + file->private_data = dent; + } + + while (1) { + dbg_gen("feed '%s', ino %llu, new f_pos %#x", + dent->name, le64_to_cpu(dent->inum), + key_hash_flash(c, &dent->key)); + ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum); + + nm.len = le16_to_cpu(dent->nlen); + over = filldir(dirent, dent->name, nm.len, file->f_pos, + le64_to_cpu(dent->inum), + vfs_dent_type(dent->type)); + if (over) + return 0; + + /* Switch to the next entry */ + key_read(c, &dent->key, &key); + nm.name = dent->name; + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + goto out; + } + + kfree(file->private_data); + file->f_pos = key_hash_flash(c, &dent->key); + file->private_data = dent; + cond_resched(); + } + +out: + if (err != -ENOENT) { + ubifs_err("cannot find next direntry, error %d", err); + return err; + } + + kfree(file->private_data); + file->private_data = NULL; + file->f_pos = 2; + return 0; +} + +/* If a directory is seeked, we have to free saved readdir() state */ +static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int origin) +{ + kfree(file->private_data); + file->private_data = NULL; + return generic_file_llseek(file, offset, origin); +} + +/* Free saved readdir() state when the directory is closed */ +static int ubifs_dir_release(struct inode *dir, struct file *file) +{ + kfree(file->private_data); + file->private_data = NULL; + return 0; +} + +/** + * lock_2_inodes - lock two UBIFS inodes. + * @inode1: first inode + * @inode2: second inode + */ +static void lock_2_inodes(struct inode *inode1, struct inode *inode2) +{ + if (inode1->i_ino < inode2->i_ino) { + mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_2); + mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_3); + } else { + mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); + mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_3); + } +} + +/** + * unlock_2_inodes - unlock two UBIFS inodes inodes. + * @inode1: first inode + * @inode2: second inode + */ +static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) +{ + mutex_unlock(&ubifs_inode(inode1)->ui_mutex); + mutex_unlock(&ubifs_inode(inode2)->ui_mutex); +} + +static int ubifs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + struct ubifs_info *c = dir->i_sb->s_fs_info; + struct inode *inode = old_dentry->d_inode; + struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_inode *dir_ui = ubifs_inode(dir); + int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); + struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2, + .dirtied_ino_d = ui->data_len }; + + /* + * Budget request settings: new direntry, changing the target inode, + * changing the parent inode. + */ + + dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu", + dentry->d_name.len, dentry->d_name.name, inode->i_ino, + inode->i_nlink, dir->i_ino); + err = dbg_check_synced_i_size(inode); + if (err) + return err; + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + lock_2_inodes(dir, inode); + inc_nlink(inode); + atomic_inc(&inode->i_count); + inode->i_ctime = ubifs_current_time(inode); + dir->i_size += sz_change; + dir_ui->ui_size = dir->i_size; + dir->i_mtime = dir->i_ctime = inode->i_ctime; + err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); + if (err) + goto out_cancel; + unlock_2_inodes(dir, inode); + + ubifs_release_budget(c, &req); + d_instantiate(dentry, inode); + return 0; + +out_cancel: + dir->i_size -= sz_change; + dir_ui->ui_size = dir->i_size; + drop_nlink(inode); + unlock_2_inodes(dir, inode); + ubifs_release_budget(c, &req); + iput(inode); + return err; +} + +static int ubifs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct ubifs_info *c = dir->i_sb->s_fs_info; + struct inode *inode = dentry->d_inode; + struct ubifs_inode *dir_ui = ubifs_inode(dir); + int sz_change = CALC_DENT_SIZE(dentry->d_name.len); + int err, budgeted = 1; + struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; + + /* + * Budget request settings: deletion direntry, deletion inode (+1 for + * @dirtied_ino), changing the parent directory inode. If budgeting + * fails, go ahead anyway because we have extra space reserved for + * deletions. + */ + + dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu", + dentry->d_name.len, dentry->d_name.name, inode->i_ino, + inode->i_nlink, dir->i_ino); + err = dbg_check_synced_i_size(inode); + if (err) + return err; + + err = ubifs_budget_space(c, &req); + if (err) { + if (err != -ENOSPC) + return err; + err = 0; + budgeted = 0; + } + + lock_2_inodes(dir, inode); + inode->i_ctime = ubifs_current_time(dir); + drop_nlink(inode); + dir->i_size -= sz_change; + dir_ui->ui_size = dir->i_size; + dir->i_mtime = dir->i_ctime = inode->i_ctime; + err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 1, 0); + if (err) + goto out_cancel; + unlock_2_inodes(dir, inode); + + if (budgeted) + ubifs_release_budget(c, &req); + else { + /* We've deleted something - clean the "no space" flags */ + c->nospace = c->nospace_rp = 0; + smp_wmb(); + } + return 0; + +out_cancel: + dir->i_size += sz_change; + dir_ui->ui_size = dir->i_size; + inc_nlink(inode); + unlock_2_inodes(dir, inode); + if (budgeted) + ubifs_release_budget(c, &req); + return err; +} + +/** + * check_dir_empty - check if a directory is empty or not. + * @c: UBIFS file-system description object + * @dir: VFS inode object of the directory to check + * + * This function checks if directory @dir is empty. Returns zero if the + * directory is empty, %-ENOTEMPTY if it is not, and other negative error codes + * in case of of errors. + */ +static int check_dir_empty(struct ubifs_info *c, struct inode *dir) +{ + struct qstr nm = { .name = NULL }; + struct ubifs_dent_node *dent; + union ubifs_key key; + int err; + + lowest_dent_key(c, &key, dir->i_ino); + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + if (err == -ENOENT) + err = 0; + } else { + kfree(dent); + err = -ENOTEMPTY; + } + return err; +} + +static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct ubifs_info *c = dir->i_sb->s_fs_info; + struct inode *inode = dentry->d_inode; + int sz_change = CALC_DENT_SIZE(dentry->d_name.len); + int err, budgeted = 1; + struct ubifs_inode *dir_ui = ubifs_inode(dir); + struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; + + /* + * Budget request settings: deletion direntry, deletion inode and + * changing the parent inode. If budgeting fails, go ahead anyway + * because we have extra space reserved for deletions. + */ + + dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len, + dentry->d_name.name, inode->i_ino, dir->i_ino); + + err = check_dir_empty(c, dentry->d_inode); + if (err) + return err; + + err = ubifs_budget_space(c, &req); + if (err) { + if (err != -ENOSPC) + return err; + budgeted = 0; + } + + lock_2_inodes(dir, inode); + inode->i_ctime = ubifs_current_time(dir); + clear_nlink(inode); + drop_nlink(dir); + dir->i_size -= sz_change; + dir_ui->ui_size = dir->i_size; + dir->i_mtime = dir->i_ctime = inode->i_ctime; + err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 1, 0); + if (err) + goto out_cancel; + unlock_2_inodes(dir, inode); + + if (budgeted) + ubifs_release_budget(c, &req); + else { + /* We've deleted something - clean the "no space" flags */ + c->nospace = c->nospace_rp = 0; + smp_wmb(); + } + return 0; + +out_cancel: + dir->i_size += sz_change; + dir_ui->ui_size = dir->i_size; + inc_nlink(dir); + inc_nlink(inode); + inc_nlink(inode); + unlock_2_inodes(dir, inode); + if (budgeted) + ubifs_release_budget(c, &req); + return err; +} + +static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct inode *inode; + struct ubifs_inode *dir_ui = ubifs_inode(dir); + struct ubifs_info *c = dir->i_sb->s_fs_info; + int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .dirtied_ino_d = 1 }; + + /* + * Budget request settings: new inode, new direntry and changing parent + * directory inode. + */ + + dbg_gen("dent '%.*s', mode %#x in dir ino %lu", + dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + inode = ubifs_new_inode(c, dir, S_IFDIR | mode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_budg; + } + + mutex_lock(&dir_ui->ui_mutex); + insert_inode_hash(inode); + inc_nlink(inode); + inc_nlink(dir); + dir->i_size += sz_change; + dir_ui->ui_size = dir->i_size; + dir->i_mtime = dir->i_ctime = inode->i_ctime; + err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); + if (err) { + ubifs_err("cannot create directory, error %d", err); + goto out_cancel; + } + mutex_unlock(&dir_ui->ui_mutex); + + ubifs_release_budget(c, &req); + d_instantiate(dentry, inode); + return 0; + +out_cancel: + dir->i_size -= sz_change; + dir_ui->ui_size = dir->i_size; + drop_nlink(dir); + mutex_unlock(&dir_ui->ui_mutex); + make_bad_inode(inode); + iput(inode); +out_budg: + ubifs_release_budget(c, &req); + return err; +} + +static int ubifs_mknod(struct inode *dir, struct dentry *dentry, + int mode, dev_t rdev) +{ + struct inode *inode; + struct ubifs_inode *ui; + struct ubifs_inode *dir_ui = ubifs_inode(dir); + struct ubifs_info *c = dir->i_sb->s_fs_info; + union ubifs_dev_desc *dev = NULL; + int sz_change = CALC_DENT_SIZE(dentry->d_name.len); + int err, devlen = 0; + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .new_ino_d = devlen, .dirtied_ino = 1 }; + + /* + * Budget request settings: new inode, new direntry and changing parent + * directory inode. + */ + + dbg_gen("dent '%.*s' in dir ino %lu", + dentry->d_name.len, dentry->d_name.name, dir->i_ino); + + if (!new_valid_dev(rdev)) + return -EINVAL; + + if (S_ISBLK(mode) || S_ISCHR(mode)) { + dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS); + if (!dev) + return -ENOMEM; + devlen = ubifs_encode_dev(dev, rdev); + } + + err = ubifs_budget_space(c, &req); + if (err) { + kfree(dev); + return err; + } + + inode = ubifs_new_inode(c, dir, mode); + if (IS_ERR(inode)) { + kfree(dev); + err = PTR_ERR(inode); + goto out_budg; + } + + init_special_inode(inode, inode->i_mode, rdev); + inode->i_size = ubifs_inode(inode)->ui_size = devlen; + ui = ubifs_inode(inode); + ui->data = dev; + ui->data_len = devlen; + + mutex_lock(&dir_ui->ui_mutex); + dir->i_size += sz_change; + dir_ui->ui_size = dir->i_size; + dir->i_mtime = dir->i_ctime = inode->i_ctime; + err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); + if (err) + goto out_cancel; + mutex_unlock(&dir_ui->ui_mutex); + + ubifs_release_budget(c, &req); + insert_inode_hash(inode); + d_instantiate(dentry, inode); + return 0; + +out_cancel: + dir->i_size -= sz_change; + dir_ui->ui_size = dir->i_size; + mutex_unlock(&dir_ui->ui_mutex); + make_bad_inode(inode); + iput(inode); +out_budg: + ubifs_release_budget(c, &req); + return err; +} + +static int ubifs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + struct inode *inode; + struct ubifs_inode *ui; + struct ubifs_inode *dir_ui = ubifs_inode(dir); + struct ubifs_info *c = dir->i_sb->s_fs_info; + int err, len = strlen(symname); + int sz_change = CALC_DENT_SIZE(dentry->d_name.len); + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .new_ino_d = len, .dirtied_ino = 1 }; + + /* + * Budget request settings: new inode, new direntry and changing parent + * directory inode. + */ + + dbg_gen("dent '%.*s', target '%s' in dir ino %lu", dentry->d_name.len, + dentry->d_name.name, symname, dir->i_ino); + + if (len > UBIFS_MAX_INO_DATA) + return -ENAMETOOLONG; + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_budg; + } + + ui = ubifs_inode(inode); + ui->data = kmalloc(len + 1, GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_inode; + } + + memcpy(ui->data, symname, len); + ((char *)ui->data)[len] = '\0'; + /* + * The terminating zero byte is not written to the flash media and it + * is put just to make later in-memory string processing simpler. Thus, + * data length is @len, not @len + %1. + */ + ui->data_len = len; + inode->i_size = ubifs_inode(inode)->ui_size = len; + + mutex_lock(&dir_ui->ui_mutex); + dir->i_size += sz_change; + dir_ui->ui_size = dir->i_size; + dir->i_mtime = dir->i_ctime = inode->i_ctime; + err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); + if (err) + goto out_cancel; + mutex_unlock(&dir_ui->ui_mutex); + + ubifs_release_budget(c, &req); + insert_inode_hash(inode); + d_instantiate(dentry, inode); + return 0; + +out_cancel: + dir->i_size -= sz_change; + dir_ui->ui_size = dir->i_size; + mutex_unlock(&dir_ui->ui_mutex); +out_inode: + make_bad_inode(inode); + iput(inode); +out_budg: + ubifs_release_budget(c, &req); + return err; +} + +/** + * lock_3_inodes - lock three UBIFS inodes for rename. + * @inode1: first inode + * @inode2: second inode + * @inode3: third inode + * + * For 'ubifs_rename()', @inode1 may be the same as @inode2 whereas @inode3 may + * be null. + */ +static void lock_3_inodes(struct inode *inode1, struct inode *inode2, + struct inode *inode3) +{ + struct inode *i1, *i2, *i3; + + if (!inode3) { + if (inode1 != inode2) { + lock_2_inodes(inode1, inode2); + return; + } + mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); + return; + } + + if (inode1 == inode2) { + lock_2_inodes(inode1, inode3); + return; + } + + /* 3 different inodes */ + if (inode1 < inode2) { + i3 = inode2; + if (inode1 < inode3) { + i1 = inode1; + i2 = inode3; + } else { + i1 = inode3; + i2 = inode1; + } + } else { + i3 = inode1; + if (inode2 < inode3) { + i1 = inode2; + i2 = inode3; + } else { + i1 = inode3; + i2 = inode2; + } + } + mutex_lock_nested(&ubifs_inode(i1)->ui_mutex, WB_MUTEX_1); + lock_2_inodes(i2, i3); +} + +/** + * unlock_3_inodes - unlock three UBIFS inodes for rename. + * @inode1: first inode + * @inode2: second inode + * @inode3: third inode + */ +static void unlock_3_inodes(struct inode *inode1, struct inode *inode2, + struct inode *inode3) +{ + mutex_unlock(&ubifs_inode(inode1)->ui_mutex); + if (inode1 != inode2) + mutex_unlock(&ubifs_inode(inode2)->ui_mutex); + if (inode3) + mutex_unlock(&ubifs_inode(inode3)->ui_mutex); +} + +static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct ubifs_info *c = old_dir->i_sb->s_fs_info; + struct inode *old_inode = old_dentry->d_inode; + struct inode *new_inode = new_dentry->d_inode; + struct ubifs_inode *old_inode_ui = ubifs_inode(old_inode); + int err, release, sync = 0, move = (new_dir != old_dir); + int is_dir = S_ISDIR(old_inode->i_mode); + int unlink = !!new_inode; + int new_sz = CALC_DENT_SIZE(new_dentry->d_name.len); + int old_sz = CALC_DENT_SIZE(old_dentry->d_name.len); + struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1, + .dirtied_ino = 3 }; + struct ubifs_budget_req ino_req = { .dirtied_ino = 1, + .dirtied_ino_d = old_inode_ui->data_len }; + struct timespec time; + + /* + * Budget request settings: deletion direntry, new direntry, removing + * the old inode, and changing old and new parent directory inodes. + * + * However, this operation also marks the target inode as dirty and + * does not write it, so we allocate budget for the target inode + * separately. + */ + + dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in " + "dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name, + old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len, + new_dentry->d_name.name, new_dir->i_ino); + + if (unlink && is_dir) { + err = check_dir_empty(c, new_inode); + if (err) + return err; + } + + err = ubifs_budget_space(c, &req); + if (err) + return err; + err = ubifs_budget_space(c, &ino_req); + if (err) { + ubifs_release_budget(c, &req); + return err; + } + + lock_3_inodes(old_dir, new_dir, new_inode); + + /* + * Like most other Unix systems, set the @i_ctime for inodes on a + * rename. + */ + time = ubifs_current_time(old_dir); + old_inode->i_ctime = time; + + /* We must adjust parent link count when renaming directories */ + if (is_dir) { + if (move) { + /* + * @old_dir loses a link because we are moving + * @old_inode to a different directory. + */ + drop_nlink(old_dir); + /* + * @new_dir only gains a link if we are not also + * overwriting an existing directory. + */ + if (!unlink) + inc_nlink(new_dir); + } else { + /* + * @old_inode is not moving to a different directory, + * but @old_dir still loses a link if we are + * overwriting an existing directory. + */ + if (unlink) + drop_nlink(old_dir); + } + } + + old_dir->i_size -= old_sz; + ubifs_inode(old_dir)->ui_size = old_dir->i_size; + old_dir->i_mtime = old_dir->i_ctime = time; + new_dir->i_mtime = new_dir->i_ctime = time; + + /* + * And finally, if we unlinked a direntry which happened to have the + * same name as the moved direntry, we have to decrement @i_nlink of + * the unlinked inode and change its ctime. + */ + if (unlink) { + /* + * Directories cannot have hard-links, so if this is a + * directory, decrement its @i_nlink twice because an empty + * directory has @i_nlink 2. + */ + if (is_dir) + drop_nlink(new_inode); + new_inode->i_ctime = time; + drop_nlink(new_inode); + } else { + new_dir->i_size += new_sz; + ubifs_inode(new_dir)->ui_size = new_dir->i_size; + } + + /* + * Do not ask 'ubifs_jnl_rename()' to flush write-buffer if @old_inode + * is dirty, because this will be done later on at the end of + * 'ubifs_rename()'. + */ + if (IS_SYNC(old_inode)) { + sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir); + if (unlink && IS_SYNC(new_inode)) + sync = 1; + } + err = ubifs_jnl_rename(c, old_dir, old_dentry, new_dir, new_dentry, + sync); + if (err) + goto out_cancel; + + unlock_3_inodes(old_dir, new_dir, new_inode); + ubifs_release_budget(c, &req); + + mutex_lock(&old_inode_ui->ui_mutex); + release = old_inode_ui->dirty; + mark_inode_dirty_sync(old_inode); + mutex_unlock(&old_inode_ui->ui_mutex); + + if (release) + ubifs_release_budget(c, &ino_req); + if (IS_SYNC(old_inode)) + err = old_inode->i_sb->s_op->write_inode(old_inode, 1); + return err; + +out_cancel: + if (unlink) { + if (is_dir) + inc_nlink(new_inode); + inc_nlink(new_inode); + } else { + new_dir->i_size -= new_sz; + ubifs_inode(new_dir)->ui_size = new_dir->i_size; + } + old_dir->i_size += old_sz; + ubifs_inode(old_dir)->ui_size = old_dir->i_size; + if (is_dir) { + if (move) { + inc_nlink(old_dir); + if (!unlink) + drop_nlink(new_dir); + } else { + if (unlink) + inc_nlink(old_dir); + } + } + unlock_3_inodes(old_dir, new_dir, new_inode); + ubifs_release_budget(c, &ino_req); + ubifs_release_budget(c, &req); + return err; +} + +int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + loff_t size; + struct inode *inode = dentry->d_inode; + struct ubifs_inode *ui = ubifs_inode(inode); + + mutex_lock(&ui->ui_mutex); + stat->dev = inode->i_sb->s_dev; + stat->ino = inode->i_ino; + stat->mode = inode->i_mode; + stat->nlink = inode->i_nlink; + stat->uid = inode->i_uid; + stat->gid = inode->i_gid; + stat->rdev = inode->i_rdev; + stat->atime = inode->i_atime; + stat->mtime = inode->i_mtime; + stat->ctime = inode->i_ctime; + stat->blksize = UBIFS_BLOCK_SIZE; + stat->size = ui->ui_size; + + /* + * Unfortunately, the 'stat()' system call was designed for block + * device based file systems, and it is not appropriate for UBIFS, + * because UBIFS does not have notion of "block". For example, it is + * difficult to tell how many block a directory takes - it actually + * takes less than 300 bytes, but we have to round it to block size, + * which introduces large mistake. This makes utilities like 'du' to + * report completely senseless numbers. This is the reason why UBIFS + * goes the same way as JFFS2 - it reports zero blocks for everything + * but regular files, which makes more sense than reporting completely + * wrong sizes. + */ + if (S_ISREG(inode->i_mode)) { + size = ui->xattr_size; + size += stat->size; + size = ALIGN(size, UBIFS_BLOCK_SIZE); + /* + * Note, user-space expects 512-byte blocks count irrespectively + * of what was reported in @stat->size. + */ + stat->blocks = size >> 9; + } else + stat->blocks = 0; + mutex_unlock(&ui->ui_mutex); + return 0; +} + +struct inode_operations ubifs_dir_inode_operations = { + .lookup = ubifs_lookup, + .create = ubifs_create, + .link = ubifs_link, + .symlink = ubifs_symlink, + .unlink = ubifs_unlink, + .mkdir = ubifs_mkdir, + .rmdir = ubifs_rmdir, + .mknod = ubifs_mknod, + .rename = ubifs_rename, + .setattr = ubifs_setattr, + .getattr = ubifs_getattr, +#ifdef CONFIG_UBIFS_FS_XATTR + .setxattr = ubifs_setxattr, + .getxattr = ubifs_getxattr, + .listxattr = ubifs_listxattr, + .removexattr = ubifs_removexattr, +#endif +}; + +struct file_operations ubifs_dir_operations = { + .llseek = ubifs_dir_llseek, + .release = ubifs_dir_release, + .read = generic_read_dir, + .readdir = ubifs_readdir, + .fsync = ubifs_fsync, + .unlocked_ioctl = ubifs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ubifs_compat_ioctl, +#endif +}; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c new file mode 100644 index 0000000..005a3b8 --- /dev/null +++ b/fs/ubifs/file.c @@ -0,0 +1,1275 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file implements VFS file and inode operations of regular files, device + * nodes and symlinks as well as address space operations. + * + * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the + * page is dirty and is used for budgeting purposes - dirty pages should not be + * budgeted. The PG_checked flag is set if full budgeting is required for the + * page e.g., when it corresponds to a file hole or it is just beyond the file + * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to + * fail in this function, and the budget is released in 'ubifs_write_end()'. So + * the PG_private and PG_checked flags carry the information about how the page + * was budgeted, to make it possible to release the budget properly. + * + * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations + * we implement. However, this is not true for '->writepage()', which might be + * called with 'i_mutex' unlocked. For example, when pdflush is performing + * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the + * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is + * locked in '->writepage', e.g. in "sys_write -> alloc_pages -> direct reclaim + * path'. So, in '->writepage()' we are only guaranteed that the page is + * locked. + * + * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g., + * readahead path does not have it locked ("sys_read -> generic_file_aio_read + * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is + * not set as well. However, UBIFS disables readahead. + * + * This, for example means that there might be 2 concurrent '->writepage()' + * calls for the same inode, but different inode dirty pages. + */ + +#include "ubifs.h" +#include + +static int read_block(struct inode *inode, void *addr, unsigned int block, + struct ubifs_data_node *dn) +{ + struct ubifs_info *c = inode->i_sb->s_fs_info; + int err, len, out_len; + union ubifs_key key; + unsigned int dlen; + + data_key_init(c, &key, inode->i_ino, block); + err = ubifs_tnc_lookup(c, &key, dn); + if (err) { + if (err == -ENOENT) + /* Not found, so it must be a hole */ + memset(addr, 0, UBIFS_BLOCK_SIZE); + return err; + } + + ubifs_assert(dn->ch.sqnum > ubifs_inode(inode)->creat_sqnum); + + len = le32_to_cpu(dn->size); + if (len <= 0 || len > UBIFS_BLOCK_SIZE) + goto dump; + + dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; + out_len = UBIFS_BLOCK_SIZE; + err = ubifs_decompress(&dn->data, dlen, addr, &out_len, + le16_to_cpu(dn->compr_type)); + if (err || len != out_len) + goto dump; + + /* + * Data length can be less than a full block, even for blocks that are + * not the last in the file (e.g., as a result of making a hole and + * appending data). Ensure that the remainder is zeroed out. + */ + if (len < UBIFS_BLOCK_SIZE) + memset(addr + len, 0, UBIFS_BLOCK_SIZE - len); + + return 0; + +dump: + ubifs_err("bad data node (block %u, inode %lu)", + block, inode->i_ino); + dbg_dump_node(c, dn); + return -EINVAL; +} + +static int do_readpage(struct page *page) +{ + void *addr; + int err = 0, i; + unsigned int block, beyond; + struct ubifs_data_node *dn; + struct inode *inode = page->mapping->host; + loff_t i_size = i_size_read(inode); + + dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx", + inode->i_ino, page->index, i_size, page->flags); + ubifs_assert(!PageChecked(page)); + ubifs_assert(!PagePrivate(page)); + + addr = kmap(page); + + block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; + beyond = (i_size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; + if (block >= beyond) { + /* Reading beyond inode */ + SetPageChecked(page); + memset(addr, 0, PAGE_CACHE_SIZE); + goto out; + } + + dn = kmalloc(UBIFS_MAX_DATA_NODE_SZ, GFP_NOFS); + if (!dn) { + err = -ENOMEM; + goto error; + } + + i = 0; + while (1) { + int ret; + + if (block >= beyond) { + /* Reading beyond inode */ + err = -ENOENT; + memset(addr, 0, UBIFS_BLOCK_SIZE); + } else { + ret = read_block(inode, addr, block, dn); + if (ret) { + err = ret; + if (err != -ENOENT) + break; + } + } + if (++i >= UBIFS_BLOCKS_PER_PAGE) + break; + block += 1; + addr += UBIFS_BLOCK_SIZE; + } + if (err) { + if (err == -ENOENT) { + /* Not found, so it must be a hole */ + SetPageChecked(page); + dbg_gen("hole"); + goto out_free; + } + ubifs_err("cannot read page %lu of inode %lu, error %d", + page->index, inode->i_ino, err); + goto error; + } + +out_free: + kfree(dn); +out: + SetPageUptodate(page); + ClearPageError(page); + flush_dcache_page(page); + kunmap(page); + return 0; + +error: + kfree(dn); + ClearPageUptodate(page); + SetPageError(page); + flush_dcache_page(page); + kunmap(page); + return err; +} + +/** + * release_new_page_budget - release budget of a new page. + * @c: UBIFS file-system description object + * + * This is a helper function which releases budget corresponding to the budget + * of one new page of data. + */ +static void release_new_page_budget(struct ubifs_info *c) +{ + struct ubifs_budget_req req = { .recalculate = 1, .new_page = 1 }; + + ubifs_release_budget(c, &req); +} + +/** + * release_existing_page_budget - release budget of an existing page. + * @c: UBIFS file-system description object + * + * This is a helper function which releases budget corresponding to the budget + * of changing one one page of data which already exists on the flash media. + */ +static void release_existing_page_budget(struct ubifs_info *c) +{ + struct ubifs_budget_req req = { .dd_growth = c->page_budget}; + + ubifs_release_budget(c, &req); +} + +static int write_begin_slow(struct address_space *mapping, + loff_t pos, unsigned len, struct page **pagep) +{ + struct inode *inode = mapping->host; + struct ubifs_info *c = inode->i_sb->s_fs_info; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + struct ubifs_budget_req req = { .new_page = 1 }; + int uninitialized_var(err), appending = !!(pos + len > inode->i_size); + struct page *page; + + dbg_gen("ino %lu, pos %llu, len %u, i_size %lld", + inode->i_ino, pos, len, inode->i_size); + + /* + * At the slow path we have to budget before locking the page, because + * budgeting may force write-back, which would wait on locked pages and + * deadlock if we had the page locked. At this point we do not know + * anything about the page, so assume that this is a new page which is + * written to a hole. This corresponds to largest budget. Later the + * budget will be amended if this is not true. + */ + if (appending) + /* We are appending data, budget for inode change */ + req.dirtied_ino = 1; + + err = ubifs_budget_space(c, &req); + if (unlikely(err)) + return err; + + page = __grab_cache_page(mapping, index); + if (unlikely(!page)) { + ubifs_release_budget(c, &req); + return -ENOMEM; + } + + if (!PageUptodate(page)) { + if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) + SetPageChecked(page); + else { + err = do_readpage(page); + if (err) { + unlock_page(page); + page_cache_release(page); + return err; + } + } + + SetPageUptodate(page); + ClearPageError(page); + } + + if (PagePrivate(page)) + /* + * The page is dirty, which means it was budgeted twice: + * o first time the budget was allocated by the task which + * made the page dirty and set the PG_private flag; + * o and then we budgeted for it for the second time at the + * very beginning of this function. + * + * So what we have to do is to release the page budget we + * allocated. + */ + release_new_page_budget(c); + else if (!PageChecked(page)) + /* + * We are changing a page which already exists on the media. + * This means that changing the page does not make the amount + * of indexing information larger, and this part of the budget + * which we have already acquired may be released. + */ + ubifs_convert_page_budget(c); + + if (appending) { + struct ubifs_inode *ui = ubifs_inode(inode); + + /* + * 'ubifs_write_end()' is optimized from the fast-path part of + * 'ubifs_write_begin()' and expects the @ui_mutex to be locked + * if data is appended. + */ + mutex_lock(&ui->ui_mutex); + if (ui->dirty) + /* + * The inode is dirty already, so we may free the + * budget we allocated. + */ + ubifs_release_dirty_inode_budget(c, ui); + } + + *pagep = page; + return 0; +} + +/** + * allocate_budget - allocate budget for 'ubifs_write_begin()'. + * @c: UBIFS file-system description object + * @page: page to allocate budget for + * @ui: UBIFS inode object the page belongs to + * @appending: non-zero if the page is appended + * + * This is a helper function for 'ubifs_write_begin()' which allocates budget + * for the operation. The budget is allocated differently depending on whether + * this is appending, whether the page is dirty or not, and so on. This + * function leaves the @ui->ui_mutex locked in case of appending. Returns zero + * in case of success and %-ENOSPC in case of failure. + */ +static int allocate_budget(struct ubifs_info *c, struct page *page, + struct ubifs_inode *ui, int appending) +{ + struct ubifs_budget_req req = { .fast = 1 }; + + if (PagePrivate(page)) { + if (!appending) + /* + * The page is dirty and we are not appending, which + * means no budget is needed at all. + */ + return 0; + + mutex_lock(&ui->ui_mutex); + if (ui->dirty) + /* + * The page is dirty and we are appending, so the inode + * has to be marked as dirty. However, it is already + * dirty, so we do not need any budget. We may return, + * but @ui->ui_mutex hast to be left locked because we + * should prevent write-back from flushing the inode + * and freeing the budget. The lock will be released in + * 'ubifs_write_end()'. + */ + return 0; + + /* + * The page is dirty, we are appending, the inode is clean, so + * we need to budget the inode change. + */ + req.dirtied_ino = 1; + } else { + if (PageChecked(page)) + /* + * The page corresponds to a hole and does not + * exist on the media. So changing it makes + * make the amount of indexing information + * larger, and we have to budget for a new + * page. + */ + req.new_page = 1; + else + /* + * Not a hole, the change will not add any new + * indexing information, budget for page + * change. + */ + req.dirtied_page = 1; + + if (appending) { + mutex_lock(&ui->ui_mutex); + if (!ui->dirty) + /* + * The inode is clean but we will have to mark + * it as dirty because we are appending. This + * needs a budget. + */ + req.dirtied_ino = 1; + } + } + + return ubifs_budget_space(c, &req); +} + +/* + * This function is called when a page of data is going to be written. Since + * the page of data will not necessarily go to the flash straight away, UBIFS + * has to reserve space on the media for it, which is done by means of + * budgeting. + * + * This is the hot-path of the file-system and we are trying to optimize it as + * much as possible. For this reasons it is split on 2 parts - slow and fast. + * + * There many budgeting cases: + * o a new page is appended - we have to budget for a new page and for + * changing the inode; however, if the inode is already dirty, there is + * no need to budget for it; + * o an existing clean page is changed - we have budget for it; if the page + * does not exist on the media (a hole), we have to budget for a new + * page; otherwise, we may budget for changing an existing page; the + * difference between these cases is that changing an existing page does + * not introduce anything new to the FS indexing information, so it does + * not grow, and smaller budget is acquired in this case; + * o an existing dirty page is changed - no need to budget at all, because + * the page budget has been acquired by earlier, when the page has been + * marked dirty. + * + * UBIFS budgeting sub-system may force write-back if it thinks there is no + * space to reserve. This imposes some locking restrictions and makes it + * impossible to take into account the above cases, and makes it impossible to + * optimize budgeting. + * + * The solution for this is that the fast path of 'ubifs_write_begin()' assumes + * there is a plenty of flash space and the budget will be acquired quickly, + * without forcing write-back. The slow path does not make this assumption. + */ +static int ubifs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + struct inode *inode = mapping->host; + struct ubifs_info *c = inode->i_sb->s_fs_info; + struct ubifs_inode *ui = ubifs_inode(inode); + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + int uninitialized_var(err), appending = !!(pos + len > inode->i_size); + struct page *page; + + + ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); + + if (unlikely(c->ro_media)) + return -EROFS; + + /* Try out the fast-path part first */ + page = __grab_cache_page(mapping, index); + if (unlikely(!page)) + return -ENOMEM; + + if (!PageUptodate(page)) { + /* The page is not loaded from the flash */ + if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) + /* + * We change whole page so no need to load it. But we + * have to set the @PG_checked flag to make the further + * code the page is new. This might be not true, but it + * is better to budget more that to read the page from + * the media. + */ + SetPageChecked(page); + else { + err = do_readpage(page); + if (err) { + unlock_page(page); + page_cache_release(page); + return err; + } + } + + SetPageUptodate(page); + ClearPageError(page); + } + + err = allocate_budget(c, page, ui, appending); + if (unlikely(err)) { + ubifs_assert(err == -ENOSPC); + /* + * Budgeting failed which means it would have to force + * write-back but didn't, because we set the @fast flag in the + * request. Write-back cannot be done now, while we have the + * page locked, because it would deadlock. Unlock and free + * everything and fall-back to slow-path. + */ + if (appending) { + ubifs_assert(mutex_is_locked(&ui->ui_mutex)); + mutex_unlock(&ui->ui_mutex); + } + unlock_page(page); + page_cache_release(page); + + return write_begin_slow(mapping, pos, len, pagep); + } + + /* + * Whee, we aquired budgeting quickly - without involving + * garbage-collection, committing or forceing write-back. We return + * with @ui->ui_mutex locked if we are appending pages, and unlocked + * otherwise. This is an optimization (slightly hacky though). + */ + *pagep = page; + return 0; + +} + +/** + * cancel_budget - cancel budget. + * @c: UBIFS file-system description object + * @page: page to cancel budget for + * @ui: UBIFS inode object the page belongs to + * @appending: non-zero if the page is appended + * + * This is a helper function for a page write operation. It unlocks the + * @ui->ui_mutex in case of appending. + */ +static void cancel_budget(struct ubifs_info *c, struct page *page, + struct ubifs_inode *ui, int appending) +{ + if (appending) { + if (!ui->dirty) + ubifs_release_dirty_inode_budget(c, ui); + mutex_unlock(&ui->ui_mutex); + } + if (!PagePrivate(page)) { + if (PageChecked(page)) + release_new_page_budget(c); + else + release_existing_page_budget(c); + } +} + +static int ubifs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = mapping->host; + struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_info *c = inode->i_sb->s_fs_info; + loff_t end_pos = pos + len; + int appending = !!(end_pos > inode->i_size); + + dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld", + inode->i_ino, pos, page->index, len, copied, inode->i_size); + + if (unlikely(copied < len && len == PAGE_CACHE_SIZE)) { + /* + * VFS copied less data to the page that it intended and + * declared in its '->write_begin()' call via the @len + * argument. If the page was not up-to-date, and @len was + * @PAGE_CACHE_SIZE, the 'ubifs_write_begin()' function did + * not load it from the media (for optimization reasons). This + * means that part of the page contains garbage. So read the + * page now. + */ + dbg_gen("copied %d instead of %d, read page and repeat", + copied, len); + cancel_budget(c, page, ui, appending); + + /* + * Return 0 to force VFS to repeat the whole operation, or the + * error code if 'do_readpage()' failes. + */ + copied = do_readpage(page); + goto out; + } + + if (!PagePrivate(page)) { + SetPagePrivate(page); + atomic_long_inc(&c->dirty_pg_cnt); + __set_page_dirty_nobuffers(page); + } + + if (appending) { + i_size_write(inode, end_pos); + ui->ui_size = end_pos; + /* + * Note, we do not set @I_DIRTY_PAGES (which means that the + * inode has dirty pages), this has been done in + * '__set_page_dirty_nobuffers()'. + */ + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + ubifs_assert(mutex_is_locked(&ui->ui_mutex)); + mutex_unlock(&ui->ui_mutex); + } + +out: + unlock_page(page); + page_cache_release(page); + return copied; +} + +static int ubifs_readpage(struct file *file, struct page *page) +{ + do_readpage(page); + unlock_page(page); + return 0; +} + +static int do_writepage(struct page *page, int len) +{ + int err = 0, i, blen; + unsigned int block; + void *addr; + union ubifs_key key; + struct inode *inode = page->mapping->host; + struct ubifs_info *c = inode->i_sb->s_fs_info; + +#ifdef UBIFS_DEBUG + spin_lock(&ui->ui_lock); + ubifs_assert(page->index <= ui->synced_i_size << PAGE_CACHE_SIZE); + spin_unlock(&ui->ui_lock); +#endif + + /* Update radix tree tags */ + set_page_writeback(page); + + addr = kmap(page); + block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; + i = 0; + while (len) { + blen = min_t(int, len, UBIFS_BLOCK_SIZE); + data_key_init(c, &key, inode->i_ino, block); + err = ubifs_jnl_write_data(c, inode, &key, addr, blen); + if (err) + break; + if (++i >= UBIFS_BLOCKS_PER_PAGE) + break; + block += 1; + addr += blen; + len -= blen; + } + if (err) { + SetPageError(page); + ubifs_err("cannot write page %lu of inode %lu, error %d", + page->index, inode->i_ino, err); + ubifs_ro_mode(c, err); + } + + ubifs_assert(PagePrivate(page)); + if (PageChecked(page)) + release_new_page_budget(c); + else + release_existing_page_budget(c); + + atomic_long_dec(&c->dirty_pg_cnt); + ClearPagePrivate(page); + ClearPageChecked(page); + + kunmap(page); + unlock_page(page); + end_page_writeback(page); + return err; +} + +/* + * When writing-back dirty inodes, VFS first writes-back pages belonging to the + * inode, then the inode itself. For UBIFS this may cause a problem. Consider a + * situation when a we have an inode with size 0, then a megabyte of data is + * appended to the inode, then write-back starts and flushes some amount of the + * dirty pages, the journal becomes full, commit happens and finishes, and then + * an unclean reboot happens. When the file system is mounted next time, the + * inode size would still be 0, but there would be many pages which are beyond + * the inode size, they would be indexed and consume flash space. Because the + * journal has been committed, the replay would not be able to detect this + * situation and correct the inode size. This means UBIFS would have to scan + * whole index and correct all inode sizes, which is long an unacceptable. + * + * To prevent situations like this, UBIFS writes pages back only if they are + * within last synchronized inode size, i.e. the the size which has been + * written to the flash media last time. Otherwise, UBIFS forces inode + * write-back, thus making sure the on-flash inode contains current inode size, + * and then keeps writing pages back. + * + * Some locking issues explanation. 'ubifs_writepage()' first is called with + * the page locked, and it locks @ui_mutex. However, write-back does take inode + * @i_mutex, which means other VFS operations may be run on this inode at the + * same time. And the problematic one is truncation to smaller size, from where + * we have to call 'vmtruncate()', which first changes @inode->i_size, then + * drops the truncated pages. And while dropping the pages, it takes the page + * lock. This means that 'do_truncation()' cannot call 'vmtruncate()' with + * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This + * means that @inode->i_size is changed while @ui_mutex is unlocked. + * + * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond + * inode size. How do we do this if @inode->i_size may became smaller while we + * are in the middle of 'ubifs_writepage()'? The UBIFS solution is the + * @ui->ui_isize "shadow" field which UBIFS uses instead of @inode->i_size + * internally and updates it under @ui_mutex. + * + * Q: why we do not worry that if we race with truncation, we may end up with a + * situation when the inode is truncated while we are in the middle of + * 'do_writepage()', so we do write beyond inode size? + * A: If we are in the middle of 'do_writepage()', truncation would be locked + * on the page lock and it would not write the truncated inode node to the + * journal before we have finished. + */ +static int ubifs_writepage(struct page *page, struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct ubifs_inode *ui = ubifs_inode(inode); + loff_t i_size = i_size_read(inode), synced_i_size; + pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + int err, len = i_size & (PAGE_CACHE_SIZE - 1); + void *kaddr; + + dbg_gen("ino %lu, pg %lu, pg flags %#lx", + inode->i_ino, page->index, page->flags); + ubifs_assert(PagePrivate(page)); + + /* Is the page fully outside @i_size? (truncate in progress) */ + if (page->index > end_index || (page->index == end_index && !len)) { + err = 0; + goto out_unlock; + } + + spin_lock(&ui->ui_lock); + synced_i_size = ui->synced_i_size; + spin_unlock(&ui->ui_lock); + + /* Is the page fully inside @i_size? */ + if (page->index < end_index) { + if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) { + err = inode->i_sb->s_op->write_inode(inode, 1); + if (err) + goto out_unlock; + /* + * The inode has been written, but the write-buffer has + * not been synchronized, so in case of an unclean + * reboot we may end up with some pages beyond inode + * size, but they would be in the journal (because + * commit flushes write buffers) and recovery would deal + * with this. + */ + } + return do_writepage(page, PAGE_CACHE_SIZE); + } + + /* + * The page straddles @i_size. It must be zeroed out on each and every + * writepage invocation because it may be mmapped. "A file is mapped + * in multiples of the page size. For a file that is not a multiple of + * the page size, the remaining memory is zeroed when mapped, and + * writes to that region are not written out to the file." + */ + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + len, 0, PAGE_CACHE_SIZE - len); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + + if (i_size > synced_i_size) { + err = inode->i_sb->s_op->write_inode(inode, 1); + if (err) + goto out_unlock; + } + + return do_writepage(page, len); + +out_unlock: + unlock_page(page); + return err; +} + +/** + * do_attr_changes - change inode attributes. + * @inode: inode to change attributes for + * @attr: describes attributes to change + */ +static void do_attr_changes(struct inode *inode, const struct iattr *attr) +{ + if (attr->ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + if (attr->ia_valid & ATTR_ATIME) + inode->i_atime = timespec_trunc(attr->ia_atime, + inode->i_sb->s_time_gran); + if (attr->ia_valid & ATTR_MTIME) + inode->i_mtime = timespec_trunc(attr->ia_mtime, + inode->i_sb->s_time_gran); + if (attr->ia_valid & ATTR_CTIME) + inode->i_ctime = timespec_trunc(attr->ia_ctime, + inode->i_sb->s_time_gran); + if (attr->ia_valid & ATTR_MODE) { + umode_t mode = attr->ia_mode; + + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + mode &= ~S_ISGID; + inode->i_mode = mode; + } +} + +/** + * do_truncation - truncate an inode. + * @c: UBIFS file-system description object + * @inode: inode to truncate + * @attr: inode attribute changes description + * + * This function implements VFS '->setattr()' call when the inode is truncated + * to a smaller size. Returns zero in case of success and a negative error code + * in case of failure. + */ +static int do_truncation(struct ubifs_info *c, struct inode *inode, + const struct iattr *attr) +{ + int err; + struct ubifs_budget_req req; + loff_t old_size = inode->i_size, new_size = attr->ia_size; + int offset = new_size & (UBIFS_BLOCK_SIZE - 1); + struct ubifs_inode *ui = ubifs_inode(inode); + + dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); + memset(&req, 0, sizeof(struct ubifs_budget_req)); + + /* + * If this is truncation to a smaller size, and we do not truncate on a + * block boundary, budget for changing one data block, because the last + * block will be re-written. + */ + if (new_size & (UBIFS_BLOCK_SIZE - 1)) + req.dirtied_page = 1; + + req.dirtied_ino = 1; + /* A funny way to budget for truncation node */ + req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; + err = ubifs_budget_space(c, &req); + if (err) + return err; + + err = vmtruncate(inode, new_size); + if (err) + goto out_budg; + + if (offset) { + pgoff_t index = new_size >> PAGE_CACHE_SHIFT; + struct page *page; + + page = find_lock_page(inode->i_mapping, index); + if (page) { + if (PageDirty(page)) { + /* + * 'ubifs_jnl_truncate()' will try to truncate + * the last data node, but it contains + * out-of-date data because the page is dirty. + * Write the page now, so that + * 'ubifs_jnl_truncate()' will see an already + * truncated (and up to date) data node. + */ + ubifs_assert(PagePrivate(page)); + + clear_page_dirty_for_io(page); + if (UBIFS_BLOCKS_PER_PAGE_SHIFT) + offset = new_size & + (PAGE_CACHE_SIZE - 1); + err = do_writepage(page, offset); + page_cache_release(page); + if (err) + goto out_budg; + /* + * We could now tell 'ubifs_jnl_truncate()' not + * to read the last block. + */ + } else { + /* + * We could 'kmap()' the page and pass the data + * to 'ubifs_jnl_truncate()' to save it from + * having to read it. + */ + unlock_page(page); + page_cache_release(page); + } + } + } + + mutex_lock(&ui->ui_mutex); + ui->ui_size = inode->i_size; + /* Truncation changes inode [mc]time */ + inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + /* The other attributes may be changed at the same time as well */ + do_attr_changes(inode, attr); + + err = ubifs_jnl_truncate(c, inode, old_size, new_size); + mutex_unlock(&ui->ui_mutex); +out_budg: + ubifs_release_budget(c, &req); + return err; +} + +/** + * do_setattr - change inode attributes. + * @c: UBIFS file-system description object + * @inode: inode to change attributes for + * @attr: inode attribute changes description + * + * This function implements VFS '->setattr()' call for all cases except + * truncations to smaller size. Returns zero in case of success and a negative + * error code in case of failure. + */ +static int do_setattr(struct ubifs_info *c, struct inode *inode, + const struct iattr *attr) +{ + int err, release; + loff_t new_size = attr->ia_size; + struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_budget_req req = { .dirtied_ino = 1, + .dirtied_ino_d = ui->data_len }; + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + if (attr->ia_valid & ATTR_SIZE) { + dbg_gen("size %lld -> %lld", inode->i_size, new_size); + err = vmtruncate(inode, new_size); + if (err) + goto out; + } + + mutex_lock(&ui->ui_mutex); + if (attr->ia_valid & ATTR_SIZE) { + /* Truncation changes inode [mc]time */ + inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + /* 'vmtruncate()' changed @i_size, update @ui_size */ + ui->ui_size = inode->i_size; + } + + do_attr_changes(inode, attr); + + release = ui->dirty; + if (attr->ia_valid & ATTR_SIZE) + /* + * Inode length changed, so we have to make sure + * @I_DIRTY_DATASYNC is set. + */ + __mark_inode_dirty(inode, I_DIRTY_SYNC | I_DIRTY_DATASYNC); + else + mark_inode_dirty_sync(inode); + mutex_unlock(&ui->ui_mutex); + + if (release) + ubifs_release_budget(c, &req); + if (IS_SYNC(inode)) + err = inode->i_sb->s_op->write_inode(inode, 1); + return err; + +out: + ubifs_release_budget(c, &req); + return err; +} + +int ubifs_setattr(struct dentry *dentry, struct iattr *attr) +{ + int err; + struct inode *inode = dentry->d_inode; + struct ubifs_info *c = inode->i_sb->s_fs_info; + + dbg_gen("ino %lu, ia_valid %#x", inode->i_ino, attr->ia_valid); + err = inode_change_ok(inode, attr); + if (err) + return err; + + err = dbg_check_synced_i_size(inode); + if (err) + return err; + + if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size < inode->i_size) + /* Truncation to a smaller size */ + err = do_truncation(c, inode, attr); + else + err = do_setattr(c, inode, attr); + + return err; +} + +static void ubifs_invalidatepage(struct page *page, unsigned long offset) +{ + struct inode *inode = page->mapping->host; + struct ubifs_info *c = inode->i_sb->s_fs_info; + + ubifs_assert(PagePrivate(page)); + if (offset) + /* Partial page remains dirty */ + return; + + if (PageChecked(page)) + release_new_page_budget(c); + else + release_existing_page_budget(c); + + atomic_long_dec(&c->dirty_pg_cnt); + ClearPagePrivate(page); + ClearPageChecked(page); +} + +static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct ubifs_inode *ui = ubifs_inode(dentry->d_inode); + + nd_set_link(nd, ui->data); + return NULL; +} + +int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + struct inode *inode = dentry->d_inode; + struct ubifs_info *c = inode->i_sb->s_fs_info; + int err; + + dbg_gen("syncing inode %lu", inode->i_ino); + + /* + * VFS has already synchronized dirty pages for this inode. Synchronize + * the inode unless this is a 'datasync()' call. + */ + if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) { + err = inode->i_sb->s_op->write_inode(inode, 1); + if (err) + return err; + } + + /* + * Nodes related to this inode may still sit in a write-buffer. Flush + * them. + */ + err = ubifs_sync_wbufs_by_inode(c, inode); + if (err) + return err; + + return 0; +} + +/** + * mctime_update_needed - check if mtime or ctime update is needed. + * @inode: the inode to do the check for + * @now: current time + * + * This helper function checks if the inode mtime/ctime should be updated or + * not. If current values of the time-stamps are within the UBIFS inode time + * granularity, they are not updated. This is an optimization. + */ +static inline int mctime_update_needed(const struct inode *inode, + const struct timespec *now) +{ + if (!timespec_equal(&inode->i_mtime, now) || + !timespec_equal(&inode->i_ctime, now)) + return 1; + return 0; +} + +/** + * update_ctime - update mtime and ctime of an inode. + * @c: UBIFS file-system description object + * @inode: inode to update + * + * This function updates mtime and ctime of the inode if it is not equivalent to + * current time. Returns zero in case of success and a negative error code in + * case of failure. + */ +static int update_mctime(struct ubifs_info *c, struct inode *inode) +{ + struct timespec now = ubifs_current_time(inode); + struct ubifs_inode *ui = ubifs_inode(inode); + + if (mctime_update_needed(inode, &now)) { + int err, release; + struct ubifs_budget_req req = { .dirtied_ino = 1, + .dirtied_ino_d = ui->data_len }; + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + mutex_lock(&ui->ui_mutex); + inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + release = ui->dirty; + mark_inode_dirty_sync(inode); + mutex_unlock(&ui->ui_mutex); + if (release) + ubifs_release_budget(c, &req); + } + + return 0; +} + +static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + int err; + ssize_t ret; + struct inode *inode = iocb->ki_filp->f_mapping->host; + struct ubifs_info *c = inode->i_sb->s_fs_info; + + err = update_mctime(c, inode); + if (err) + return err; + + ret = generic_file_aio_write(iocb, iov, nr_segs, pos); + if (ret < 0) + return ret; + + if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_SYNC)) { + err = ubifs_sync_wbufs_by_inode(c, inode); + if (err) + return err; + } + + return ret; +} + +static int ubifs_set_page_dirty(struct page *page) +{ + int ret; + + ret = __set_page_dirty_nobuffers(page); + /* + * An attempt to dirty a page without budgeting for it - should not + * happen. + */ + ubifs_assert(ret == 0); + return ret; +} + +static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) +{ + /* + * An attempt to release a dirty page without budgeting for it - should + * not happen. + */ + if (PageWriteback(page)) + return 0; + ubifs_assert(PagePrivate(page)); + ubifs_assert(0); + ClearPagePrivate(page); + ClearPageChecked(page); + return 1; +} + +/* + * mmap()d file has taken write protection fault and is being made + * writable. UBIFS must ensure page is budgeted for. + */ +static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct ubifs_info *c = inode->i_sb->s_fs_info; + struct timespec now = ubifs_current_time(inode); + struct ubifs_budget_req req = { .new_page = 1 }; + int err, update_time; + + dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index, + i_size_read(inode)); + ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY)); + + if (unlikely(c->ro_media)) + return -EROFS; + + /* + * We have not locked @page so far so we may budget for changing the + * page. Note, we cannot do this after we locked the page, because + * budgeting may cause write-back which would cause deadlock. + * + * At the moment we do not know whether the page is dirty or not, so we + * assume that it is not and budget for a new page. We could look at + * the @PG_private flag and figure this out, but we may race with write + * back and the page state may change by the time we lock it, so this + * would need additional care. We do not bother with this at the + * moment, although it might be good idea to do. Instead, we allocate + * budget for a new page and amend it later on if the page was in fact + * dirty. + * + * The budgeting-related logic of this function is similar to what we + * do in 'ubifs_write_begin()' and 'ubifs_write_end()'. Glance there + * for more comments. + */ + update_time = mctime_update_needed(inode, &now); + if (update_time) + /* + * We have to change inode time stamp which requires extra + * budgeting. + */ + req.dirtied_ino = 1; + + err = ubifs_budget_space(c, &req); + if (unlikely(err)) { + if (err == -ENOSPC) + ubifs_warn("out of space for mmapped file " + "(inode number %lu)", inode->i_ino); + return err; + } + + lock_page(page); + if (unlikely(page->mapping != inode->i_mapping || + page_offset(page) > i_size_read(inode))) { + /* Page got truncated out from underneath us */ + err = -EINVAL; + goto out_unlock; + } + + if (PagePrivate(page)) + release_new_page_budget(c); + else { + if (!PageChecked(page)) + ubifs_convert_page_budget(c); + SetPagePrivate(page); + atomic_long_inc(&c->dirty_pg_cnt); + __set_page_dirty_nobuffers(page); + } + + if (update_time) { + int release; + struct ubifs_inode *ui = ubifs_inode(inode); + + mutex_lock(&ui->ui_mutex); + inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + release = ui->dirty; + mark_inode_dirty_sync(inode); + mutex_unlock(&ui->ui_mutex); + if (release) + ubifs_release_dirty_inode_budget(c, ui); + } + + unlock_page(page); + return 0; + +out_unlock: + unlock_page(page); + ubifs_release_budget(c, &req); + return err; +} + +static struct vm_operations_struct ubifs_file_vm_ops = { + .fault = filemap_fault, + .page_mkwrite = ubifs_vm_page_mkwrite, +}; + +static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + int err; + + /* 'generic_file_mmap()' takes care of NOMMU case */ + err = generic_file_mmap(file, vma); + if (err) + return err; + vma->vm_ops = &ubifs_file_vm_ops; + return 0; +} + +struct address_space_operations ubifs_file_address_operations = { + .readpage = ubifs_readpage, + .writepage = ubifs_writepage, + .write_begin = ubifs_write_begin, + .write_end = ubifs_write_end, + .invalidatepage = ubifs_invalidatepage, + .set_page_dirty = ubifs_set_page_dirty, + .releasepage = ubifs_releasepage, +}; + +struct inode_operations ubifs_file_inode_operations = { + .setattr = ubifs_setattr, + .getattr = ubifs_getattr, +#ifdef CONFIG_UBIFS_FS_XATTR + .setxattr = ubifs_setxattr, + .getxattr = ubifs_getxattr, + .listxattr = ubifs_listxattr, + .removexattr = ubifs_removexattr, +#endif +}; + +struct inode_operations ubifs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = ubifs_follow_link, + .setattr = ubifs_setattr, + .getattr = ubifs_getattr, +}; + +struct file_operations ubifs_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = generic_file_aio_read, + .aio_write = ubifs_aio_write, + .mmap = ubifs_file_mmap, + .fsync = ubifs_fsync, + .unlocked_ioctl = ubifs_ioctl, + .splice_read = generic_file_splice_read, +#ifdef CONFIG_COMPAT + .compat_ioctl = ubifs_compat_ioctl, +#endif +}; diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c new file mode 100644 index 0000000..10394c5 --- /dev/null +++ b/fs/ubifs/find.c @@ -0,0 +1,975 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file contains functions for finding LEBs for various purposes e.g. + * garbage collection. In general, lprops category heaps and lists are used + * for fast access, falling back on scanning the LPT as a last resort. + */ + +#include +#include "ubifs.h" + +/** + * struct scan_data - data provided to scan callback functions + * @min_space: minimum number of bytes for which to scan + * @pick_free: whether it is OK to scan for empty LEBs + * @lnum: LEB number found is returned here + * @exclude_index: whether to exclude index LEBs + */ +struct scan_data { + int min_space; + int pick_free; + int lnum; + int exclude_index; +}; + +/** + * valuable - determine whether LEB properties are valuable. + * @c: the UBIFS file-system description object + * @lprops: LEB properties + * + * This function return %1 if the LEB properties should be added to the LEB + * properties tree in memory. Otherwise %0 is returned. + */ +static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops) +{ + int n, cat = lprops->flags & LPROPS_CAT_MASK; + struct ubifs_lpt_heap *heap; + + switch (cat) { + case LPROPS_DIRTY: + case LPROPS_DIRTY_IDX: + case LPROPS_FREE: + heap = &c->lpt_heap[cat - 1]; + if (heap->cnt < heap->max_cnt) + return 1; + if (lprops->free + lprops->dirty >= c->dark_wm) + return 1; + return 0; + case LPROPS_EMPTY: + n = c->lst.empty_lebs + c->freeable_cnt - + c->lst.taken_empty_lebs; + if (n < c->lsave_cnt) + return 1; + return 0; + case LPROPS_FREEABLE: + return 1; + case LPROPS_FRDI_IDX: + return 1; + } + return 0; +} + +/** + * scan_for_dirty_cb - dirty space scan callback. + * @c: the UBIFS file-system description object + * @lprops: LEB properties to scan + * @in_tree: whether the LEB properties are in main memory + * @data: information passed to and from the caller of the scan + * + * This function returns a code that indicates whether the scan should continue + * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree + * in main memory (%LPT_SCAN_ADD), or whether the scan should stop + * (%LPT_SCAN_STOP). + */ +static int scan_for_dirty_cb(struct ubifs_info *c, + const struct ubifs_lprops *lprops, int in_tree, + struct scan_data *data) +{ + int ret = LPT_SCAN_CONTINUE; + + /* Exclude LEBs that are currently in use */ + if (lprops->flags & LPROPS_TAKEN) + return LPT_SCAN_CONTINUE; + /* Determine whether to add these LEB properties to the tree */ + if (!in_tree && valuable(c, lprops)) + ret |= LPT_SCAN_ADD; + /* Exclude LEBs with too little space */ + if (lprops->free + lprops->dirty < data->min_space) + return ret; + /* If specified, exclude index LEBs */ + if (data->exclude_index && lprops->flags & LPROPS_INDEX) + return ret; + /* If specified, exclude empty or freeable LEBs */ + if (lprops->free + lprops->dirty == c->leb_size) { + if (!data->pick_free) + return ret; + /* Exclude LEBs with too little dirty space (unless it is empty) */ + } else if (lprops->dirty < c->dead_wm) + return ret; + /* Finally we found space */ + data->lnum = lprops->lnum; + return LPT_SCAN_ADD | LPT_SCAN_STOP; +} + +/** + * scan_for_dirty - find a data LEB with free space. + * @c: the UBIFS file-system description object + * @min_space: minimum amount free plus dirty space the returned LEB has to + * have + * @pick_free: if it is OK to return a free or freeable LEB + * @exclude_index: whether to exclude index LEBs + * + * This function returns a pointer to the LEB properties found or a negative + * error code. + */ +static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, + int min_space, int pick_free, + int exclude_index) +{ + const struct ubifs_lprops *lprops; + struct ubifs_lpt_heap *heap; + struct scan_data data; + int err, i; + + /* There may be an LEB with enough dirty space on the free heap */ + heap = &c->lpt_heap[LPROPS_FREE - 1]; + for (i = 0; i < heap->cnt; i++) { + lprops = heap->arr[i]; + if (lprops->free + lprops->dirty < min_space) + continue; + if (lprops->dirty < c->dead_wm) + continue; + return lprops; + } + /* + * A LEB may have fallen off of the bottom of the dirty heap, and ended + * up as uncategorized even though it has enough dirty space for us now, + * so check the uncategorized list. N.B. neither empty nor freeable LEBs + * can end up as uncategorized because they are kept on lists not + * finite-sized heaps. + */ + list_for_each_entry(lprops, &c->uncat_list, list) { + if (lprops->flags & LPROPS_TAKEN) + continue; + if (lprops->free + lprops->dirty < min_space) + continue; + if (exclude_index && (lprops->flags & LPROPS_INDEX)) + continue; + if (lprops->dirty < c->dead_wm) + continue; + return lprops; + } + /* We have looked everywhere in main memory, now scan the flash */ + if (c->pnodes_have >= c->pnode_cnt) + /* All pnodes are in memory, so skip scan */ + return ERR_PTR(-ENOSPC); + data.min_space = min_space; + data.pick_free = pick_free; + data.lnum = -1; + data.exclude_index = exclude_index; + err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, + (ubifs_lpt_scan_callback)scan_for_dirty_cb, + &data); + if (err) + return ERR_PTR(err); + ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); + c->lscan_lnum = data.lnum; + lprops = ubifs_lpt_lookup_dirty(c, data.lnum); + if (IS_ERR(lprops)) + return lprops; + ubifs_assert(lprops->lnum == data.lnum); + ubifs_assert(lprops->free + lprops->dirty >= min_space); + ubifs_assert(lprops->dirty >= c->dead_wm || + (pick_free && + lprops->free + lprops->dirty == c->leb_size)); + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert(!exclude_index || !(lprops->flags & LPROPS_INDEX)); + return lprops; +} + +/** + * ubifs_find_dirty_leb - find a dirty LEB for the Garbage Collector. + * @c: the UBIFS file-system description object + * @ret_lp: LEB properties are returned here on exit + * @min_space: minimum amount free plus dirty space the returned LEB has to + * have + * @pick_free: controls whether it is OK to pick empty or index LEBs + * + * This function tries to find a dirty logical eraseblock which has at least + * @min_space free and dirty space. It prefers to take an LEB from the dirty or + * dirty index heap, and it falls-back to LPT scanning if the heaps are empty + * or do not have an LEB which satisfies the @min_space criteria. + * + * Note: + * o LEBs which have less than dead watermark of dirty space are never picked + * by this function; + * + * Returns zero and the LEB properties of + * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a + * negative error code in case of other failures. The returned LEB is marked as + * "taken". + * + * The additional @pick_free argument controls if this function has to return a + * free or freeable LEB if one is present. For example, GC must to set it to %1, + * when called from the journal space reservation function, because the + * appearance of free space may coincide with the loss of enough dirty space + * for GC to succeed anyway. + * + * In contrast, if the Garbage Collector is called from budgeting, it should + * just make free space, not return LEBs which are already free or freeable. + * + * In addition @pick_free is set to %2 by the recovery process in order to + * recover gc_lnum in which case an index LEB must not be returned. + */ +int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, + int min_space, int pick_free) +{ + int err = 0, sum, exclude_index = pick_free == 2 ? 1 : 0; + const struct ubifs_lprops *lp = NULL, *idx_lp = NULL; + struct ubifs_lpt_heap *heap, *idx_heap; + + ubifs_get_lprops(c); + + if (pick_free) { + int lebs, rsvd_idx_lebs = 0; + + spin_lock(&c->space_lock); + lebs = c->lst.empty_lebs; + lebs += c->freeable_cnt - c->lst.taken_empty_lebs; + + /* + * Note, the index may consume more LEBs than have been reserved + * for it. It is OK because it might be consolidated by GC. + * But if the index takes fewer LEBs than it is reserved for it, + * this function must avoid picking those reserved LEBs. + */ + if (c->min_idx_lebs >= c->lst.idx_lebs) { + rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; + exclude_index = 1; + } + spin_unlock(&c->space_lock); + + /* Check if there are enough free LEBs for the index */ + if (rsvd_idx_lebs < lebs) { + /* OK, try to find an empty LEB */ + lp = ubifs_fast_find_empty(c); + if (lp) + goto found; + + /* Or a freeable LEB */ + lp = ubifs_fast_find_freeable(c); + if (lp) + goto found; + } else + /* + * We cannot pick free/freeable LEBs in the below code. + */ + pick_free = 0; + } else { + spin_lock(&c->space_lock); + exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs); + spin_unlock(&c->space_lock); + } + + /* Look on the dirty and dirty index heaps */ + heap = &c->lpt_heap[LPROPS_DIRTY - 1]; + idx_heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; + + if (idx_heap->cnt && !exclude_index) { + idx_lp = idx_heap->arr[0]; + sum = idx_lp->free + idx_lp->dirty; + /* + * Since we reserve twice as more space for the index than it + * actually takes, it does not make sense to pick indexing LEBs + * with less than half LEB of dirty space. + */ + if (sum < min_space || sum < c->half_leb_size) + idx_lp = NULL; + } + + if (heap->cnt) { + lp = heap->arr[0]; + if (lp->dirty + lp->free < min_space) + lp = NULL; + } + + /* Pick the LEB with most space */ + if (idx_lp && lp) { + if (idx_lp->free + idx_lp->dirty >= lp->free + lp->dirty) + lp = idx_lp; + } else if (idx_lp && !lp) + lp = idx_lp; + + if (lp) { + ubifs_assert(lp->dirty >= c->dead_wm); + goto found; + } + + /* Did not find a dirty LEB on the dirty heaps, have to scan */ + dbg_find("scanning LPT for a dirty LEB"); + lp = scan_for_dirty(c, min_space, pick_free, exclude_index); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } + ubifs_assert(lp->dirty >= c->dead_wm || + (pick_free && lp->free + lp->dirty == c->leb_size)); + +found: + dbg_find("found LEB %d, free %d, dirty %d, flags %#x", + lp->lnum, lp->free, lp->dirty, lp->flags); + + lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, + lp->flags | LPROPS_TAKEN, 0); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } + + memcpy(ret_lp, lp, sizeof(struct ubifs_lprops)); + +out: + ubifs_release_lprops(c); + return err; +} + +/** + * scan_for_free_cb - free space scan callback. + * @c: the UBIFS file-system description object + * @lprops: LEB properties to scan + * @in_tree: whether the LEB properties are in main memory + * @data: information passed to and from the caller of the scan + * + * This function returns a code that indicates whether the scan should continue + * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree + * in main memory (%LPT_SCAN_ADD), or whether the scan should stop + * (%LPT_SCAN_STOP). + */ +static int scan_for_free_cb(struct ubifs_info *c, + const struct ubifs_lprops *lprops, int in_tree, + struct scan_data *data) +{ + int ret = LPT_SCAN_CONTINUE; + + /* Exclude LEBs that are currently in use */ + if (lprops->flags & LPROPS_TAKEN) + return LPT_SCAN_CONTINUE; + /* Determine whether to add these LEB properties to the tree */ + if (!in_tree && valuable(c, lprops)) + ret |= LPT_SCAN_ADD; + /* Exclude index LEBs */ + if (lprops->flags & LPROPS_INDEX) + return ret; + /* Exclude LEBs with too little space */ + if (lprops->free < data->min_space) + return ret; + /* If specified, exclude empty LEBs */ + if (!data->pick_free && lprops->free == c->leb_size) + return ret; + /* + * LEBs that have only free and dirty space must not be allocated + * because they may have been unmapped already or they may have data + * that is obsolete only because of nodes that are still sitting in a + * wbuf. + */ + if (lprops->free + lprops->dirty == c->leb_size && lprops->dirty > 0) + return ret; + /* Finally we found space */ + data->lnum = lprops->lnum; + return LPT_SCAN_ADD | LPT_SCAN_STOP; +} + +/** + * do_find_free_space - find a data LEB with free space. + * @c: the UBIFS file-system description object + * @min_space: minimum amount of free space required + * @pick_free: whether it is OK to scan for empty LEBs + * @squeeze: whether to try to find space in a non-empty LEB first + * + * This function returns a pointer to the LEB properties found or a negative + * error code. + */ +static +const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, + int min_space, int pick_free, + int squeeze) +{ + const struct ubifs_lprops *lprops; + struct ubifs_lpt_heap *heap; + struct scan_data data; + int err, i; + + if (squeeze) { + lprops = ubifs_fast_find_free(c); + if (lprops && lprops->free >= min_space) + return lprops; + } + if (pick_free) { + lprops = ubifs_fast_find_empty(c); + if (lprops) + return lprops; + } + if (!squeeze) { + lprops = ubifs_fast_find_free(c); + if (lprops && lprops->free >= min_space) + return lprops; + } + /* There may be an LEB with enough free space on the dirty heap */ + heap = &c->lpt_heap[LPROPS_DIRTY - 1]; + for (i = 0; i < heap->cnt; i++) { + lprops = heap->arr[i]; + if (lprops->free >= min_space) + return lprops; + } + /* + * A LEB may have fallen off of the bottom of the free heap, and ended + * up as uncategorized even though it has enough free space for us now, + * so check the uncategorized list. N.B. neither empty nor freeable LEBs + * can end up as uncategorized because they are kept on lists not + * finite-sized heaps. + */ + list_for_each_entry(lprops, &c->uncat_list, list) { + if (lprops->flags & LPROPS_TAKEN) + continue; + if (lprops->flags & LPROPS_INDEX) + continue; + if (lprops->free >= min_space) + return lprops; + } + /* We have looked everywhere in main memory, now scan the flash */ + if (c->pnodes_have >= c->pnode_cnt) + /* All pnodes are in memory, so skip scan */ + return ERR_PTR(-ENOSPC); + data.min_space = min_space; + data.pick_free = pick_free; + data.lnum = -1; + err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, + (ubifs_lpt_scan_callback)scan_for_free_cb, + &data); + if (err) + return ERR_PTR(err); + ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); + c->lscan_lnum = data.lnum; + lprops = ubifs_lpt_lookup_dirty(c, data.lnum); + if (IS_ERR(lprops)) + return lprops; + ubifs_assert(lprops->lnum == data.lnum); + ubifs_assert(lprops->free >= min_space); + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert(!(lprops->flags & LPROPS_INDEX)); + return lprops; +} + +/** + * ubifs_find_free_space - find a data LEB with free space. + * @c: the UBIFS file-system description object + * @min_space: minimum amount of required free space + * @free: contains amount of free space in the LEB on exit + * @squeeze: whether to try to find space in a non-empty LEB first + * + * This function looks for an LEB with at least @min_space bytes of free space. + * It tries to find an empty LEB if possible. If no empty LEBs are available, + * this function searches for a non-empty data LEB. The returned LEB is marked + * as "taken". + * + * This function returns found LEB number in case of success, %-ENOSPC if it + * failed to find a LEB with @min_space bytes of free space and other a negative + * error codes in case of failure. + */ +int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, + int squeeze) +{ + const struct ubifs_lprops *lprops; + int lebs, rsvd_idx_lebs, pick_free = 0, err, lnum, flags; + + dbg_find("min_space %d", min_space); + ubifs_get_lprops(c); + + /* Check if there are enough empty LEBs for commit */ + spin_lock(&c->space_lock); + if (c->min_idx_lebs > c->lst.idx_lebs) + rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; + else + rsvd_idx_lebs = 0; + lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - + c->lst.taken_empty_lebs; + ubifs_assert(lebs + c->lst.idx_lebs >= c->min_idx_lebs); + if (rsvd_idx_lebs < lebs) + /* + * OK to allocate an empty LEB, but we still don't want to go + * looking for one if there aren't any. + */ + if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { + pick_free = 1; + /* + * Because we release the space lock, we must account + * for this allocation here. After the LEB properties + * flags have been updated, we subtract one. Note, the + * result of this is that lprops also decreases + * @taken_empty_lebs in 'ubifs_change_lp()', so it is + * off by one for a short period of time which may + * introduce a small disturbance to budgeting + * calculations, but this is harmless because at the + * worst case this would make the budgeting subsystem + * be more pessimistic than needed. + * + * Fundamentally, this is about serialization of the + * budgeting and lprops subsystems. We could make the + * @space_lock a mutex and avoid dropping it before + * calling 'ubifs_change_lp()', but mutex is more + * heavy-weight, and we want budgeting to be as fast as + * possible. + */ + c->lst.taken_empty_lebs += 1; + } + spin_unlock(&c->space_lock); + + lprops = do_find_free_space(c, min_space, pick_free, squeeze); + if (IS_ERR(lprops)) { + err = PTR_ERR(lprops); + goto out; + } + + lnum = lprops->lnum; + flags = lprops->flags | LPROPS_TAKEN; + + lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC, flags, 0); + if (IS_ERR(lprops)) { + err = PTR_ERR(lprops); + goto out; + } + + if (pick_free) { + spin_lock(&c->space_lock); + c->lst.taken_empty_lebs -= 1; + spin_unlock(&c->space_lock); + } + + *free = lprops->free; + ubifs_release_lprops(c); + + if (*free == c->leb_size) { + /* + * Ensure that empty LEBs have been unmapped. They may not have + * been, for example, because of an unclean unmount. Also + * LEBs that were freeable LEBs (free + dirty == leb_size) will + * not have been unmapped. + */ + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + + dbg_find("found LEB %d, free %d", lnum, *free); + ubifs_assert(*free >= min_space); + return lnum; + +out: + if (pick_free) { + spin_lock(&c->space_lock); + c->lst.taken_empty_lebs -= 1; + spin_unlock(&c->space_lock); + } + ubifs_release_lprops(c); + return err; +} + +/** + * scan_for_idx_cb - callback used by the scan for a free LEB for the index. + * @c: the UBIFS file-system description object + * @lprops: LEB properties to scan + * @in_tree: whether the LEB properties are in main memory + * @data: information passed to and from the caller of the scan + * + * This function returns a code that indicates whether the scan should continue + * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree + * in main memory (%LPT_SCAN_ADD), or whether the scan should stop + * (%LPT_SCAN_STOP). + */ +static int scan_for_idx_cb(struct ubifs_info *c, + const struct ubifs_lprops *lprops, int in_tree, + struct scan_data *data) +{ + int ret = LPT_SCAN_CONTINUE; + + /* Exclude LEBs that are currently in use */ + if (lprops->flags & LPROPS_TAKEN) + return LPT_SCAN_CONTINUE; + /* Determine whether to add these LEB properties to the tree */ + if (!in_tree && valuable(c, lprops)) + ret |= LPT_SCAN_ADD; + /* Exclude index LEBS */ + if (lprops->flags & LPROPS_INDEX) + return ret; + /* Exclude LEBs that cannot be made empty */ + if (lprops->free + lprops->dirty != c->leb_size) + return ret; + /* + * We are allocating for the index so it is safe to allocate LEBs with + * only free and dirty space, because write buffers are sync'd at commit + * start. + */ + data->lnum = lprops->lnum; + return LPT_SCAN_ADD | LPT_SCAN_STOP; +} + +/** + * scan_for_leb_for_idx - scan for a free LEB for the index. + * @c: the UBIFS file-system description object + */ +static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + struct scan_data data; + int err; + + data.lnum = -1; + err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, + (ubifs_lpt_scan_callback)scan_for_idx_cb, + &data); + if (err) + return ERR_PTR(err); + ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); + c->lscan_lnum = data.lnum; + lprops = ubifs_lpt_lookup_dirty(c, data.lnum); + if (IS_ERR(lprops)) + return lprops; + ubifs_assert(lprops->lnum == data.lnum); + ubifs_assert(lprops->free + lprops->dirty == c->leb_size); + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert(!(lprops->flags & LPROPS_INDEX)); + return lprops; +} + +/** + * ubifs_find_free_leb_for_idx - find a free LEB for the index. + * @c: the UBIFS file-system description object + * + * This function looks for a free LEB and returns that LEB number. The returned + * LEB is marked as "taken", "index". + * + * Only empty LEBs are allocated. This is for two reasons. First, the commit + * calculates the number of LEBs to allocate based on the assumption that they + * will be empty. Secondly, free space at the end of an index LEB is not + * guaranteed to be empty because it may have been used by the in-the-gaps + * method prior to an unclean unmount. + * + * If no LEB is found %-ENOSPC is returned. For other failures another negative + * error code is returned. + */ +int ubifs_find_free_leb_for_idx(struct ubifs_info *c) +{ + const struct ubifs_lprops *lprops; + int lnum = -1, err, flags; + + ubifs_get_lprops(c); + + lprops = ubifs_fast_find_empty(c); + if (!lprops) { + lprops = ubifs_fast_find_freeable(c); + if (!lprops) { + ubifs_assert(c->freeable_cnt == 0); + if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { + lprops = scan_for_leb_for_idx(c); + if (IS_ERR(lprops)) { + err = PTR_ERR(lprops); + goto out; + } + } + } + } + + if (!lprops) { + err = -ENOSPC; + goto out; + } + + lnum = lprops->lnum; + + dbg_find("found LEB %d, free %d, dirty %d, flags %#x", + lnum, lprops->free, lprops->dirty, lprops->flags); + + flags = lprops->flags | LPROPS_TAKEN | LPROPS_INDEX; + lprops = ubifs_change_lp(c, lprops, c->leb_size, 0, flags, 0); + if (IS_ERR(lprops)) { + err = PTR_ERR(lprops); + goto out; + } + + ubifs_release_lprops(c); + + /* + * Ensure that empty LEBs have been unmapped. They may not have been, + * for example, because of an unclean unmount. Also LEBs that were + * freeable LEBs (free + dirty == leb_size) will not have been unmapped. + */ + err = ubifs_leb_unmap(c, lnum); + if (err) { + ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, + LPROPS_TAKEN | LPROPS_INDEX, 0); + return err; + } + + return lnum; + +out: + ubifs_release_lprops(c); + return err; +} + +static int cmp_dirty_idx(const struct ubifs_lprops **a, + const struct ubifs_lprops **b) +{ + const struct ubifs_lprops *lpa = *a; + const struct ubifs_lprops *lpb = *b; + + return lpa->dirty + lpa->free - lpb->dirty - lpb->free; +} + +static void swap_dirty_idx(struct ubifs_lprops **a, struct ubifs_lprops **b, + int size) +{ + struct ubifs_lprops *t = *a; + + *a = *b; + *b = t; +} + +/** + * ubifs_save_dirty_idx_lnums - save an array of the most dirty index LEB nos. + * @c: the UBIFS file-system description object + * + * This function is called each commit to create an array of LEB numbers of + * dirty index LEBs sorted in order of dirty and free space. This is used by + * the in-the-gaps method of TNC commit. + */ +int ubifs_save_dirty_idx_lnums(struct ubifs_info *c) +{ + int i; + + ubifs_get_lprops(c); + /* Copy the LPROPS_DIRTY_IDX heap */ + c->dirty_idx.cnt = c->lpt_heap[LPROPS_DIRTY_IDX - 1].cnt; + memcpy(c->dirty_idx.arr, c->lpt_heap[LPROPS_DIRTY_IDX - 1].arr, + sizeof(void *) * c->dirty_idx.cnt); + /* Sort it so that the dirtiest is now at the end */ + sort(c->dirty_idx.arr, c->dirty_idx.cnt, sizeof(void *), + (int (*)(const void *, const void *))cmp_dirty_idx, + (void (*)(void *, void *, int))swap_dirty_idx); + dbg_find("found %d dirty index LEBs", c->dirty_idx.cnt); + if (c->dirty_idx.cnt) + dbg_find("dirtiest index LEB is %d with dirty %d and free %d", + c->dirty_idx.arr[c->dirty_idx.cnt - 1]->lnum, + c->dirty_idx.arr[c->dirty_idx.cnt - 1]->dirty, + c->dirty_idx.arr[c->dirty_idx.cnt - 1]->free); + /* Replace the lprops pointers with LEB numbers */ + for (i = 0; i < c->dirty_idx.cnt; i++) + c->dirty_idx.arr[i] = (void *)(size_t)c->dirty_idx.arr[i]->lnum; + ubifs_release_lprops(c); + return 0; +} + +/** + * scan_dirty_idx_cb - callback used by the scan for a dirty index LEB. + * @c: the UBIFS file-system description object + * @lprops: LEB properties to scan + * @in_tree: whether the LEB properties are in main memory + * @data: information passed to and from the caller of the scan + * + * This function returns a code that indicates whether the scan should continue + * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree + * in main memory (%LPT_SCAN_ADD), or whether the scan should stop + * (%LPT_SCAN_STOP). + */ +static int scan_dirty_idx_cb(struct ubifs_info *c, + const struct ubifs_lprops *lprops, int in_tree, + struct scan_data *data) +{ + int ret = LPT_SCAN_CONTINUE; + + /* Exclude LEBs that are currently in use */ + if (lprops->flags & LPROPS_TAKEN) + return LPT_SCAN_CONTINUE; + /* Determine whether to add these LEB properties to the tree */ + if (!in_tree && valuable(c, lprops)) + ret |= LPT_SCAN_ADD; + /* Exclude non-index LEBs */ + if (!(lprops->flags & LPROPS_INDEX)) + return ret; + /* Exclude LEBs with too little space */ + if (lprops->free + lprops->dirty < c->min_idx_node_sz) + return ret; + /* Finally we found space */ + data->lnum = lprops->lnum; + return LPT_SCAN_ADD | LPT_SCAN_STOP; +} + +/** + * find_dirty_idx_leb - find a dirty index LEB. + * @c: the UBIFS file-system description object + * + * This function returns LEB number upon success and a negative error code upon + * failure. In particular, -ENOSPC is returned if a dirty index LEB is not + * found. + * + * Note that this function scans the entire LPT but it is called very rarely. + */ +static int find_dirty_idx_leb(struct ubifs_info *c) +{ + const struct ubifs_lprops *lprops; + struct ubifs_lpt_heap *heap; + struct scan_data data; + int err, i, ret; + + /* Check all structures in memory first */ + data.lnum = -1; + heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; + for (i = 0; i < heap->cnt; i++) { + lprops = heap->arr[i]; + ret = scan_dirty_idx_cb(c, lprops, 1, &data); + if (ret & LPT_SCAN_STOP) + goto found; + } + list_for_each_entry(lprops, &c->frdi_idx_list, list) { + ret = scan_dirty_idx_cb(c, lprops, 1, &data); + if (ret & LPT_SCAN_STOP) + goto found; + } + list_for_each_entry(lprops, &c->uncat_list, list) { + ret = scan_dirty_idx_cb(c, lprops, 1, &data); + if (ret & LPT_SCAN_STOP) + goto found; + } + if (c->pnodes_have >= c->pnode_cnt) + /* All pnodes are in memory, so skip scan */ + return -ENOSPC; + err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, + (ubifs_lpt_scan_callback)scan_dirty_idx_cb, + &data); + if (err) + return err; +found: + ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); + c->lscan_lnum = data.lnum; + lprops = ubifs_lpt_lookup_dirty(c, data.lnum); + if (IS_ERR(lprops)) + return PTR_ERR(lprops); + ubifs_assert(lprops->lnum == data.lnum); + ubifs_assert(lprops->free + lprops->dirty >= c->min_idx_node_sz); + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert((lprops->flags & LPROPS_INDEX)); + + dbg_find("found dirty LEB %d, free %d, dirty %d, flags %#x", + lprops->lnum, lprops->free, lprops->dirty, lprops->flags); + + lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC, + lprops->flags | LPROPS_TAKEN, 0); + if (IS_ERR(lprops)) + return PTR_ERR(lprops); + + return lprops->lnum; +} + +/** + * get_idx_gc_leb - try to get a LEB number from trivial GC. + * @c: the UBIFS file-system description object + */ +static int get_idx_gc_leb(struct ubifs_info *c) +{ + const struct ubifs_lprops *lp; + int err, lnum; + + err = ubifs_get_idx_gc_leb(c); + if (err < 0) + return err; + lnum = err; + /* + * The LEB was due to be unmapped after the commit but + * it is needed now for this commit. + */ + lp = ubifs_lpt_lookup_dirty(c, lnum); + if (unlikely(IS_ERR(lp))) + return PTR_ERR(lp); + lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, + lp->flags | LPROPS_INDEX, -1); + if (unlikely(IS_ERR(lp))) + return PTR_ERR(lp); + dbg_find("LEB %d, dirty %d and free %d flags %#x", + lp->lnum, lp->dirty, lp->free, lp->flags); + return lnum; +} + +/** + * find_dirtiest_idx_leb - find dirtiest index LEB from dirtiest array. + * @c: the UBIFS file-system description object + */ +static int find_dirtiest_idx_leb(struct ubifs_info *c) +{ + const struct ubifs_lprops *lp; + int lnum; + + while (1) { + if (!c->dirty_idx.cnt) + return -ENOSPC; + /* The lprops pointers were replaced by LEB numbers */ + lnum = (size_t)c->dirty_idx.arr[--c->dirty_idx.cnt]; + lp = ubifs_lpt_lookup(c, lnum); + if (IS_ERR(lp)) + return PTR_ERR(lp); + if ((lp->flags & LPROPS_TAKEN) || !(lp->flags & LPROPS_INDEX)) + continue; + lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, + lp->flags | LPROPS_TAKEN, 0); + if (IS_ERR(lp)) + return PTR_ERR(lp); + break; + } + dbg_find("LEB %d, dirty %d and free %d flags %#x", lp->lnum, lp->dirty, + lp->free, lp->flags); + ubifs_assert(lp->flags | LPROPS_TAKEN); + ubifs_assert(lp->flags | LPROPS_INDEX); + return lnum; +} + +/** + * ubifs_find_dirty_idx_leb - try to find dirtiest index LEB as at last commit. + * @c: the UBIFS file-system description object + * + * This function attempts to find an untaken index LEB with the most free and + * dirty space that can be used without overwriting index nodes that were in the + * last index committed. + */ +int ubifs_find_dirty_idx_leb(struct ubifs_info *c) +{ + int err; + + ubifs_get_lprops(c); + + /* + * We made an array of the dirtiest index LEB numbers as at the start of + * last commit. Try that array first. + */ + err = find_dirtiest_idx_leb(c); + + /* Next try scanning the entire LPT */ + if (err == -ENOSPC) + err = find_dirty_idx_leb(c); + + /* Finally take any index LEBs awaiting trivial GC */ + if (err == -ENOSPC) + err = get_idx_gc_leb(c); + + ubifs_release_lprops(c); + return err; +} diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c new file mode 100644 index 0000000..d0f3dac --- /dev/null +++ b/fs/ubifs/gc.c @@ -0,0 +1,773 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements garbage collection. The procedure for garbage collection + * is different depending on whether a LEB as an index LEB (contains index + * nodes) or not. For non-index LEBs, garbage collection finds a LEB which + * contains a lot of dirty space (obsolete nodes), and copies the non-obsolete + * nodes to the journal, at which point the garbage-collected LEB is free to be + * reused. For index LEBs, garbage collection marks the non-obsolete index nodes + * dirty in the TNC, and after the next commit, the garbage-collected LEB is + * to be reused. Garbage collection will cause the number of dirty index nodes + * to grow, however sufficient space is reserved for the index to ensure the + * commit will never run out of space. + */ + +#include +#include "ubifs.h" + +/* + * GC tries to optimize the way it fit nodes to available space, and it sorts + * nodes a little. The below constants are watermarks which define "large", + * "medium", and "small" nodes. + */ +#define MEDIUM_NODE_WM (UBIFS_BLOCK_SIZE / 4) +#define SMALL_NODE_WM UBIFS_MAX_DENT_NODE_SZ + +/* + * GC may need to move more then one LEB to make progress. The below constants + * define "soft" and "hard" limits on the number of LEBs the garbage collector + * may move. + */ +#define SOFT_LEBS_LIMIT 4 +#define HARD_LEBS_LIMIT 32 + +/** + * switch_gc_head - switch the garbage collection journal head. + * @c: UBIFS file-system description object + * @buf: buffer to write + * @len: length of the buffer to write + * @lnum: LEB number written is returned here + * @offs: offset written is returned here + * + * This function switch the GC head to the next LEB which is reserved in + * @c->gc_lnum. Returns %0 in case of success, %-EAGAIN if commit is required, + * and other negative error code in case of failures. + */ +static int switch_gc_head(struct ubifs_info *c) +{ + int err, gc_lnum = c->gc_lnum; + struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; + + ubifs_assert(gc_lnum != -1); + dbg_gc("switch GC head from LEB %d:%d to LEB %d (waste %d bytes)", + wbuf->lnum, wbuf->offs + wbuf->used, gc_lnum, + c->leb_size - wbuf->offs - wbuf->used); + + err = ubifs_wbuf_sync_nolock(wbuf); + if (err) + return err; + + /* + * The GC write-buffer was synchronized, we may safely unmap + * 'c->gc_lnum'. + */ + err = ubifs_leb_unmap(c, gc_lnum); + if (err) + return err; + + err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0); + if (err) + return err; + + c->gc_lnum = -1; + err = ubifs_wbuf_seek_nolock(wbuf, gc_lnum, 0, UBI_LONGTERM); + return err; +} + +/** + * move_nodes - move nodes. + * @c: UBIFS file-system description object + * @sleb: describes nodes to move + * + * This function moves valid nodes from data LEB described by @sleb to the GC + * journal head. The obsolete nodes are dropped. + * + * When moving nodes we have to deal with classical bin-packing problem: the + * space in the current GC journal head LEB and in @c->gc_lnum are the "bins", + * where the nodes in the @sleb->nodes list are the elements which should be + * fit optimally to the bins. This function uses the "first fit decreasing" + * strategy, although it does not really sort the nodes but just split them on + * 3 classes - large, medium, and small, so they are roughly sorted. + * + * This function returns zero in case of success, %-EAGAIN if commit is + * required, and other negative error codes in case of other failures. + */ +static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) +{ + struct ubifs_scan_node *snod, *tmp; + struct list_head large, medium, small; + struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; + int avail, err, min = INT_MAX; + + INIT_LIST_HEAD(&large); + INIT_LIST_HEAD(&medium); + INIT_LIST_HEAD(&small); + + list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { + struct list_head *lst; + + ubifs_assert(snod->type != UBIFS_IDX_NODE); + ubifs_assert(snod->type != UBIFS_REF_NODE); + ubifs_assert(snod->type != UBIFS_CS_NODE); + + err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum, + snod->offs, 0); + if (err < 0) + goto out; + + lst = &snod->list; + list_del(lst); + if (!err) { + /* The node is obsolete, remove it from the list */ + kfree(snod); + continue; + } + + /* + * Sort the list of nodes so that large nodes go first, and + * small nodes go last. + */ + if (snod->len > MEDIUM_NODE_WM) + list_add(lst, &large); + else if (snod->len > SMALL_NODE_WM) + list_add(lst, &medium); + else + list_add(lst, &small); + + /* And find the smallest node */ + if (snod->len < min) + min = snod->len; + } + + /* + * Join the tree lists so that we'd have one roughly sorted list + * ('large' will be the head of the joined list). + */ + list_splice(&medium, large.prev); + list_splice(&small, large.prev); + + if (wbuf->lnum == -1) { + /* + * The GC journal head is not set, because it is the first GC + * invocation since mount. + */ + err = switch_gc_head(c); + if (err) + goto out; + } + + /* Write nodes to their new location. Use the first-fit strategy */ + while (1) { + avail = c->leb_size - wbuf->offs - wbuf->used; + list_for_each_entry_safe(snod, tmp, &large, list) { + int new_lnum, new_offs; + + if (avail < min) + break; + + if (snod->len > avail) + /* This node does not fit */ + continue; + + cond_resched(); + + new_lnum = wbuf->lnum; + new_offs = wbuf->offs + wbuf->used; + err = ubifs_wbuf_write_nolock(wbuf, snod->node, + snod->len); + if (err) + goto out; + err = ubifs_tnc_replace(c, &snod->key, sleb->lnum, + snod->offs, new_lnum, new_offs, + snod->len); + if (err) + goto out; + + avail = c->leb_size - wbuf->offs - wbuf->used; + list_del(&snod->list); + kfree(snod); + } + + if (list_empty(&large)) + break; + + /* + * Waste the rest of the space in the LEB and switch to the + * next LEB. + */ + err = switch_gc_head(c); + if (err) + goto out; + } + + return 0; + +out: + list_for_each_entry_safe(snod, tmp, &large, list) { + list_del(&snod->list); + kfree(snod); + } + return err; +} + +/** + * gc_sync_wbufs - sync write-buffers for GC. + * @c: UBIFS file-system description object + * + * We must guarantee that obsoleting nodes are on flash. Unfortunately they may + * be in a write-buffer instead. That is, a node could be written to a + * write-buffer, obsoleting another node in a LEB that is GC'd. If that LEB is + * erased before the write-buffer is sync'd and then there is an unclean + * unmount, then an existing node is lost. To avoid this, we sync all + * write-buffers. + * + * This function returns %0 on success or a negative error code on failure. + */ +static int gc_sync_wbufs(struct ubifs_info *c) +{ + int err, i; + + for (i = 0; i < c->jhead_cnt; i++) { + if (i == GCHD) + continue; + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + return err; + } + return 0; +} + +/** + * ubifs_garbage_collect_leb - garbage-collect a logical eraseblock. + * @c: UBIFS file-system description object + * @lp: describes the LEB to garbage collect + * + * This function garbage-collects an LEB and returns one of the @LEB_FREED, + * @LEB_RETAINED, etc positive codes in case of success, %-EAGAIN if commit is + * required, and other negative error codes in case of failures. + */ +int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) +{ + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; + int err = 0, lnum = lp->lnum; + + ubifs_assert(c->gc_lnum != -1 || wbuf->offs + wbuf->used == 0 || + c->need_recovery); + ubifs_assert(c->gc_lnum != lnum); + ubifs_assert(wbuf->lnum != lnum); + + /* + * We scan the entire LEB even though we only really need to scan up to + * (c->leb_size - lp->free). + */ + sleb = ubifs_scan(c, lnum, 0, c->sbuf); + if (IS_ERR(sleb)) + return PTR_ERR(sleb); + + ubifs_assert(!list_empty(&sleb->nodes)); + snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); + + if (snod->type == UBIFS_IDX_NODE) { + struct ubifs_gced_idx_leb *idx_gc; + + dbg_gc("indexing LEB %d (free %d, dirty %d)", + lnum, lp->free, lp->dirty); + list_for_each_entry(snod, &sleb->nodes, list) { + struct ubifs_idx_node *idx = snod->node; + int level = le16_to_cpu(idx->level); + + ubifs_assert(snod->type == UBIFS_IDX_NODE); + key_read(c, ubifs_idx_key(c, idx), &snod->key); + err = ubifs_dirty_idx_node(c, &snod->key, level, lnum, + snod->offs); + if (err) + goto out; + } + + idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS); + if (!idx_gc) { + err = -ENOMEM; + goto out; + } + + idx_gc->lnum = lnum; + idx_gc->unmap = 0; + list_add(&idx_gc->list, &c->idx_gc); + + /* + * Don't release the LEB until after the next commit, because + * it may contain date which is needed for recovery. So + * although we freed this LEB, it will become usable only after + * the commit. + */ + err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, + LPROPS_INDEX, 1); + if (err) + goto out; + err = LEB_FREED_IDX; + } else { + dbg_gc("data LEB %d (free %d, dirty %d)", + lnum, lp->free, lp->dirty); + + err = move_nodes(c, sleb); + if (err) + goto out; + + err = gc_sync_wbufs(c); + if (err) + goto out; + + err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0); + if (err) + goto out; + + if (c->gc_lnum == -1) { + c->gc_lnum = lnum; + err = LEB_RETAINED; + } else { + err = ubifs_wbuf_sync_nolock(wbuf); + if (err) + goto out; + + err = ubifs_leb_unmap(c, lnum); + if (err) + goto out; + + err = LEB_FREED; + } + } + +out: + ubifs_scan_destroy(sleb); + return err; +} + +/** + * ubifs_garbage_collect - UBIFS garbage collector. + * @c: UBIFS file-system description object + * @anyway: do GC even if there are free LEBs + * + * This function does out-of-place garbage collection. The return codes are: + * o positive LEB number if the LEB has been freed and may be used; + * o %-EAGAIN if the caller has to run commit; + * o %-ENOSPC if GC failed to make any progress; + * o other negative error codes in case of other errors. + * + * Garbage collector writes data to the journal when GC'ing data LEBs, and just + * marking indexing nodes dirty when GC'ing indexing LEBs. Thus, at some point + * commit may be required. But commit cannot be run from inside GC, because the + * caller might be holding the commit lock, so %-EAGAIN is returned instead; + * And this error code means that the caller has to run commit, and re-run GC + * if there is still no free space. + * + * There are many reasons why this function may return %-EAGAIN: + * o the log is full and there is no space to write an LEB reference for + * @c->gc_lnum; + * o the journal is too large and exceeds size limitations; + * o GC moved indexing LEBs, but they can be used only after the commit; + * o the shrinker fails to find clean znodes to free and requests the commit; + * o etc. + * + * Note, if the file-system is close to be full, this function may return + * %-EAGAIN infinitely, so the caller has to limit amount of re-invocations of + * the function. E.g., this happens if the limits on the journal size are too + * tough and GC writes too much to the journal before an LEB is freed. This + * might also mean that the journal is too large, and the TNC becomes to big, + * so that the shrinker is constantly called, finds not clean znodes to free, + * and requests commit. Well, this may also happen if the journal is all right, + * but another kernel process consumes too much memory. Anyway, infinite + * %-EAGAIN may happen, but in some extreme/misconfiguration cases. + */ +int ubifs_garbage_collect(struct ubifs_info *c, int anyway) +{ + int i, err, ret, min_space = c->dead_wm; + struct ubifs_lprops lp; + struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; + + ubifs_assert_cmt_locked(c); + + if (ubifs_gc_should_commit(c)) + return -EAGAIN; + + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + + if (c->ro_media) { + ret = -EROFS; + goto out_unlock; + } + + /* We expect the write-buffer to be empty on entry */ + ubifs_assert(!wbuf->used); + + for (i = 0; ; i++) { + int space_before = c->leb_size - wbuf->offs - wbuf->used; + int space_after; + + cond_resched(); + + /* Give the commit an opportunity to run */ + if (ubifs_gc_should_commit(c)) { + ret = -EAGAIN; + break; + } + + if (i > SOFT_LEBS_LIMIT && !list_empty(&c->idx_gc)) { + /* + * We've done enough iterations. Indexing LEBs were + * moved and will be available after the commit. + */ + dbg_gc("soft limit, some index LEBs GC'ed, -EAGAIN"); + ubifs_commit_required(c); + ret = -EAGAIN; + break; + } + + if (i > HARD_LEBS_LIMIT) { + /* + * We've moved too many LEBs and have not made + * progress, give up. + */ + dbg_gc("hard limit, -ENOSPC"); + ret = -ENOSPC; + break; + } + + /* + * Empty and freeable LEBs can turn up while we waited for + * the wbuf lock, or while we have been running GC. In that + * case, we should just return one of those instead of + * continuing to GC dirty LEBs. Hence we request + * 'ubifs_find_dirty_leb()' to return an empty LEB if it can. + */ + ret = ubifs_find_dirty_leb(c, &lp, min_space, anyway ? 0 : 1); + if (ret) { + if (ret == -ENOSPC) + dbg_gc("no more dirty LEBs"); + break; + } + + dbg_gc("found LEB %d: free %d, dirty %d, sum %d " + "(min. space %d)", lp.lnum, lp.free, lp.dirty, + lp.free + lp.dirty, min_space); + + if (lp.free + lp.dirty == c->leb_size) { + /* An empty LEB was returned */ + dbg_gc("LEB %d is free, return it", lp.lnum); + /* + * ubifs_find_dirty_leb() doesn't return freeable index + * LEBs. + */ + ubifs_assert(!(lp.flags & LPROPS_INDEX)); + if (lp.free != c->leb_size) { + /* + * Write buffers must be sync'd before + * unmapping freeable LEBs, because one of them + * may contain data which obsoletes something + * in 'lp.pnum'. + */ + ret = gc_sync_wbufs(c); + if (ret) + goto out; + ret = ubifs_change_one_lp(c, lp.lnum, + c->leb_size, 0, 0, 0, + 0); + if (ret) + goto out; + } + ret = ubifs_leb_unmap(c, lp.lnum); + if (ret) + goto out; + ret = lp.lnum; + break; + } + + space_before = c->leb_size - wbuf->offs - wbuf->used; + if (wbuf->lnum == -1) + space_before = 0; + + ret = ubifs_garbage_collect_leb(c, &lp); + if (ret < 0) { + if (ret == -EAGAIN || ret == -ENOSPC) { + /* + * These codes are not errors, so we have to + * return the LEB to lprops. But if the + * 'ubifs_return_leb()' function fails, its + * failure code is propagated to the caller + * instead of the original '-EAGAIN' or + * '-ENOSPC'. + */ + err = ubifs_return_leb(c, lp.lnum); + if (err) + ret = err; + break; + } + goto out; + } + + if (ret == LEB_FREED) { + /* An LEB has been freed and is ready for use */ + dbg_gc("LEB %d freed, return", lp.lnum); + ret = lp.lnum; + break; + } + + if (ret == LEB_FREED_IDX) { + /* + * This was an indexing LEB and it cannot be + * immediately used. And instead of requesting the + * commit straight away, we try to garbage collect some + * more. + */ + dbg_gc("indexing LEB %d freed, continue", lp.lnum); + continue; + } + + ubifs_assert(ret == LEB_RETAINED); + space_after = c->leb_size - wbuf->offs - wbuf->used; + dbg_gc("LEB %d retained, freed %d bytes", lp.lnum, + space_after - space_before); + + if (space_after > space_before) { + /* GC makes progress, keep working */ + min_space >>= 1; + if (min_space < c->dead_wm) + min_space = c->dead_wm; + continue; + } + + dbg_gc("did not make progress"); + + /* + * GC moved an LEB bud have not done any progress. This means + * that the previous GC head LEB contained too few free space + * and the LEB which was GC'ed contained only large nodes which + * did not fit that space. + * + * We can do 2 things: + * 1. pick another LEB in a hope it'll contain a small node + * which will fit the space we have at the end of current GC + * head LEB, but there is no guarantee, so we try this out + * unless we have already been working for too long; + * 2. request an LEB with more dirty space, which will force + * 'ubifs_find_dirty_leb()' to start scanning the lprops + * table, instead of just picking one from the heap + * (previously it already picked the dirtiest LEB). + */ + if (i < SOFT_LEBS_LIMIT) { + dbg_gc("try again"); + continue; + } + + min_space <<= 1; + if (min_space > c->dark_wm) + min_space = c->dark_wm; + dbg_gc("set min. space to %d", min_space); + } + + if (ret == -ENOSPC && !list_empty(&c->idx_gc)) { + dbg_gc("no space, some index LEBs GC'ed, -EAGAIN"); + ubifs_commit_required(c); + ret = -EAGAIN; + } + + err = ubifs_wbuf_sync_nolock(wbuf); + if (!err) + err = ubifs_leb_unmap(c, c->gc_lnum); + if (err) { + ret = err; + goto out; + } +out_unlock: + mutex_unlock(&wbuf->io_mutex); + return ret; + +out: + ubifs_assert(ret < 0); + ubifs_assert(ret != -ENOSPC && ret != -EAGAIN); + ubifs_ro_mode(c, ret); + ubifs_wbuf_sync_nolock(wbuf); + mutex_unlock(&wbuf->io_mutex); + ubifs_return_leb(c, lp.lnum); + return ret; +} + +/** + * ubifs_gc_start_commit - garbage collection at start of commit. + * @c: UBIFS file-system description object + * + * If a LEB has only dirty and free space, then we may safely unmap it and make + * it free. Note, we cannot do this with indexing LEBs because dirty space may + * correspond index nodes that are required for recovery. In that case, the + * LEB cannot be unmapped until after the next commit. + * + * This function returns %0 upon success and a negative error code upon failure. + */ +int ubifs_gc_start_commit(struct ubifs_info *c) +{ + struct ubifs_gced_idx_leb *idx_gc; + const struct ubifs_lprops *lp; + int err = 0, flags; + + ubifs_get_lprops(c); + + /* + * Unmap (non-index) freeable LEBs. Note that recovery requires that all + * wbufs are sync'd before this, which is done in 'do_commit()'. + */ + while (1) { + lp = ubifs_fast_find_freeable(c); + if (unlikely(IS_ERR(lp))) { + err = PTR_ERR(lp); + goto out; + } + if (!lp) + break; + ubifs_assert(!(lp->flags & LPROPS_TAKEN)); + ubifs_assert(!(lp->flags & LPROPS_INDEX)); + err = ubifs_leb_unmap(c, lp->lnum); + if (err) + goto out; + lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0); + if (unlikely(IS_ERR(lp))) { + err = PTR_ERR(lp); + goto out; + } + ubifs_assert(!(lp->flags & LPROPS_TAKEN)); + ubifs_assert(!(lp->flags & LPROPS_INDEX)); + } + + /* Mark GC'd index LEBs OK to unmap after this commit finishes */ + list_for_each_entry(idx_gc, &c->idx_gc, list) + idx_gc->unmap = 1; + + /* Record index freeable LEBs for unmapping after commit */ + while (1) { + lp = ubifs_fast_find_frdi_idx(c); + if (unlikely(IS_ERR(lp))) { + err = PTR_ERR(lp); + goto out; + } + if (!lp) + break; + idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS); + if (!idx_gc) { + err = -ENOMEM; + goto out; + } + ubifs_assert(!(lp->flags & LPROPS_TAKEN)); + ubifs_assert(lp->flags & LPROPS_INDEX); + /* Don't release the LEB until after the next commit */ + flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX; + lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1); + if (unlikely(IS_ERR(lp))) { + err = PTR_ERR(lp); + kfree(idx_gc); + goto out; + } + ubifs_assert(lp->flags & LPROPS_TAKEN); + ubifs_assert(!(lp->flags & LPROPS_INDEX)); + idx_gc->lnum = lp->lnum; + idx_gc->unmap = 1; + list_add(&idx_gc->list, &c->idx_gc); + } +out: + ubifs_release_lprops(c); + return err; +} + +/** + * ubifs_gc_end_commit - garbage collection at end of commit. + * @c: UBIFS file-system description object + * + * This function completes out-of-place garbage collection of index LEBs. + */ +int ubifs_gc_end_commit(struct ubifs_info *c) +{ + struct ubifs_gced_idx_leb *idx_gc, *tmp; + struct ubifs_wbuf *wbuf; + int err = 0; + + wbuf = &c->jheads[GCHD].wbuf; + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + list_for_each_entry_safe(idx_gc, tmp, &c->idx_gc, list) + if (idx_gc->unmap) { + dbg_gc("LEB %d", idx_gc->lnum); + err = ubifs_leb_unmap(c, idx_gc->lnum); + if (err) + goto out; + err = ubifs_change_one_lp(c, idx_gc->lnum, LPROPS_NC, + LPROPS_NC, 0, LPROPS_TAKEN, -1); + if (err) + goto out; + list_del(&idx_gc->list); + kfree(idx_gc); + } +out: + mutex_unlock(&wbuf->io_mutex); + return err; +} + +/** + * ubifs_destroy_idx_gc - destroy idx_gc list. + * @c: UBIFS file-system description object + * + * This function destroys the idx_gc list. It is called when unmounting or + * remounting read-only so locks are not needed. + */ +void ubifs_destroy_idx_gc(struct ubifs_info *c) +{ + while (!list_empty(&c->idx_gc)) { + struct ubifs_gced_idx_leb *idx_gc; + + idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb, + list); + c->idx_gc_cnt -= 1; + list_del(&idx_gc->list); + kfree(idx_gc); + } + +} + +/** + * ubifs_get_idx_gc_leb - get a LEB from GC'd index LEB list. + * @c: UBIFS file-system description object + * + * Called during start commit so locks are not needed. + */ +int ubifs_get_idx_gc_leb(struct ubifs_info *c) +{ + struct ubifs_gced_idx_leb *idx_gc; + int lnum; + + if (list_empty(&c->idx_gc)) + return -ENOSPC; + idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb, list); + lnum = idx_gc->lnum; + /* c->idx_gc_cnt is updated by the caller when lprops are updated */ + list_del(&idx_gc->list); + kfree(idx_gc); + return lnum; +} diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c new file mode 100644 index 0000000..3374f91 --- /dev/null +++ b/fs/ubifs/io.c @@ -0,0 +1,914 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * Copyright (C) 2006, 2007 University of Szeged, Hungary + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + * Zoltan Sogor + */ + +/* + * This file implements UBIFS I/O subsystem which provides various I/O-related + * helper functions (reading/writing/checking/validating nodes) and implements + * write-buffering support. Write buffers help to save space which otherwise + * would have been wasted for padding to the nearest minimal I/O unit boundary. + * Instead, data first goes to the write-buffer and is flushed when the + * buffer is full or when it is not used for some time (by timer). This is + * similarto the mechanism is used by JFFS2. + * + * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by + * mutexes defined inside these objects. Since sometimes upper-level code + * has to lock the write-buffer (e.g. journal space reservation code), many + * functions related to write-buffers have "nolock" suffix which means that the + * caller has to lock the write-buffer before calling this function. + * + * UBIFS stores nodes at 64 bit-aligned addresses. If the node length is not + * aligned, UBIFS starts the next node from the aligned address, and the padded + * bytes may contain any rubbish. In other words, UBIFS does not put padding + * bytes in those small gaps. Common headers of nodes store real node lengths, + * not aligned lengths. Indexing nodes also store real lengths in branches. + * + * UBIFS uses padding when it pads to the next min. I/O unit. In this case it + * uses padding nodes or padding bytes, if the padding node does not fit. + * + * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes + * every time they are read from the flash media. + */ + +#include +#include "ubifs.h" + +/** + * ubifs_check_node - check node. + * @c: UBIFS file-system description object + * @buf: node to check + * @lnum: logical eraseblock number + * @offs: offset within the logical eraseblock + * @quiet: print no messages + * + * This function checks node magic number and CRC checksum. This function also + * validates node length to prevent UBIFS from becoming crazy when an attacker + * feeds it a file-system image with incorrect nodes. For example, too large + * node length in the common header could cause UBIFS to read memory outside of + * allocated buffer when checking the CRC checksum. + * + * This function returns zero in case of success %-EUCLEAN in case of bad CRC + * or magic. + */ +int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, + int offs, int quiet) +{ + int err = -EINVAL, type, node_len; + uint32_t crc, node_crc, magic; + const struct ubifs_ch *ch = buf; + + ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); + ubifs_assert(!(offs & 7) && offs < c->leb_size); + + magic = le32_to_cpu(ch->magic); + if (magic != UBIFS_NODE_MAGIC) { + if (!quiet) + ubifs_err("bad magic %#08x, expected %#08x", + magic, UBIFS_NODE_MAGIC); + err = -EUCLEAN; + goto out; + } + + type = ch->node_type; + if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) { + if (!quiet) + ubifs_err("bad node type %d", type); + goto out; + } + + node_len = le32_to_cpu(ch->len); + if (node_len + offs > c->leb_size) + goto out_len; + + if (c->ranges[type].max_len == 0) { + if (node_len != c->ranges[type].len) + goto out_len; + } else if (node_len < c->ranges[type].min_len || + node_len > c->ranges[type].max_len) + goto out_len; + + crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); + node_crc = le32_to_cpu(ch->crc); + if (crc != node_crc) { + if (!quiet) + ubifs_err("bad CRC: calculated %#08x, read %#08x", + crc, node_crc); + err = -EUCLEAN; + goto out; + } + + return 0; + +out_len: + if (!quiet) + ubifs_err("bad node length %d", node_len); +out: + if (!quiet) { + ubifs_err("bad node at LEB %d:%d", lnum, offs); + dbg_dump_node(c, buf); + dbg_dump_stack(); + } + return err; +} + +/** + * ubifs_pad - pad flash space. + * @c: UBIFS file-system description object + * @buf: buffer to put padding to + * @pad: how many bytes to pad + * + * The flash media obliges us to write only in chunks of %c->min_io_size and + * when we have to write less data we add padding node to the write-buffer and + * pad it to the next minimal I/O unit's boundary. Padding nodes help when the + * media is being scanned. If the amount of wasted space is not enough to fit a + * padding node which takes %UBIFS_PAD_NODE_SZ bytes, we write padding bytes + * pattern (%UBIFS_PADDING_BYTE). + * + * Padding nodes are also used to fill gaps when the "commit-in-gaps" method is + * used. + */ +void ubifs_pad(const struct ubifs_info *c, void *buf, int pad) +{ + uint32_t crc; + + ubifs_assert(pad >= 0 && !(pad & 7)); + + if (pad >= UBIFS_PAD_NODE_SZ) { + struct ubifs_ch *ch = buf; + struct ubifs_pad_node *pad_node = buf; + + ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); + ch->node_type = UBIFS_PAD_NODE; + ch->group_type = UBIFS_NO_NODE_GROUP; + ch->padding[0] = ch->padding[1] = 0; + ch->sqnum = 0; + ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ); + pad -= UBIFS_PAD_NODE_SZ; + pad_node->pad_len = cpu_to_le32(pad); + crc = crc32(UBIFS_CRC32_INIT, buf + 8, UBIFS_PAD_NODE_SZ - 8); + ch->crc = cpu_to_le32(crc); + memset(buf + UBIFS_PAD_NODE_SZ, 0, pad); + } else if (pad > 0) + /* Too little space, padding node won't fit */ + memset(buf, UBIFS_PADDING_BYTE, pad); +} + +/** + * next_sqnum - get next sequence number. + * @c: UBIFS file-system description object + */ +static unsigned long long next_sqnum(struct ubifs_info *c) +{ + unsigned long long sqnum; + + spin_lock(&c->cnt_lock); + sqnum = ++c->max_sqnum; + spin_unlock(&c->cnt_lock); + + if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) { + if (sqnum >= SQNUM_WATERMARK) { + ubifs_err("sequence number overflow %llu, end of life", + sqnum); + ubifs_ro_mode(c, -EINVAL); + } + ubifs_warn("running out of sequence numbers, end of life soon"); + } + + return sqnum; +} + +/** + * ubifs_prepare_node - prepare node to be written to flash. + * @c: UBIFS file-system description object + * @node: the node to pad + * @len: node length + * @pad: if the buffer has to be padded + * + * This function prepares node at @node to be written to the media - it + * calculates node CRC, fills the common header, and adds proper padding up to + * the next minimum I/O unit if @pad is not zero. + */ +void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad) +{ + uint32_t crc; + struct ubifs_ch *ch = node; + unsigned long long sqnum = next_sqnum(c); + + ubifs_assert(len >= UBIFS_CH_SZ); + + ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); + ch->len = cpu_to_le32(len); + ch->group_type = UBIFS_NO_NODE_GROUP; + ch->sqnum = cpu_to_le64(sqnum); + ch->padding[0] = ch->padding[1] = 0; + crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8); + ch->crc = cpu_to_le32(crc); + + if (pad) { + len = ALIGN(len, 8); + pad = ALIGN(len, c->min_io_size) - len; + ubifs_pad(c, node + len, pad); + } +} + +/** + * ubifs_prep_grp_node - prepare node of a group to be written to flash. + * @c: UBIFS file-system description object + * @node: the node to pad + * @len: node length + * @last: indicates the last node of the group + * + * This function prepares node at @node to be written to the media - it + * calculates node CRC and fills the common header. + */ +void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last) +{ + uint32_t crc; + struct ubifs_ch *ch = node; + unsigned long long sqnum = next_sqnum(c); + + ubifs_assert(len >= UBIFS_CH_SZ); + + ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); + ch->len = cpu_to_le32(len); + if (last) + ch->group_type = UBIFS_LAST_OF_NODE_GROUP; + else + ch->group_type = UBIFS_IN_NODE_GROUP; + ch->sqnum = cpu_to_le64(sqnum); + ch->padding[0] = ch->padding[1] = 0; + crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8); + ch->crc = cpu_to_le32(crc); +} + +/** + * wbuf_timer_callback - write-buffer timer callback function. + * @data: timer data (write-buffer descriptor) + * + * This function is called when the write-buffer timer expires. + */ +static void wbuf_timer_callback_nolock(unsigned long data) +{ + struct ubifs_wbuf *wbuf = (struct ubifs_wbuf *)data; + + wbuf->need_sync = 1; + wbuf->c->need_wbuf_sync = 1; + ubifs_wake_up_bgt(wbuf->c); +} + +/** + * new_wbuf_timer - start new write-buffer timer. + * @wbuf: write-buffer descriptor + */ +static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) +{ + ubifs_assert(!timer_pending(&wbuf->timer)); + + if (!wbuf->timeout) + return; + + wbuf->timer.expires = jiffies + wbuf->timeout; + add_timer(&wbuf->timer); +} + +/** + * cancel_wbuf_timer - cancel write-buffer timer. + * @wbuf: write-buffer descriptor + */ +static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) +{ + /* + * If the syncer is waiting for the lock (from the background thread's + * context) and another task is changing write-buffer then the syncing + * should be canceled. + */ + wbuf->need_sync = 0; + del_timer(&wbuf->timer); +} + +/** + * ubifs_wbuf_sync_nolock - synchronize write-buffer. + * @wbuf: write-buffer to synchronize + * + * This function synchronizes write-buffer @buf and returns zero in case of + * success or a negative error code in case of failure. + */ +int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) +{ + struct ubifs_info *c = wbuf->c; + int err, dirt; + + cancel_wbuf_timer_nolock(wbuf); + if (!wbuf->used || wbuf->lnum == -1) + /* Write-buffer is empty or not seeked */ + return 0; + + dbg_io("LEB %d:%d, %d bytes", + wbuf->lnum, wbuf->offs, wbuf->used); + ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); + ubifs_assert(!(wbuf->avail & 7)); + ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); + + if (c->ro_media) + return -EROFS; + + ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail); + err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, + c->min_io_size, wbuf->dtype); + if (err) { + ubifs_err("cannot write %d bytes to LEB %d:%d", + c->min_io_size, wbuf->lnum, wbuf->offs); + dbg_dump_stack(); + return err; + } + + dirt = wbuf->avail; + + spin_lock(&wbuf->lock); + wbuf->offs += c->min_io_size; + wbuf->avail = c->min_io_size; + wbuf->used = 0; + wbuf->next_ino = 0; + spin_unlock(&wbuf->lock); + + if (wbuf->sync_callback) + err = wbuf->sync_callback(c, wbuf->lnum, + c->leb_size - wbuf->offs, dirt); + return err; +} + +/** + * ubifs_wbuf_seek_nolock - seek write-buffer. + * @wbuf: write-buffer + * @lnum: logical eraseblock number to seek to + * @offs: logical eraseblock offset to seek to + * @dtype: data type + * + * This function targets the write buffer to logical eraseblock @lnum:@offs. + * The write-buffer is synchronized if it is not empty. Returns zero in case of + * success and a negative error code in case of failure. + */ +int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, + int dtype) +{ + const struct ubifs_info *c = wbuf->c; + + dbg_io("LEB %d:%d", lnum, offs); + ubifs_assert(lnum >= 0 && lnum < c->leb_cnt); + ubifs_assert(offs >= 0 && offs <= c->leb_size); + ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); + ubifs_assert(lnum != wbuf->lnum); + + if (wbuf->used > 0) { + int err = ubifs_wbuf_sync_nolock(wbuf); + + if (err) + return err; + } + + spin_lock(&wbuf->lock); + wbuf->lnum = lnum; + wbuf->offs = offs; + wbuf->avail = c->min_io_size; + wbuf->used = 0; + spin_unlock(&wbuf->lock); + wbuf->dtype = dtype; + + return 0; +} + +/** + * ubifs_bg_wbufs_sync - synchronize write-buffers. + * @c: UBIFS file-system description object + * + * This function is called by background thread to synchronize write-buffers. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_bg_wbufs_sync(struct ubifs_info *c) +{ + int err, i; + + if (!c->need_wbuf_sync) + return 0; + c->need_wbuf_sync = 0; + + if (c->ro_media) { + err = -EROFS; + goto out_timers; + } + + dbg_io("synchronize"); + for (i = 0; i < c->jhead_cnt; i++) { + struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; + + cond_resched(); + + /* + * If the mutex is locked then wbuf is being changed, so + * synchronization is not necessary. + */ + if (mutex_is_locked(&wbuf->io_mutex)) + continue; + + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + if (!wbuf->need_sync) { + mutex_unlock(&wbuf->io_mutex); + continue; + } + + err = ubifs_wbuf_sync_nolock(wbuf); + mutex_unlock(&wbuf->io_mutex); + if (err) { + ubifs_err("cannot sync write-buffer, error %d", err); + ubifs_ro_mode(c, err); + goto out_timers; + } + } + + return 0; + +out_timers: + /* Cancel all timers to prevent repeated errors */ + for (i = 0; i < c->jhead_cnt; i++) { + struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; + + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + cancel_wbuf_timer_nolock(wbuf); + mutex_unlock(&wbuf->io_mutex); + } + return err; +} + +/** + * ubifs_wbuf_write_nolock - write data to flash via write-buffer. + * @wbuf: write-buffer + * @buf: node to write + * @len: node length + * + * This function writes data to flash via write-buffer @wbuf. This means that + * the last piece of the node won't reach the flash media immediately if it + * does not take whole minimal I/O unit. Instead, the node will sit in RAM + * until the write-buffer is synchronized (e.g., by timer). + * + * This function returns zero in case of success and a negative error code in + * case of failure. If the node cannot be written because there is no more + * space in this logical eraseblock, %-ENOSPC is returned. + */ +int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) +{ + struct ubifs_info *c = wbuf->c; + int err, written, n, aligned_len = ALIGN(len, 8), offs; + + dbg_io("%d bytes (%s) to wbuf at LEB %d:%d", len, + dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->lnum, + wbuf->offs + wbuf->used); + ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); + ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); + ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); + ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size); + ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); + + if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { + err = -ENOSPC; + goto out; + } + + cancel_wbuf_timer_nolock(wbuf); + + if (c->ro_media) + return -EROFS; + + if (aligned_len <= wbuf->avail) { + /* + * The node is not very large and fits entirely within + * write-buffer. + */ + memcpy(wbuf->buf + wbuf->used, buf, len); + + if (aligned_len == wbuf->avail) { + dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, + wbuf->offs); + err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, + wbuf->offs, c->min_io_size, + wbuf->dtype); + if (err) + goto out; + + spin_lock(&wbuf->lock); + wbuf->offs += c->min_io_size; + wbuf->avail = c->min_io_size; + wbuf->used = 0; + wbuf->next_ino = 0; + spin_unlock(&wbuf->lock); + } else { + spin_lock(&wbuf->lock); + wbuf->avail -= aligned_len; + wbuf->used += aligned_len; + spin_unlock(&wbuf->lock); + } + + goto exit; + } + + /* + * The node is large enough and does not fit entirely within current + * minimal I/O unit. We have to fill and flush write-buffer and switch + * to the next min. I/O unit. + */ + dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, wbuf->offs); + memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); + err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, + c->min_io_size, wbuf->dtype); + if (err) + goto out; + + offs = wbuf->offs + c->min_io_size; + len -= wbuf->avail; + aligned_len -= wbuf->avail; + written = wbuf->avail; + + /* + * The remaining data may take more whole min. I/O units, so write the + * remains multiple to min. I/O unit size directly to the flash media. + * We align node length to 8-byte boundary because we anyway flash wbuf + * if the remaining space is less than 8 bytes. + */ + n = aligned_len >> c->min_io_shift; + if (n) { + n <<= c->min_io_shift; + dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); + err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, + wbuf->dtype); + if (err) + goto out; + offs += n; + aligned_len -= n; + len -= n; + written += n; + } + + spin_lock(&wbuf->lock); + if (aligned_len) + /* + * And now we have what's left and what does not take whole + * min. I/O unit, so write it to the write-buffer and we are + * done. + */ + memcpy(wbuf->buf, buf + written, len); + + wbuf->offs = offs; + wbuf->used = aligned_len; + wbuf->avail = c->min_io_size - aligned_len; + wbuf->next_ino = 0; + spin_unlock(&wbuf->lock); + +exit: + if (wbuf->sync_callback) { + int free = c->leb_size - wbuf->offs - wbuf->used; + + err = wbuf->sync_callback(c, wbuf->lnum, free, 0); + if (err) + goto out; + } + + if (wbuf->used) + new_wbuf_timer_nolock(wbuf); + + return 0; + +out: + ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", + len, wbuf->lnum, wbuf->offs, err); + dbg_dump_node(c, buf); + dbg_dump_stack(); + dbg_dump_leb(c, wbuf->lnum); + return err; +} + +/** + * ubifs_write_node - write node to the media. + * @c: UBIFS file-system description object + * @buf: the node to write + * @len: node length + * @lnum: logical eraseblock number + * @offs: offset within the logical eraseblock + * @dtype: node life-time hint (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN) + * + * This function automatically fills node magic number, assigns sequence + * number, and calculates node CRC checksum. The length of the @buf buffer has + * to be aligned to the minimal I/O unit size. This function automatically + * appends padding node and padding bytes if needed. Returns zero in case of + * success and a negative error code in case of failure. + */ +int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, + int offs, int dtype) +{ + int err, buf_len = ALIGN(len, c->min_io_size); + + dbg_io("LEB %d:%d, %s, length %d (aligned %d)", + lnum, offs, dbg_ntype(((struct ubifs_ch *)buf)->node_type), len, + buf_len); + ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); + ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); + + if (c->ro_media) + return -EROFS; + + ubifs_prepare_node(c, buf, len, 1); + err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype); + if (err) { + ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", + buf_len, lnum, offs, err); + dbg_dump_node(c, buf); + dbg_dump_stack(); + } + + return err; +} + +/** + * ubifs_read_node_wbuf - read node from the media or write-buffer. + * @wbuf: wbuf to check for un-written data + * @buf: buffer to read to + * @type: node type + * @len: node length + * @lnum: logical eraseblock number + * @offs: offset within the logical eraseblock + * + * This function reads a node of known type and length, checks it and stores + * in @buf. If the node partially or fully sits in the write-buffer, this + * function takes data from the buffer, otherwise it reads the flash media. + * Returns zero in case of success, %-EUCLEAN if CRC mismatched and a negative + * error code in case of failure. + */ +int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, + int lnum, int offs) +{ + const struct ubifs_info *c = wbuf->c; + int err, rlen, overlap; + struct ubifs_ch *ch = buf; + + dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); + ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); + ubifs_assert(!(offs & 7) && offs < c->leb_size); + ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); + + spin_lock(&wbuf->lock); + overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs); + if (!overlap) { + /* We may safely unlock the write-buffer and read the data */ + spin_unlock(&wbuf->lock); + return ubifs_read_node(c, buf, type, len, lnum, offs); + } + + /* Don't read under wbuf */ + rlen = wbuf->offs - offs; + if (rlen < 0) + rlen = 0; + + /* Copy the rest from the write-buffer */ + memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen); + spin_unlock(&wbuf->lock); + + if (rlen > 0) { + /* Read everything that goes before write-buffer */ + err = ubi_read(c->ubi, lnum, buf, offs, rlen); + if (err && err != -EBADMSG) { + ubifs_err("failed to read node %d from LEB %d:%d, " + "error %d", type, lnum, offs, err); + dbg_dump_stack(); + return err; + } + } + + if (type != ch->node_type) { + ubifs_err("bad node type (%d but expected %d)", + ch->node_type, type); + goto out; + } + + err = ubifs_check_node(c, buf, lnum, offs, 0); + if (err) { + ubifs_err("expected node type %d", type); + return err; + } + + rlen = le32_to_cpu(ch->len); + if (rlen != len) { + ubifs_err("bad node length %d, expected %d", rlen, len); + goto out; + } + + return 0; + +out: + ubifs_err("bad node at LEB %d:%d", lnum, offs); + dbg_dump_node(c, buf); + dbg_dump_stack(); + return -EINVAL; +} + +/** + * ubifs_read_node - read node. + * @c: UBIFS file-system description object + * @buf: buffer to read to + * @type: node type + * @len: node length (not aligned) + * @lnum: logical eraseblock number + * @offs: offset within the logical eraseblock + * + * This function reads a node of known type and and length, checks it and + * stores in @buf. Returns zero in case of success, %-EUCLEAN if CRC mismatched + * and a negative error code in case of failure. + */ +int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, + int lnum, int offs) +{ + int err, l; + struct ubifs_ch *ch = buf; + + dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); + ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); + ubifs_assert(len >= UBIFS_CH_SZ && offs + len <= c->leb_size); + ubifs_assert(!(offs & 7) && offs < c->leb_size); + ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); + + err = ubi_read(c->ubi, lnum, buf, offs, len); + if (err && err != -EBADMSG) { + ubifs_err("cannot read node %d from LEB %d:%d, error %d", + type, lnum, offs, err); + return err; + } + + if (type != ch->node_type) { + ubifs_err("bad node type (%d but expected %d)", + ch->node_type, type); + goto out; + } + + err = ubifs_check_node(c, buf, lnum, offs, 0); + if (err) { + ubifs_err("expected node type %d", type); + return err; + } + + l = le32_to_cpu(ch->len); + if (l != len) { + ubifs_err("bad node length %d, expected %d", l, len); + goto out; + } + + return 0; + +out: + ubifs_err("bad node at LEB %d:%d", lnum, offs); + dbg_dump_node(c, buf); + dbg_dump_stack(); + return -EINVAL; +} + +/** + * ubifs_wbuf_init - initialize write-buffer. + * @c: UBIFS file-system description object + * @wbuf: write-buffer to initialize + * + * This function initializes write buffer. Returns zero in case of success + * %-ENOMEM in case of failure. + */ +int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) +{ + size_t size; + + wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); + if (!wbuf->buf) + return -ENOMEM; + + size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); + wbuf->inodes = kmalloc(size, GFP_KERNEL); + if (!wbuf->inodes) { + kfree(wbuf->buf); + wbuf->buf = NULL; + return -ENOMEM; + } + + wbuf->used = 0; + wbuf->lnum = wbuf->offs = -1; + wbuf->avail = c->min_io_size; + wbuf->dtype = UBI_UNKNOWN; + wbuf->sync_callback = NULL; + mutex_init(&wbuf->io_mutex); + spin_lock_init(&wbuf->lock); + + wbuf->c = c; + init_timer(&wbuf->timer); + wbuf->timer.function = wbuf_timer_callback_nolock; + wbuf->timer.data = (unsigned long)wbuf; + wbuf->timeout = DEFAULT_WBUF_TIMEOUT; + wbuf->next_ino = 0; + + return 0; +} + +/** + * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array. + * @wbuf: the write-buffer whereto add + * @inum: the inode number + * + * This function adds an inode number to the inode array of the write-buffer. + */ +void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum) +{ + if (!wbuf->buf) + /* NOR flash or something similar */ + return; + + spin_lock(&wbuf->lock); + if (wbuf->used) + wbuf->inodes[wbuf->next_ino++] = inum; + spin_unlock(&wbuf->lock); +} + +/** + * wbuf_has_ino - returns if the wbuf contains data from the inode. + * @wbuf: the write-buffer + * @inum: the inode number + * + * This function returns with %1 if the write-buffer contains some data from the + * given inode otherwise it returns with %0. + */ +static int wbuf_has_ino(struct ubifs_wbuf *wbuf, ino_t inum) +{ + int i, ret = 0; + + spin_lock(&wbuf->lock); + for (i = 0; i < wbuf->next_ino; i++) + if (inum == wbuf->inodes[i]) { + ret = 1; + break; + } + spin_unlock(&wbuf->lock); + + return ret; +} + +/** + * ubifs_sync_wbufs_by_inode - synchronize write-buffers for an inode. + * @c: UBIFS file-system description object + * @inode: inode to synchronize + * + * This function synchronizes write-buffers which contain nodes belonging to + * @inode. Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode) +{ + int i, err = 0; + + for (i = 0; i < c->jhead_cnt; i++) { + struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; + + if (i == GCHD) + /* + * GC head is special, do not look at it. Even if the + * head contains something related to this inode, it is + * a _copy_ of corresponding on-flash node which sits + * somewhere else. + */ + continue; + + if (!wbuf_has_ino(wbuf, inode->i_ino)) + continue; + + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + if (wbuf_has_ino(wbuf, inode->i_ino)) + err = ubifs_wbuf_sync_nolock(wbuf); + mutex_unlock(&wbuf->io_mutex); + + if (err) { + ubifs_ro_mode(c, err); + return err; + } + } + return 0; +} diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c new file mode 100644 index 0000000..5e82cff --- /dev/null +++ b/fs/ubifs/ioctl.c @@ -0,0 +1,204 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * Copyright (C) 2006, 2007 University of Szeged, Hungary + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Zoltan Sogor + * Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* This file implements EXT2-compatible extended attribute ioctl() calls */ + +#include +#include +#include +#include "ubifs.h" + +/** + * ubifs_set_inode_flags - set VFS inode flags. + * @inode: VFS inode to set flags for + * + * This function propagates flags from UBIFS inode object to VFS inode object. + */ +void ubifs_set_inode_flags(struct inode *inode) +{ + unsigned int flags = ubifs_inode(inode)->flags; + + inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_DIRSYNC); + if (flags & UBIFS_SYNC_FL) + inode->i_flags |= S_SYNC; + if (flags & UBIFS_APPEND_FL) + inode->i_flags |= S_APPEND; + if (flags & UBIFS_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + if (flags & UBIFS_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; +} + +/* + * ioctl2ubifs - convert ioctl inode flags to UBIFS inode flags. + * @ioctl_flags: flags to convert + * + * This function convert ioctl flags (@FS_COMPR_FL, etc) to UBIFS inode flags + * (@UBIFS_COMPR_FL, etc). + */ +static int ioctl2ubifs(int ioctl_flags) +{ + int ubifs_flags = 0; + + if (ioctl_flags & FS_COMPR_FL) + ubifs_flags |= UBIFS_COMPR_FL; + if (ioctl_flags & FS_SYNC_FL) + ubifs_flags |= UBIFS_SYNC_FL; + if (ioctl_flags & FS_APPEND_FL) + ubifs_flags |= UBIFS_APPEND_FL; + if (ioctl_flags & FS_IMMUTABLE_FL) + ubifs_flags |= UBIFS_IMMUTABLE_FL; + if (ioctl_flags & FS_DIRSYNC_FL) + ubifs_flags |= UBIFS_DIRSYNC_FL; + + return ubifs_flags; +} + +/* + * ubifs2ioctl - convert UBIFS inode flags to ioctl inode flags. + * @ubifs_flags: flags to convert + * + * This function convert UBIFS (@UBIFS_COMPR_FL, etc) to ioctl flags + * (@FS_COMPR_FL, etc). + */ +static int ubifs2ioctl(int ubifs_flags) +{ + int ioctl_flags = 0; + + if (ubifs_flags & UBIFS_COMPR_FL) + ioctl_flags |= FS_COMPR_FL; + if (ubifs_flags & UBIFS_SYNC_FL) + ioctl_flags |= FS_SYNC_FL; + if (ubifs_flags & UBIFS_APPEND_FL) + ioctl_flags |= FS_APPEND_FL; + if (ubifs_flags & UBIFS_IMMUTABLE_FL) + ioctl_flags |= FS_IMMUTABLE_FL; + if (ubifs_flags & UBIFS_DIRSYNC_FL) + ioctl_flags |= FS_DIRSYNC_FL; + + return ioctl_flags; +} + +static int setflags(struct inode *inode, int flags) +{ + int oldflags, err, release; + struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_info *c = inode->i_sb->s_fs_info; + struct ubifs_budget_req req = { .dirtied_ino = 1, + .dirtied_ino_d = ui->data_len }; + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + /* + * The IMMUTABLE and APPEND_ONLY flags can only be changed by + * the relevant capability. + */ + mutex_lock(&ui->ui_mutex); + oldflags = ubifs2ioctl(ui->flags); + if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { + if (!capable(CAP_LINUX_IMMUTABLE)) { + err = -EPERM; + goto out_unlock; + } + } + + ui->flags = ioctl2ubifs(flags); + ubifs_set_inode_flags(inode); + inode->i_ctime = ubifs_current_time(inode); + release = ui->dirty; + mark_inode_dirty_sync(inode); + mutex_unlock(&ui->ui_mutex); + + if (release) + ubifs_release_budget(c, &req); + if (IS_SYNC(inode)) + err = write_inode_now(inode, 1); + return err; + +out_unlock: + ubifs_err("can't modify inode %lu attributes", inode->i_ino); + mutex_unlock(&ui->ui_mutex); + ubifs_release_budget(c, &req); + return err; +} + +long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + int flags, err; + struct inode *inode = file->f_path.dentry->d_inode; + + switch (cmd) { + case FS_IOC_GETFLAGS: + flags = ubifs2ioctl(ubifs_inode(inode)->flags); + + return put_user(flags, (int __user *) arg); + + case FS_IOC_SETFLAGS: { + if (IS_RDONLY(inode)) + return -EROFS; + + if (!is_owner_or_cap(inode)) + return -EACCES; + + if (get_user(flags, (int __user *) arg)) + return -EFAULT; + + if (!S_ISDIR(inode->i_mode)) + flags &= ~FS_DIRSYNC_FL; + + /* + * Make sure the file-system is read-write and make sure it + * will not become read-only while we are changing the flags. + */ + err = mnt_want_write(file->f_path.mnt); + if (err) + return err; + err = setflags(inode, flags); + mnt_drop_write(file->f_path.mnt); + return err; + } + + default: + return -ENOTTY; + } +} + +#ifdef CONFIG_COMPAT +long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case FS_IOC32_GETFLAGS: + cmd = FS_IOC_GETFLAGS; + break; + case FS_IOC32_SETFLAGS: + cmd = FS_IOC_SETFLAGS; + break; + default: + return -ENOIOCTLCMD; + } + return ubifs_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); +} +#endif diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c new file mode 100644 index 0000000..283155a --- /dev/null +++ b/fs/ubifs/journal.c @@ -0,0 +1,1387 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file implements UBIFS journal. + * + * The journal consists of 2 parts - the log and bud LEBs. The log has fixed + * length and position, while a bud logical eraseblock is any LEB in the main + * area. Buds contain file system data - data nodes, inode nodes, etc. The log + * contains only references to buds and some other stuff like commit + * start node. The idea is that when we commit the journal, we do + * not copy the data, the buds just become indexed. Since after the commit the + * nodes in bud eraseblocks become leaf nodes of the file system index tree, we + * use term "bud". Analogy is obvious, bud eraseblocks contain nodes which will + * become leafs in the future. + * + * The journal is multi-headed because we want to write data to the journal as + * optimally as possible. It is nice to have nodes belonging to the same inode + * in one LEB, so we may write data owned by different inodes to different + * journal heads, although at present only one data head is used. + * + * For recovery reasons, the base head contains all inode nodes, all directory + * entry nodes and all truncate nodes. This means that the other heads contain + * only data nodes. + * + * Bud LEBs may be half-indexed. For example, if the bud was not full at the + * time of commit, the bud is retained to continue to be used in the journal, + * even though the "front" of the LEB is now indexed. In that case, the log + * reference contains the offset where the bud starts for the purposes of the + * journal. + * + * The journal size has to be limited, because the larger is the journal, the + * longer it takes to mount UBIFS (scanning the journal) and the more memory it + * takes (indexing in the TNC). + * + * All the journal write operations like 'ubifs_jnl_update()' here, which write + * multiple UBIFS nodes to the journal at one go, are atomic with respect to + * unclean reboots. Should the unclean reboot happen, the recovery code drops + * all the nodes. + */ + +#include "ubifs.h" + +/** + * zero_ino_node_unused - zero out unused fields of an on-flash inode node. + * @ino: the inode to zero out + */ +static inline void zero_ino_node_unused(struct ubifs_ino_node *ino) +{ + memset(ino->padding1, 0, 4); + memset(ino->padding2, 0, 26); +} + +/** + * zero_dent_node_unused - zero out unused fields of an on-flash directory + * entry node. + * @dent: the directory entry to zero out + */ +static inline void zero_dent_node_unused(struct ubifs_dent_node *dent) +{ + dent->padding1 = 0; + memset(dent->padding2, 0, 4); +} + +/** + * zero_data_node_unused - zero out unused fields of an on-flash data node. + * @data: the data node to zero out + */ +static inline void zero_data_node_unused(struct ubifs_data_node *data) +{ + memset(data->padding, 0, 2); +} + +/** + * zero_trun_node_unused - zero out unused fields of an on-flash truncation + * node. + * @trun: the truncation node to zero out + */ +static inline void zero_trun_node_unused(struct ubifs_trun_node *trun) +{ + memset(trun->padding, 0, 12); +} + +/** + * reserve_space - reserve space in the journal. + * @c: UBIFS file-system description object + * @jhead: journal head number + * @len: node length + * + * This function reserves space in journal head @head. If the reservation + * succeeded, the journal head stays locked and later has to be unlocked using + * 'release_head()'. 'write_node()' and 'write_head()' functions also unlock + * it. Returns zero in case of success, %-EAGAIN if commit has to be done, and + * other negative error codes in case of other failures. + */ +static int reserve_space(struct ubifs_info *c, int jhead, int len) +{ + int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze; + struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; + + /* + * Typically, the base head has smaller nodes written to it, so it is + * better to try to allocate space at the ends of eraseblocks. This is + * what the squeeze parameter does. + */ + squeeze = (jhead == BASEHD); +again: + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + + if (c->ro_media) { + err = -EROFS; + goto out_unlock; + } + + avail = c->leb_size - wbuf->offs - wbuf->used; + if (wbuf->lnum != -1 && avail >= len) + return 0; + + /* + * Write buffer wasn't seek'ed or there is no enough space - look for an + * LEB with some empty space. + */ + lnum = ubifs_find_free_space(c, len, &free, squeeze); + if (lnum >= 0) { + /* Found an LEB, add it to the journal head */ + offs = c->leb_size - free; + err = ubifs_add_bud_to_log(c, jhead, lnum, offs); + if (err) + goto out_return; + /* A new bud was successfully allocated and added to the log */ + goto out; + } + + err = lnum; + if (err != -ENOSPC) + goto out_unlock; + + /* + * No free space, we have to run garbage collector to make + * some. But the write-buffer mutex has to be unlocked because + * GC also takes it. + */ + dbg_jnl("no free space jhead %d, run GC", jhead); + mutex_unlock(&wbuf->io_mutex); + + lnum = ubifs_garbage_collect(c, 0); + if (lnum < 0) { + err = lnum; + if (err != -ENOSPC) + return err; + + /* + * GC could not make a free LEB. But someone else may + * have allocated new bud for this journal head, + * because we dropped @wbuf->io_mutex, so try once + * again. + */ + dbg_jnl("GC couldn't make a free LEB for jhead %d", jhead); + if (retries++ < 2) { + dbg_jnl("retry (%d)", retries); + goto again; + } + + dbg_jnl("return -ENOSPC"); + return err; + } + + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + dbg_jnl("got LEB %d for jhead %d", lnum, jhead); + avail = c->leb_size - wbuf->offs - wbuf->used; + + if (wbuf->lnum != -1 && avail >= len) { + /* + * Someone else has switched the journal head and we have + * enough space now. This happens when more then one process is + * trying to write to the same journal head at the same time. + */ + dbg_jnl("return LEB %d back, already have LEB %d:%d", + lnum, wbuf->lnum, wbuf->offs + wbuf->used); + err = ubifs_return_leb(c, lnum); + if (err) + goto out_unlock; + return 0; + } + + err = ubifs_add_bud_to_log(c, jhead, lnum, 0); + if (err) + goto out_return; + offs = 0; + +out: + err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, UBI_SHORTTERM); + if (err) + goto out_unlock; + + return 0; + +out_unlock: + mutex_unlock(&wbuf->io_mutex); + return err; + +out_return: + /* An error occurred and the LEB has to be returned to lprops */ + ubifs_assert(err < 0); + err1 = ubifs_return_leb(c, lnum); + if (err1 && err == -EAGAIN) + /* + * Return original error code only if it is not %-EAGAIN, + * which is not really an error. Otherwise, return the error + * code of 'ubifs_return_leb()'. + */ + err = err1; + mutex_unlock(&wbuf->io_mutex); + return err; +} + +/** + * write_node - write node to a journal head. + * @c: UBIFS file-system description object + * @jhead: journal head + * @node: node to write + * @len: node length + * @lnum: LEB number written is returned here + * @offs: offset written is returned here + * + * This function writes a node to reserved space of journal head @jhead. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +static int write_node(struct ubifs_info *c, int jhead, void *node, int len, + int *lnum, int *offs) +{ + struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; + + ubifs_assert(jhead != GCHD); + + *lnum = c->jheads[jhead].wbuf.lnum; + *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; + + dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len); + ubifs_prepare_node(c, node, len, 0); + + return ubifs_wbuf_write_nolock(wbuf, node, len); +} + +/** + * write_head - write data to a journal head. + * @c: UBIFS file-system description object + * @jhead: journal head + * @buf: buffer to write + * @len: length to write + * @lnum: LEB number written is returned here + * @offs: offset written is returned here + * @sync: non-zero if the write-buffer has to by synchronized + * + * This function is the same as 'write_node()' but it does not assume the + * buffer it is writing is a node, so it does not prepare it (which means + * initializing common header and calculating CRC). + */ +static int write_head(struct ubifs_info *c, int jhead, void *buf, int len, + int *lnum, int *offs, int sync) +{ + int err; + struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; + + ubifs_assert(jhead != GCHD); + + *lnum = c->jheads[jhead].wbuf.lnum; + *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; + dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len); + + err = ubifs_wbuf_write_nolock(wbuf, buf, len); + if (err) + return err; + if (sync) + err = ubifs_wbuf_sync_nolock(wbuf); + return err; +} + +/** + * make_reservation - reserve journal space. + * @c: UBIFS file-system description object + * @jhead: journal head + * @len: how many bytes to reserve + * + * This function makes space reservation in journal head @jhead. The function + * takes the commit lock and locks the journal head, and the caller has to + * unlock the head and finish the reservation with 'finish_reservation()'. + * Returns zero in case of success and a negative error code in case of + * failure. + * + * Note, the journal head may be unlocked as soon as the data is written, while + * the commit lock has to be released after the data has been added to the + * TNC. + */ +static int make_reservation(struct ubifs_info *c, int jhead, int len) +{ + int err, cmt_retries = 0, nospc_retries = 0; + +again: + down_read(&c->commit_sem); + err = reserve_space(c, jhead, len); + if (!err) + return 0; + up_read(&c->commit_sem); + + if (err == -ENOSPC) { + /* + * GC could not make any progress. We should try to commit + * once because it could make some dirty space and GC would + * make progress, so make the error -EAGAIN so that the below + * will commit and re-try. + */ + if (nospc_retries++ < 2) { + dbg_jnl("no space, retry"); + err = -EAGAIN; + } + + /* + * This means that the budgeting is incorrect. We always have + * to be able to write to the media, because all operations are + * budgeted. Deletions are not budgeted, though, but we reserve + * an extra LEB for them. + */ + } + + if (err != -EAGAIN) + goto out; + + /* + * -EAGAIN means that the journal is full or too large, or the above + * code wants to do one commit. Do this and re-try. + */ + if (cmt_retries > 128) { + /* + * This should not happen unless the journal size limitations + * are too tough. + */ + ubifs_err("stuck in space allocation"); + err = -ENOSPC; + goto out; + } else if (cmt_retries > 32) + ubifs_warn("too many space allocation re-tries (%d)", + cmt_retries); + + dbg_jnl("-EAGAIN, commit and retry (retried %d times)", + cmt_retries); + cmt_retries += 1; + + err = ubifs_run_commit(c); + if (err) + return err; + goto again; + +out: + ubifs_err("cannot reserve %d bytes in jhead %d, error %d", + len, jhead, err); + if (err == -ENOSPC) { + /* This are some budgeting problems, print useful information */ + down_write(&c->commit_sem); + spin_lock(&c->space_lock); + dbg_dump_stack(); + dbg_dump_budg(c); + spin_unlock(&c->space_lock); + dbg_dump_lprops(c); + cmt_retries = dbg_check_lprops(c); + up_write(&c->commit_sem); + } + return err; +} + +/** + * release_head - release a journal head. + * @c: UBIFS file-system description object + * @jhead: journal head + * + * This function releases journal head @jhead which was locked by + * the 'make_reservation()' function. It has to be called after each successful + * 'make_reservation()' invocation. + */ +static inline void release_head(struct ubifs_info *c, int jhead) +{ + mutex_unlock(&c->jheads[jhead].wbuf.io_mutex); +} + +/** + * finish_reservation - finish a reservation. + * @c: UBIFS file-system description object + * + * This function finishes journal space reservation. It must be called after + * 'make_reservation()'. + */ +static void finish_reservation(struct ubifs_info *c) +{ + up_read(&c->commit_sem); +} + +/** + * get_dent_type - translate VFS inode mode to UBIFS directory entry type. + * @mode: inode mode + */ +static int get_dent_type(int mode) +{ + switch (mode & S_IFMT) { + case S_IFREG: + return UBIFS_ITYPE_REG; + case S_IFDIR: + return UBIFS_ITYPE_DIR; + case S_IFLNK: + return UBIFS_ITYPE_LNK; + case S_IFBLK: + return UBIFS_ITYPE_BLK; + case S_IFCHR: + return UBIFS_ITYPE_CHR; + case S_IFIFO: + return UBIFS_ITYPE_FIFO; + case S_IFSOCK: + return UBIFS_ITYPE_SOCK; + default: + BUG(); + } + return 0; +} + +/** + * pack_inode - pack an inode node. + * @c: UBIFS file-system description object + * @ino: buffer in which to pack inode node + * @inode: inode to pack + * @last: indicates the last node of the group + * @last_reference: non-zero if this is a deletion inode + */ +static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino, + const struct inode *inode, int last, + int last_reference) +{ + int data_len = 0; + struct ubifs_inode *ui = ubifs_inode(inode); + + ino->ch.node_type = UBIFS_INO_NODE; + ino_key_init_flash(c, &ino->key, inode->i_ino); + ino->creat_sqnum = cpu_to_le64(ui->creat_sqnum); + ino->atime_sec = cpu_to_le64(inode->i_atime.tv_sec); + ino->atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); + ino->ctime_sec = cpu_to_le64(inode->i_ctime.tv_sec); + ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + ino->mtime_sec = cpu_to_le64(inode->i_mtime.tv_sec); + ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + ino->uid = cpu_to_le32(inode->i_uid); + ino->gid = cpu_to_le32(inode->i_gid); + ino->mode = cpu_to_le32(inode->i_mode); + ino->flags = cpu_to_le32(ui->flags); + ino->size = cpu_to_le64(ui->ui_size); + ino->nlink = cpu_to_le32(inode->i_nlink); + ino->compr_type = cpu_to_le16(ui->compr_type); + ino->data_len = cpu_to_le32(ui->data_len); + ino->xattr_cnt = cpu_to_le32(ui->xattr_cnt); + ino->xattr_size = cpu_to_le32(ui->xattr_size); + ino->xattr_names = cpu_to_le32(ui->xattr_names); + zero_ino_node_unused(ino); + + /* + * Drop the attached data if this is a deletion inode, the data is not + * needed anymore. + */ + if (!last_reference) { + memcpy(ino->data, ui->data, ui->data_len); + data_len = ui->data_len; + } + + ubifs_prep_grp_node(c, ino, UBIFS_INO_NODE_SZ + data_len, last); +} + +/** + * mark_inode_clean - mark UBIFS inode as clean. + * @c: UBIFS file-system description object + * @ui: UBIFS inode to mark as clean + * + * This helper function marks UBIFS inode @ui as clean by cleaning the + * @ui->dirty flag and releasing its budget. Note, VFS may still treat the + * inode as dirty and try to write it back, but 'ubifs_write_inode()' would + * just do nothing. + */ +static void mark_inode_clean(struct ubifs_info *c, struct ubifs_inode *ui) +{ + if (ui->dirty) + ubifs_release_dirty_inode_budget(c, ui); + ui->dirty = 0; +} + +/** + * ubifs_jnl_update - update inode. + * @c: UBIFS file-system description object + * @dir: parent inode or host inode in case of extended attributes + * @nm: directory entry name + * @inode: inode to update + * @deletion: indicates a directory entry deletion i.e unlink or rmdir + * @xent: non-zero if the directory entry is an extended attribute entry + * + * This function updates an inode by writing a directory entry (or extended + * attribute entry), the inode itself, and the parent directory inode (or the + * host inode) to the journal. + * + * The function writes the host inode @dir last, which is important in case of + * extended attributes. Indeed, then we guarantee that if the host inode gets + * synchronized (with 'fsync()'), and the write-buffer it sits in gets flushed, + * the extended attribute inode gets flushed too. And this is exactly what the + * user expects - synchronizing the host inode synchronizes its extended + * attributes. Similarly, this guarantees that if @dir is synchronized, its + * directory entry corresponding to @nm gets synchronized too. + * + * If the inode (@inode) or the parent directory (@dir) are synchronous, this + * function synchronizes the write-buffer. + * + * This function marks the @dir and @inode inodes as clean and returns zero on + * success. In case of failure, a negative error code is returned. + */ +int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, + const struct qstr *nm, const struct inode *inode, + int deletion, int xent) +{ + int err, dlen, ilen, len, lnum, ino_offs, dent_offs; + int aligned_dlen, aligned_ilen, sync = IS_DIRSYNC(dir); + int last_reference = !!(deletion && inode->i_nlink == 0); + struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_inode *dir_ui = ubifs_inode(dir); + struct ubifs_dent_node *dent; + struct ubifs_ino_node *ino; + union ubifs_key dent_key, ino_key; + + dbg_jnl("ino %lu, dent '%.*s', data len %d in dir ino %lu", + inode->i_ino, nm->len, nm->name, ui->data_len, dir->i_ino); + ubifs_assert(dir_ui->data_len == 0); + ubifs_assert(mutex_is_locked(&dir_ui->ui_mutex)); + + dlen = UBIFS_DENT_NODE_SZ + nm->len + 1; + ilen = UBIFS_INO_NODE_SZ; + + /* + * If the last reference to the inode is being deleted, then there is + * no need to attach and write inode data, it is being deleted anyway. + * And if the inode is being deleted, no need to synchronize + * write-buffer even if the inode is synchronous. + */ + if (!last_reference) { + ilen += ui->data_len; + sync |= IS_SYNC(inode); + } + + aligned_dlen = ALIGN(dlen, 8); + aligned_ilen = ALIGN(ilen, 8); + len = aligned_dlen + aligned_ilen + UBIFS_INO_NODE_SZ; + dent = kmalloc(len, GFP_NOFS); + if (!dent) + return -ENOMEM; + + /* Make reservation before allocating sequence numbers */ + err = make_reservation(c, BASEHD, len); + if (err) + goto out_free; + + if (!xent) { + dent->ch.node_type = UBIFS_DENT_NODE; + dent_key_init(c, &dent_key, dir->i_ino, nm); + } else { + dent->ch.node_type = UBIFS_XENT_NODE; + xent_key_init(c, &dent_key, dir->i_ino, nm); + } + + key_write(c, &dent_key, dent->key); + dent->inum = deletion ? 0 : cpu_to_le64(inode->i_ino); + dent->type = get_dent_type(inode->i_mode); + dent->nlen = cpu_to_le16(nm->len); + memcpy(dent->name, nm->name, nm->len); + dent->name[nm->len] = '\0'; + zero_dent_node_unused(dent); + ubifs_prep_grp_node(c, dent, dlen, 0); + + ino = (void *)dent + aligned_dlen; + pack_inode(c, ino, inode, 0, last_reference); + ino = (void *)ino + aligned_ilen; + pack_inode(c, ino, dir, 1, 0); + + if (last_reference) { + err = ubifs_add_orphan(c, inode->i_ino); + if (err) { + release_head(c, BASEHD); + goto out_finish; + } + } + + err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync); + if (err) + goto out_release; + if (!sync) { + struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf; + + ubifs_wbuf_add_ino_nolock(wbuf, inode->i_ino); + ubifs_wbuf_add_ino_nolock(wbuf, dir->i_ino); + } + release_head(c, BASEHD); + kfree(dent); + + if (deletion) { + err = ubifs_tnc_remove_nm(c, &dent_key, nm); + if (err) + goto out_ro; + err = ubifs_add_dirt(c, lnum, dlen); + } else + err = ubifs_tnc_add_nm(c, &dent_key, lnum, dent_offs, dlen, nm); + if (err) + goto out_ro; + + /* + * Note, we do not remove the inode from TNC even if the last reference + * to it has just been deleted, because the inode may still be opened. + * Instead, the inode has been added to orphan lists and the orphan + * subsystem will take further care about it. + */ + ino_key_init(c, &ino_key, inode->i_ino); + ino_offs = dent_offs + aligned_dlen; + err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, ilen); + if (err) + goto out_ro; + + ino_key_init(c, &ino_key, dir->i_ino); + ino_offs += aligned_ilen; + err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, UBIFS_INO_NODE_SZ); + if (err) + goto out_ro; + + finish_reservation(c); + spin_lock(&ui->ui_lock); + ui->synced_i_size = ui->ui_size; + spin_unlock(&ui->ui_lock); + mark_inode_clean(c, ui); + mark_inode_clean(c, dir_ui); + return 0; + +out_finish: + finish_reservation(c); +out_free: + kfree(dent); + return err; + +out_release: + release_head(c, BASEHD); +out_ro: + ubifs_ro_mode(c, err); + if (last_reference) + ubifs_delete_orphan(c, inode->i_ino); + finish_reservation(c); + return err; +} + +/** + * ubifs_jnl_write_data - write a data node to the journal. + * @c: UBIFS file-system description object + * @inode: inode the data node belongs to + * @key: node key + * @buf: buffer to write + * @len: data length (must not exceed %UBIFS_BLOCK_SIZE) + * + * This function writes a data node to the journal. Returns %0 if the data node + * was successfully written, and a negative error code in case of failure. + */ +int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, + const union ubifs_key *key, const void *buf, int len) +{ + struct ubifs_data_node *data; + int err, lnum, offs, compr_type, out_len; + int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR; + struct ubifs_inode *ui = ubifs_inode(inode); + + dbg_jnl("ino %lu, blk %u, len %d, key %s", key_inum(c, key), + key_block(c, key), len, DBGKEY(key)); + ubifs_assert(len <= UBIFS_BLOCK_SIZE); + + data = kmalloc(dlen, GFP_NOFS); + if (!data) + return -ENOMEM; + + data->ch.node_type = UBIFS_DATA_NODE; + key_write(c, key, &data->key); + data->size = cpu_to_le32(len); + zero_data_node_unused(data); + + if (!(ui->flags && UBIFS_COMPR_FL)) + /* Compression is disabled for this inode */ + compr_type = UBIFS_COMPR_NONE; + else + compr_type = ui->compr_type; + + out_len = dlen - UBIFS_DATA_NODE_SZ; + ubifs_compress(buf, len, &data->data, &out_len, &compr_type); + ubifs_assert(out_len <= UBIFS_BLOCK_SIZE); + + dlen = UBIFS_DATA_NODE_SZ + out_len; + data->compr_type = cpu_to_le16(compr_type); + + /* Make reservation before allocating sequence numbers */ + err = make_reservation(c, DATAHD, dlen); + if (err) + goto out_free; + + err = write_node(c, DATAHD, data, dlen, &lnum, &offs); + if (err) + goto out_release; + ubifs_wbuf_add_ino_nolock(&c->jheads[DATAHD].wbuf, key_inum(c, key)); + release_head(c, DATAHD); + + err = ubifs_tnc_add(c, key, lnum, offs, dlen); + if (err) + goto out_ro; + + finish_reservation(c); + kfree(data); + return 0; + +out_release: + release_head(c, DATAHD); +out_ro: + ubifs_ro_mode(c, err); + finish_reservation(c); +out_free: + kfree(data); + return err; +} + +/** + * ubifs_jnl_write_inode - flush inode to the journal. + * @c: UBIFS file-system description object + * @inode: inode to flush + * @deletion: inode has been deleted + * + * This function writes inode @inode to the journal. If the inode is + * synchronous, it also synchronizes the write-buffer. Returns zero in case of + * success and a negative error code in case of failure. + */ +int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, + int deletion) +{ + int err, len, lnum, offs, sync = 0; + struct ubifs_ino_node *ino; + struct ubifs_inode *ui = ubifs_inode(inode); + + dbg_jnl("ino %lu%s", inode->i_ino, + deletion ? " (last reference)" : ""); + if (deletion) + ubifs_assert(inode->i_nlink == 0); + + len = UBIFS_INO_NODE_SZ; + /* + * If the inode is being deleted, do not write the attached data. No + * need to synchronize the write-buffer either. + */ + if (!deletion) { + len += ui->data_len; + sync = IS_SYNC(inode); + } + ino = kmalloc(len, GFP_NOFS); + if (!ino) + return -ENOMEM; + + /* Make reservation before allocating sequence numbers */ + err = make_reservation(c, BASEHD, len); + if (err) + goto out_free; + + pack_inode(c, ino, inode, 1, deletion); + err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync); + if (err) + goto out_release; + if (!sync) + ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, + inode->i_ino); + release_head(c, BASEHD); + + if (deletion) { + err = ubifs_tnc_remove_ino(c, inode->i_ino); + if (err) + goto out_ro; + ubifs_delete_orphan(c, inode->i_ino); + err = ubifs_add_dirt(c, lnum, len); + } else { + union ubifs_key key; + + ino_key_init(c, &key, inode->i_ino); + err = ubifs_tnc_add(c, &key, lnum, offs, len); + } + if (err) + goto out_ro; + + finish_reservation(c); + spin_lock(&ui->ui_lock); + ui->synced_i_size = ui->ui_size; + spin_unlock(&ui->ui_lock); + kfree(ino); + return 0; + +out_release: + release_head(c, BASEHD); +out_ro: + ubifs_ro_mode(c, err); + finish_reservation(c); +out_free: + kfree(ino); + return err; +} + +/** + * ubifs_jnl_rename - rename a directory entry. + * @c: UBIFS file-system description object + * @old_dir: parent inode of directory entry to rename + * @old_dentry: directory entry to rename + * @new_dir: parent inode of directory entry to rename + * @new_dentry: new directory entry (or directory entry to replace) + * @sync: non-zero if the write-buffer has to be synchronized + * + * This function implements the re-name operation which may involve writing up + * to 3 inodes and 2 directory entries. It marks the written inodes as clean + * and returns zero on success. In case of failure, a negative error code is + * returned. + */ +int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, + const struct dentry *old_dentry, + const struct inode *new_dir, + const struct dentry *new_dentry, int sync) +{ + void *p; + union ubifs_key key; + struct ubifs_dent_node *dent, *dent2; + int err, dlen1, dlen2, ilen, lnum, offs, len; + const struct inode *old_inode = old_dentry->d_inode; + const struct inode *new_inode = new_dentry->d_inode; + int aligned_dlen1, aligned_dlen2, plen = UBIFS_INO_NODE_SZ; + int last_reference = !!(new_inode && new_inode->i_nlink == 0); + int move = (old_dir != new_dir); + struct ubifs_inode *uninitialized_var(new_ui); + + dbg_jnl("dent '%.*s' in dir ino %lu to dent '%.*s' in dir ino %lu", + old_dentry->d_name.len, old_dentry->d_name.name, + old_dir->i_ino, new_dentry->d_name.len, + new_dentry->d_name.name, new_dir->i_ino); + ubifs_assert(ubifs_inode(old_dir)->data_len == 0); + ubifs_assert(ubifs_inode(new_dir)->data_len == 0); + ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex)); + ubifs_assert(mutex_is_locked(&ubifs_inode(new_dir)->ui_mutex)); + + dlen1 = UBIFS_DENT_NODE_SZ + new_dentry->d_name.len + 1; + dlen2 = UBIFS_DENT_NODE_SZ + old_dentry->d_name.len + 1; + if (new_inode) { + new_ui = ubifs_inode(new_inode); + ubifs_assert(mutex_is_locked(&new_ui->ui_mutex)); + ilen = UBIFS_INO_NODE_SZ; + if (!last_reference) + ilen += new_ui->data_len; + } else + ilen = 0; + + aligned_dlen1 = ALIGN(dlen1, 8); + aligned_dlen2 = ALIGN(dlen2, 8); + len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8); + if (old_dir != new_dir) + len += plen; + dent = kmalloc(len, GFP_NOFS); + if (!dent) + return -ENOMEM; + + /* Make reservation before allocating sequence numbers */ + err = make_reservation(c, BASEHD, len); + if (err) + goto out_free; + + /* Make new dent */ + dent->ch.node_type = UBIFS_DENT_NODE; + dent_key_init_flash(c, &dent->key, new_dir->i_ino, &new_dentry->d_name); + dent->inum = cpu_to_le64(old_inode->i_ino); + dent->type = get_dent_type(old_inode->i_mode); + dent->nlen = cpu_to_le16(new_dentry->d_name.len); + memcpy(dent->name, new_dentry->d_name.name, new_dentry->d_name.len); + dent->name[new_dentry->d_name.len] = '\0'; + zero_dent_node_unused(dent); + ubifs_prep_grp_node(c, dent, dlen1, 0); + + /* Make deletion dent */ + dent2 = (void *)dent + aligned_dlen1; + dent2->ch.node_type = UBIFS_DENT_NODE; + dent_key_init_flash(c, &dent2->key, old_dir->i_ino, + &old_dentry->d_name); + dent2->inum = 0; + dent2->type = DT_UNKNOWN; + dent2->nlen = cpu_to_le16(old_dentry->d_name.len); + memcpy(dent2->name, old_dentry->d_name.name, old_dentry->d_name.len); + dent2->name[old_dentry->d_name.len] = '\0'; + zero_dent_node_unused(dent2); + ubifs_prep_grp_node(c, dent2, dlen2, 0); + + p = (void *)dent2 + aligned_dlen2; + if (new_inode) { + pack_inode(c, p, new_inode, 0, last_reference); + p += ALIGN(ilen, 8); + } + + if (!move) + pack_inode(c, p, old_dir, 1, 0); + else { + pack_inode(c, p, old_dir, 0, 0); + p += ALIGN(plen, 8); + pack_inode(c, p, new_dir, 1, 0); + } + + if (last_reference) { + err = ubifs_add_orphan(c, new_inode->i_ino); + if (err) { + release_head(c, BASEHD); + goto out_finish; + } + } + + err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync); + if (err) + goto out_release; + if (!sync) { + struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf; + + ubifs_wbuf_add_ino_nolock(wbuf, new_dir->i_ino); + ubifs_wbuf_add_ino_nolock(wbuf, old_dir->i_ino); + if (new_inode) + ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, + new_inode->i_ino); + } + release_head(c, BASEHD); + + dent_key_init(c, &key, new_dir->i_ino, &new_dentry->d_name); + err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen1, &new_dentry->d_name); + if (err) + goto out_ro; + + err = ubifs_add_dirt(c, lnum, dlen2); + if (err) + goto out_ro; + + dent_key_init(c, &key, old_dir->i_ino, &old_dentry->d_name); + err = ubifs_tnc_remove_nm(c, &key, &old_dentry->d_name); + if (err) + goto out_ro; + + offs += aligned_dlen1 + aligned_dlen2; + if (new_inode) { + ino_key_init(c, &key, new_inode->i_ino); + err = ubifs_tnc_add(c, &key, lnum, offs, ilen); + if (err) + goto out_ro; + offs += ALIGN(ilen, 8); + } + + ino_key_init(c, &key, old_dir->i_ino); + err = ubifs_tnc_add(c, &key, lnum, offs, plen); + if (err) + goto out_ro; + + if (old_dir != new_dir) { + offs += ALIGN(plen, 8); + ino_key_init(c, &key, new_dir->i_ino); + err = ubifs_tnc_add(c, &key, lnum, offs, plen); + if (err) + goto out_ro; + } + + finish_reservation(c); + if (new_inode) { + mark_inode_clean(c, new_ui); + spin_lock(&new_ui->ui_lock); + new_ui->synced_i_size = new_ui->ui_size; + spin_unlock(&new_ui->ui_lock); + } + mark_inode_clean(c, ubifs_inode(old_dir)); + if (move) + mark_inode_clean(c, ubifs_inode(new_dir)); + kfree(dent); + return 0; + +out_release: + release_head(c, BASEHD); +out_ro: + ubifs_ro_mode(c, err); + if (last_reference) + ubifs_delete_orphan(c, new_inode->i_ino); +out_finish: + finish_reservation(c); +out_free: + kfree(dent); + return err; +} + +/** + * recomp_data_node - re-compress a truncated data node. + * @dn: data node to re-compress + * @new_len: new length + * + * This function is used when an inode is truncated and the last data node of + * the inode has to be re-compressed and re-written. + */ +static int recomp_data_node(struct ubifs_data_node *dn, int *new_len) +{ + void *buf; + int err, len, compr_type, out_len; + + out_len = le32_to_cpu(dn->size); + buf = kmalloc(out_len * WORST_COMPR_FACTOR, GFP_NOFS); + if (!buf) + return -ENOMEM; + + len = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; + compr_type = le16_to_cpu(dn->compr_type); + err = ubifs_decompress(&dn->data, len, buf, &out_len, compr_type); + if (err) + goto out; + + ubifs_compress(buf, *new_len, &dn->data, &out_len, &compr_type); + ubifs_assert(out_len <= UBIFS_BLOCK_SIZE); + dn->compr_type = cpu_to_le16(compr_type); + dn->size = cpu_to_le32(*new_len); + *new_len = UBIFS_DATA_NODE_SZ + out_len; +out: + kfree(buf); + return err; +} + +/** + * ubifs_jnl_truncate - update the journal for a truncation. + * @c: UBIFS file-system description object + * @inode: inode to truncate + * @old_size: old size + * @new_size: new size + * + * When the size of a file decreases due to truncation, a truncation node is + * written, the journal tree is updated, and the last data block is re-written + * if it has been affected. The inode is also updated in order to synchronize + * the new inode size. + * + * This function marks the inode as clean and returns zero on success. In case + * of failure, a negative error code is returned. + */ +int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, + loff_t old_size, loff_t new_size) +{ + union ubifs_key key, to_key; + struct ubifs_ino_node *ino; + struct ubifs_trun_node *trun; + struct ubifs_data_node *uninitialized_var(dn); + int err, dlen, len, lnum, offs, bit, sz, sync = IS_SYNC(inode); + struct ubifs_inode *ui = ubifs_inode(inode); + ino_t inum = inode->i_ino; + unsigned int blk; + + dbg_jnl("ino %lu, size %lld -> %lld", inum, old_size, new_size); + ubifs_assert(!ui->data_len); + ubifs_assert(S_ISREG(inode->i_mode)); + ubifs_assert(mutex_is_locked(&ui->ui_mutex)); + + sz = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ + + UBIFS_MAX_DATA_NODE_SZ * WORST_COMPR_FACTOR; + ino = kmalloc(sz, GFP_NOFS); + if (!ino) + return -ENOMEM; + + trun = (void *)ino + UBIFS_INO_NODE_SZ; + trun->ch.node_type = UBIFS_TRUN_NODE; + trun->inum = cpu_to_le32(inum); + trun->old_size = cpu_to_le64(old_size); + trun->new_size = cpu_to_le64(new_size); + zero_trun_node_unused(trun); + + dlen = new_size & (UBIFS_BLOCK_SIZE - 1); + if (dlen) { + /* Get last data block so it can be truncated */ + dn = (void *)trun + UBIFS_TRUN_NODE_SZ; + blk = new_size >> UBIFS_BLOCK_SHIFT; + data_key_init(c, &key, inum, blk); + dbg_jnl("last block key %s", DBGKEY(&key)); + err = ubifs_tnc_lookup(c, &key, dn); + if (err == -ENOENT) + dlen = 0; /* Not found (so it is a hole) */ + else if (err) + goto out_free; + else { + if (le32_to_cpu(dn->size) <= dlen) + dlen = 0; /* Nothing to do */ + else { + int compr_type = le16_to_cpu(dn->compr_type); + + if (compr_type != UBIFS_COMPR_NONE) { + err = recomp_data_node(dn, &dlen); + if (err) + goto out_free; + } else { + dn->size = cpu_to_le32(dlen); + dlen += UBIFS_DATA_NODE_SZ; + } + zero_data_node_unused(dn); + } + } + } + + /* Must make reservation before allocating sequence numbers */ + len = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ; + if (dlen) + len += dlen; + err = make_reservation(c, BASEHD, len); + if (err) + goto out_free; + + pack_inode(c, ino, inode, 0, 0); + ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1); + if (dlen) + ubifs_prep_grp_node(c, dn, dlen, 1); + + err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync); + if (err) + goto out_release; + if (!sync) + ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, inum); + release_head(c, BASEHD); + + if (dlen) { + sz = offs + UBIFS_INO_NODE_SZ + UBIFS_TRUN_NODE_SZ; + err = ubifs_tnc_add(c, &key, lnum, sz, dlen); + if (err) + goto out_ro; + } + + ino_key_init(c, &key, inum); + err = ubifs_tnc_add(c, &key, lnum, offs, UBIFS_INO_NODE_SZ); + if (err) + goto out_ro; + + err = ubifs_add_dirt(c, lnum, UBIFS_TRUN_NODE_SZ); + if (err) + goto out_ro; + + bit = new_size & (UBIFS_BLOCK_SIZE - 1); + blk = (new_size >> UBIFS_BLOCK_SHIFT) + (bit ? 1 : 0); + data_key_init(c, &key, inum, blk); + + bit = old_size & (UBIFS_BLOCK_SIZE - 1); + blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0: 1); + data_key_init(c, &to_key, inum, blk); + + err = ubifs_tnc_remove_range(c, &key, &to_key); + if (err) + goto out_ro; + + finish_reservation(c); + spin_lock(&ui->ui_lock); + ui->synced_i_size = ui->ui_size; + spin_unlock(&ui->ui_lock); + mark_inode_clean(c, ui); + kfree(ino); + return 0; + +out_release: + release_head(c, BASEHD); +out_ro: + ubifs_ro_mode(c, err); + finish_reservation(c); +out_free: + kfree(ino); + return err; +} + +#ifdef CONFIG_UBIFS_FS_XATTR + +/** + * ubifs_jnl_delete_xattr - delete an extended attribute. + * @c: UBIFS file-system description object + * @host: host inode + * @inode: extended attribute inode + * @nm: extended attribute entry name + * + * This function delete an extended attribute which is very similar to + * un-linking regular files - it writes a deletion xentry, a deletion inode and + * updates the target inode. Returns zero in case of success and a negative + * error code in case of failure. + */ +int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, + const struct inode *inode, const struct qstr *nm) +{ + int err, xlen, hlen, len, lnum, xent_offs, aligned_xlen; + struct ubifs_dent_node *xent; + struct ubifs_ino_node *ino; + union ubifs_key xent_key, key1, key2; + int sync = IS_DIRSYNC(host); + struct ubifs_inode *host_ui = ubifs_inode(host); + + dbg_jnl("host %lu, xattr ino %lu, name '%s', data len %d", + host->i_ino, inode->i_ino, nm->name, + ubifs_inode(inode)->data_len); + ubifs_assert(inode->i_nlink == 0); + ubifs_assert(mutex_is_locked(&host_ui->ui_mutex)); + + /* + * Since we are deleting the inode, we do not bother to attach any data + * to it and assume its length is %UBIFS_INO_NODE_SZ. + */ + xlen = UBIFS_DENT_NODE_SZ + nm->len + 1; + aligned_xlen = ALIGN(xlen, 8); + hlen = host_ui->data_len + UBIFS_INO_NODE_SZ; + len = aligned_xlen + UBIFS_INO_NODE_SZ + ALIGN(hlen, 8); + + xent = kmalloc(len, GFP_NOFS); + if (!xent) + return -ENOMEM; + + /* Make reservation before allocating sequence numbers */ + err = make_reservation(c, BASEHD, len); + if (err) { + kfree(xent); + return err; + } + + xent->ch.node_type = UBIFS_XENT_NODE; + xent_key_init(c, &xent_key, host->i_ino, nm); + key_write(c, &xent_key, xent->key); + xent->inum = 0; + xent->type = get_dent_type(inode->i_mode); + xent->nlen = cpu_to_le16(nm->len); + memcpy(xent->name, nm->name, nm->len); + xent->name[nm->len] = '\0'; + zero_dent_node_unused(xent); + ubifs_prep_grp_node(c, xent, xlen, 0); + + ino = (void *)xent + aligned_xlen; + pack_inode(c, ino, inode, 0, 1); + ino = (void *)ino + UBIFS_INO_NODE_SZ; + pack_inode(c, ino, host, 1, 0); + + err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync); + if (!sync && !err) + ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, host->i_ino); + release_head(c, BASEHD); + kfree(xent); + if (err) + goto out_ro; + + /* Remove the extended attribute entry from TNC */ + err = ubifs_tnc_remove_nm(c, &xent_key, nm); + if (err) + goto out_ro; + err = ubifs_add_dirt(c, lnum, xlen); + if (err) + goto out_ro; + + /* + * Remove all nodes belonging to the extended attribute inode from TNC. + * Well, there actually must be only one node - the inode itself. + */ + lowest_ino_key(c, &key1, inode->i_ino); + highest_ino_key(c, &key2, inode->i_ino); + err = ubifs_tnc_remove_range(c, &key1, &key2); + if (err) + goto out_ro; + err = ubifs_add_dirt(c, lnum, UBIFS_INO_NODE_SZ); + if (err) + goto out_ro; + + /* And update TNC with the new host inode position */ + ino_key_init(c, &key1, host->i_ino); + err = ubifs_tnc_add(c, &key1, lnum, xent_offs + len - hlen, hlen); + if (err) + goto out_ro; + + finish_reservation(c); + spin_lock(&host_ui->ui_lock); + host_ui->synced_i_size = host_ui->ui_size; + spin_unlock(&host_ui->ui_lock); + mark_inode_clean(c, host_ui); + return 0; + +out_ro: + ubifs_ro_mode(c, err); + finish_reservation(c); + return err; +} + +/** + * ubifs_jnl_change_xattr - change an extended attribute. + * @c: UBIFS file-system description object + * @inode: extended attribute inode + * @host: host inode + * + * This function writes the updated version of an extended attribute inode and + * the host inode tho the journal (to the base head). The host inode is written + * after the extended attribute inode in order to guarantee that the extended + * attribute will be flushed when the inode is synchronized by 'fsync()' and + * consequently, the write-buffer is synchronized. This function returns zero + * in case of success and a negative error code in case of failure. + */ +int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode, + const struct inode *host) +{ + int err, len1, len2, aligned_len, aligned_len1, lnum, offs; + struct ubifs_inode *host_ui = ubifs_inode(inode); + struct ubifs_ino_node *ino; + union ubifs_key key; + int sync = IS_DIRSYNC(host); + + dbg_jnl("ino %lu, ino %lu", host->i_ino, inode->i_ino); + ubifs_assert(host->i_nlink > 0); + ubifs_assert(inode->i_nlink > 0); + ubifs_assert(mutex_is_locked(&host_ui->ui_mutex)); + + len1 = UBIFS_INO_NODE_SZ + host_ui->data_len; + len2 = UBIFS_INO_NODE_SZ + ubifs_inode(inode)->data_len; + aligned_len1 = ALIGN(len1, 8); + aligned_len = aligned_len1 + ALIGN(len2, 8); + + ino = kmalloc(aligned_len, GFP_NOFS); + if (!ino) + return -ENOMEM; + + /* Make reservation before allocating sequence numbers */ + err = make_reservation(c, BASEHD, aligned_len); + if (err) + goto out_free; + + pack_inode(c, ino, host, 0, 0); + pack_inode(c, (void *)ino + aligned_len1, inode, 1, 0); + + err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0); + if (!sync && !err) { + struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf; + + ubifs_wbuf_add_ino_nolock(wbuf, host->i_ino); + ubifs_wbuf_add_ino_nolock(wbuf, inode->i_ino); + } + release_head(c, BASEHD); + if (err) + goto out_ro; + + ino_key_init(c, &key, host->i_ino); + err = ubifs_tnc_add(c, &key, lnum, offs, len1); + if (err) + goto out_ro; + + ino_key_init(c, &key, inode->i_ino); + err = ubifs_tnc_add(c, &key, lnum, offs + aligned_len1, len2); + if (err) + goto out_ro; + + finish_reservation(c); + spin_lock(&host_ui->ui_lock); + host_ui->synced_i_size = host_ui->ui_size; + spin_unlock(&host_ui->ui_lock); + mark_inode_clean(c, host_ui); + kfree(ino); + return 0; + +out_ro: + ubifs_ro_mode(c, err); + finish_reservation(c); +out_free: + kfree(ino); + return err; +} + +#endif /* CONFIG_UBIFS_FS_XATTR */ diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h new file mode 100644 index 0000000..8f74760 --- /dev/null +++ b/fs/ubifs/key.h @@ -0,0 +1,533 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This header contains various key-related definitions and helper function. + * UBIFS allows several key schemes, so we access key fields only via these + * helpers. At the moment only one key scheme is supported. + * + * Simple key scheme + * ~~~~~~~~~~~~~~~~~ + * + * Keys are 64-bits long. First 32-bits are inode number (parent inode number + * in case of direntry key). Next 3 bits are node type. The last 29 bits are + * 4KiB offset in case of inode node, and direntry hash in case of a direntry + * node. We use "r5" hash borrowed from reiserfs. + */ + +#ifndef __UBIFS_KEY_H__ +#define __UBIFS_KEY_H__ + +/** + * key_r5_hash - R5 hash function (borrowed from reiserfs). + * @s: direntry name + * @len: name length + */ +static inline uint32_t key_r5_hash(const char *s, int len) +{ + uint32_t a = 0; + const signed char *str = (const signed char *)s; + + while (*str) { + a += *str << 4; + a += *str >> 4; + a *= 11; + str++; + } + + a &= UBIFS_S_KEY_HASH_MASK; + + /* + * We use hash values as offset in directories, so values %0 and %1 are + * reserved for "." and "..". %2 is reserved for "end of readdir" + * marker. + */ + if (unlikely(a >= 0 && a <= 2)) + a += 3; + return a; +} + +/** + * key_test_hash - testing hash function. + * @str: direntry name + * @len: name length + */ +static inline uint32_t key_test_hash(const char *str, int len) +{ + uint32_t a = 0; + + len = min_t(uint32_t, len, 4); + memcpy(&a, str, len); + a &= UBIFS_S_KEY_HASH_MASK; + if (unlikely(a >= 0 && a <= 2)) + a += 3; + return a; +} + +/** + * ino_key_init - initialize inode key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + */ +static inline void ino_key_init(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) +{ + key->u32[0] = inum; + key->u32[1] = UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS; +} + +/** + * ino_key_init_flash - initialize on-flash inode key. + * @c: UBIFS file-system description object + * @k: key to initialize + * @inum: inode number + */ +static inline void ino_key_init_flash(const struct ubifs_info *c, void *k, + ino_t inum) +{ + union ubifs_key *key = k; + + key->j32[0] = cpu_to_le32(inum); + key->j32[1] = cpu_to_le32(UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS); + memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); +} + +/** + * lowest_ino_key - get the lowest possible inode key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + */ +static inline void lowest_ino_key(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) +{ + key->u32[0] = inum; + key->u32[1] = 0; +} + +/** + * highest_ino_key - get the highest possible inode key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + */ +static inline void highest_ino_key(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) +{ + key->u32[0] = inum; + key->u32[1] = 0xffffffff; +} + +/** + * dent_key_init - initialize directory entry key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: parent inode number + * @nm: direntry name and length + */ +static inline void dent_key_init(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum, + const struct qstr *nm) +{ + uint32_t hash = c->key_hash(nm->name, nm->len); + + ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); + key->u32[0] = inum; + key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS); +} + +/** + * dent_key_init_hash - initialize directory entry key without re-calculating + * hash function. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: parent inode number + * @hash: direntry name hash + */ +static inline void dent_key_init_hash(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum, + uint32_t hash) +{ + ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); + key->u32[0] = inum; + key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS); +} + +/** + * dent_key_init_flash - initialize on-flash directory entry key. + * @c: UBIFS file-system description object + * @k: key to initialize + * @inum: parent inode number + * @nm: direntry name and length + */ +static inline void dent_key_init_flash(const struct ubifs_info *c, void *k, + ino_t inum, const struct qstr *nm) +{ + union ubifs_key *key = k; + uint32_t hash = c->key_hash(nm->name, nm->len); + + ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); + key->j32[0] = cpu_to_le32(inum); + key->j32[1] = cpu_to_le32(hash | + (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS)); + memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); +} + +/** + * lowest_dent_key - get the lowest possible directory entry key. + * @c: UBIFS file-system description object + * @key: where to store the lowest key + * @inum: parent inode number + */ +static inline void lowest_dent_key(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) +{ + key->u32[0] = inum; + key->u32[1] = UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS; +} + +/** + * xent_key_init - initialize extended attribute entry key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: host inode number + * @nm: extended attribute entry name and length + */ +static inline void xent_key_init(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum, + const struct qstr *nm) +{ + uint32_t hash = c->key_hash(nm->name, nm->len); + + ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); + key->u32[0] = inum; + key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS); +} + +/** + * xent_key_init_hash - initialize extended attribute entry key without + * re-calculating hash function. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: host inode number + * @hash: extended attribute entry name hash + */ +static inline void xent_key_init_hash(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum, + uint32_t hash) +{ + ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); + key->u32[0] = inum; + key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS); +} + +/** + * xent_key_init_flash - initialize on-flash extended attribute entry key. + * @c: UBIFS file-system description object + * @k: key to initialize + * @inum: host inode number + * @nm: extended attribute entry name and length + */ +static inline void xent_key_init_flash(const struct ubifs_info *c, void *k, + ino_t inum, const struct qstr *nm) +{ + union ubifs_key *key = k; + uint32_t hash = c->key_hash(nm->name, nm->len); + + ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); + key->j32[0] = cpu_to_le32(inum); + key->j32[1] = cpu_to_le32(hash | + (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS)); + memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); +} + +/** + * lowest_xent_key - get the lowest possible extended attribute entry key. + * @c: UBIFS file-system description object + * @key: where to store the lowest key + * @inum: host inode number + */ +static inline void lowest_xent_key(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) +{ + key->u32[0] = inum; + key->u32[1] = UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS; +} + +/** + * data_key_init - initialize data key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + * @block: block number + */ +static inline void data_key_init(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum, + unsigned int block) +{ + ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK)); + key->u32[0] = inum; + key->u32[1] = block | (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS); +} + +/** + * data_key_init_flash - initialize on-flash data key. + * @c: UBIFS file-system description object + * @k: key to initialize + * @inum: inode number + * @block: block number + */ +static inline void data_key_init_flash(const struct ubifs_info *c, void *k, + ino_t inum, unsigned int block) +{ + union ubifs_key *key = k; + + ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK)); + key->j32[0] = cpu_to_le32(inum); + key->j32[1] = cpu_to_le32(block | + (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS)); + memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); +} + +/** + * trun_key_init - initialize truncation node key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + * + * Note, UBIFS does not have truncation keys on the media and this function is + * only used for purposes of replay. + */ +static inline void trun_key_init(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) +{ + key->u32[0] = inum; + key->u32[1] = UBIFS_TRUN_KEY << UBIFS_S_KEY_BLOCK_BITS; +} + +/** + * key_type - get key type. + * @c: UBIFS file-system description object + * @key: key to get type of + */ +static inline int key_type(const struct ubifs_info *c, + const union ubifs_key *key) +{ + return key->u32[1] >> UBIFS_S_KEY_BLOCK_BITS; +} + +/** + * key_type_flash - get type of a on-flash formatted key. + * @c: UBIFS file-system description object + * @k: key to get type of + */ +static inline int key_type_flash(const struct ubifs_info *c, const void *k) +{ + const union ubifs_key *key = k; + + return le32_to_cpu(key->u32[1]) >> UBIFS_S_KEY_BLOCK_BITS; +} + +/** + * key_inum - fetch inode number from key. + * @c: UBIFS file-system description object + * @k: key to fetch inode number from + */ +static inline ino_t key_inum(const struct ubifs_info *c, const void *k) +{ + const union ubifs_key *key = k; + + return key->u32[0]; +} + +/** + * key_inum_flash - fetch inode number from an on-flash formatted key. + * @c: UBIFS file-system description object + * @k: key to fetch inode number from + */ +static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k) +{ + const union ubifs_key *key = k; + + return le32_to_cpu(key->j32[0]); +} + +/** + * key_hash - get directory entry hash. + * @c: UBIFS file-system description object + * @key: the key to get hash from + */ +static inline int key_hash(const struct ubifs_info *c, + const union ubifs_key *key) +{ + return key->u32[1] & UBIFS_S_KEY_HASH_MASK; +} + +/** + * key_hash_flash - get directory entry hash from an on-flash formatted key. + * @c: UBIFS file-system description object + * @k: the key to get hash from + */ +static inline int key_hash_flash(const struct ubifs_info *c, const void *k) +{ + const union ubifs_key *key = k; + + return le32_to_cpu(key->j32[1]) & UBIFS_S_KEY_HASH_MASK; +} + +/** + * key_block - get data block number. + * @c: UBIFS file-system description object + * @key: the key to get the block number from + */ +static inline unsigned int key_block(const struct ubifs_info *c, + const union ubifs_key *key) +{ + return key->u32[1] & UBIFS_S_KEY_BLOCK_MASK; +} + +/** + * key_block_flash - get data block number from an on-flash formatted key. + * @c: UBIFS file-system description object + * @k: the key to get the block number from + */ +static inline unsigned int key_block_flash(const struct ubifs_info *c, + const void *k) +{ + const union ubifs_key *key = k; + + return le32_to_cpu(key->u32[1]) & UBIFS_S_KEY_BLOCK_MASK; +} + +/** + * key_read - transform a key to in-memory format. + * @c: UBIFS file-system description object + * @from: the key to transform + * @to: the key to store the result + */ +static inline void key_read(const struct ubifs_info *c, const void *from, + union ubifs_key *to) +{ + const union ubifs_key *f = from; + + to->u32[0] = le32_to_cpu(f->j32[0]); + to->u32[1] = le32_to_cpu(f->j32[1]); +} + +/** + * key_write - transform a key from in-memory format. + * @c: UBIFS file-system description object + * @from: the key to transform + * @to: the key to store the result + */ +static inline void key_write(const struct ubifs_info *c, + const union ubifs_key *from, void *to) +{ + union ubifs_key *t = to; + + t->j32[0] = cpu_to_le32(from->u32[0]); + t->j32[1] = cpu_to_le32(from->u32[1]); + memset(to + 8, 0, UBIFS_MAX_KEY_LEN - 8); +} + +/** + * key_write_idx - transform a key from in-memory format for the index. + * @c: UBIFS file-system description object + * @from: the key to transform + * @to: the key to store the result + */ +static inline void key_write_idx(const struct ubifs_info *c, + const union ubifs_key *from, void *to) +{ + union ubifs_key *t = to; + + t->j32[0] = cpu_to_le32(from->u32[0]); + t->j32[1] = cpu_to_le32(from->u32[1]); +} + +/** + * key_copy - copy a key. + * @c: UBIFS file-system description object + * @from: the key to copy from + * @to: the key to copy to + */ +static inline void key_copy(const struct ubifs_info *c, + const union ubifs_key *from, union ubifs_key *to) +{ + to->u64[0] = from->u64[0]; +} + +/** + * keys_cmp - compare keys. + * @c: UBIFS file-system description object + * @key1: the first key to compare + * @key2: the second key to compare + * + * This function compares 2 keys and returns %-1 if @key1 is less than + * @key2, 0 if the keys are equivalent and %1 if @key1 is greater than @key2. + */ +static inline int keys_cmp(const struct ubifs_info *c, + const union ubifs_key *key1, + const union ubifs_key *key2) +{ + if (key1->u32[0] < key2->u32[0]) + return -1; + if (key1->u32[0] > key2->u32[0]) + return 1; + if (key1->u32[1] < key2->u32[1]) + return -1; + if (key1->u32[1] > key2->u32[1]) + return 1; + + return 0; +} + +/** + * is_hash_key - is a key vulnerable to hash collisions. + * @c: UBIFS file-system description object + * @key: key + * + * This function returns %1 if @key is a hashed key or %0 otherwise. + */ +static inline int is_hash_key(const struct ubifs_info *c, + const union ubifs_key *key) +{ + int type = key_type(c, key); + + return type == UBIFS_DENT_KEY || type == UBIFS_XENT_KEY; +} + +/** + * key_max_inode_size - get maximum file size allowed by current key format. + * @c: UBIFS file-system description object + */ +static inline unsigned long long key_max_inode_size(const struct ubifs_info *c) +{ + switch (c->key_fmt) { + case UBIFS_SIMPLE_KEY_FMT: + return (1ULL << UBIFS_S_KEY_BLOCK_BITS) * UBIFS_BLOCK_SIZE; + default: + return 0; + } +} +#endif /* !__UBIFS_KEY_H__ */ diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c new file mode 100644 index 0000000..36857b9 --- /dev/null +++ b/fs/ubifs/log.c @@ -0,0 +1,805 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file is a part of UBIFS journal implementation and contains various + * functions which manipulate the log. The log is a fixed area on the flash + * which does not contain any data but refers to buds. The log is a part of the + * journal. + */ + +#include "ubifs.h" + +#ifdef CONFIG_UBIFS_FS_DEBUG +static int dbg_check_bud_bytes(struct ubifs_info *c); +#else +#define dbg_check_bud_bytes(c) 0 +#endif + +/** + * ubifs_search_bud - search bud LEB. + * @c: UBIFS file-system description object + * @lnum: logical eraseblock number to search + * + * This function searches bud LEB @lnum. Returns bud description object in case + * of success and %NULL if there is no bud with this LEB number. + */ +struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum) +{ + struct rb_node *p; + struct ubifs_bud *bud; + + spin_lock(&c->buds_lock); + p = c->buds.rb_node; + while (p) { + bud = rb_entry(p, struct ubifs_bud, rb); + if (lnum < bud->lnum) + p = p->rb_left; + else if (lnum > bud->lnum) + p = p->rb_right; + else { + spin_unlock(&c->buds_lock); + return bud; + } + } + spin_unlock(&c->buds_lock); + return NULL; +} + +/** + * ubifs_get_wbuf - get the wbuf associated with a LEB, if there is one. + * @c: UBIFS file-system description object + * @lnum: logical eraseblock number to search + * + * This functions returns the wbuf for @lnum or %NULL if there is not one. + */ +struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum) +{ + struct rb_node *p; + struct ubifs_bud *bud; + int jhead; + + if (!c->jheads) + return NULL; + + spin_lock(&c->buds_lock); + p = c->buds.rb_node; + while (p) { + bud = rb_entry(p, struct ubifs_bud, rb); + if (lnum < bud->lnum) + p = p->rb_left; + else if (lnum > bud->lnum) + p = p->rb_right; + else { + jhead = bud->jhead; + spin_unlock(&c->buds_lock); + return &c->jheads[jhead].wbuf; + } + } + spin_unlock(&c->buds_lock); + return NULL; +} + +/** + * next_log_lnum - switch to the next log LEB. + * @c: UBIFS file-system description object + * @lnum: current log LEB + */ +static inline int next_log_lnum(const struct ubifs_info *c, int lnum) +{ + lnum += 1; + if (lnum > c->log_last) + lnum = UBIFS_LOG_LNUM; + + return lnum; +} + +/** + * empty_log_bytes - calculate amount of empty space in the log. + * @c: UBIFS file-system description object + */ +static inline long long empty_log_bytes(const struct ubifs_info *c) +{ + long long h, t; + + h = (long long)c->lhead_lnum * c->leb_size + c->lhead_offs; + t = (long long)c->ltail_lnum * c->leb_size; + + if (h >= t) + return c->log_bytes - h + t; + else + return t - h; +} + +/** + * ubifs_add_bud - add bud LEB to the tree of buds and its journal head list. + * @c: UBIFS file-system description object + * @bud: the bud to add + */ +void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud) +{ + struct rb_node **p, *parent = NULL; + struct ubifs_bud *b; + struct ubifs_jhead *jhead; + + spin_lock(&c->buds_lock); + p = &c->buds.rb_node; + while (*p) { + parent = *p; + b = rb_entry(parent, struct ubifs_bud, rb); + ubifs_assert(bud->lnum != b->lnum); + if (bud->lnum < b->lnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&bud->rb, parent, p); + rb_insert_color(&bud->rb, &c->buds); + if (c->jheads) { + jhead = &c->jheads[bud->jhead]; + list_add_tail(&bud->list, &jhead->buds_list); + } else + ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY)); + + /* + * Note, although this is a new bud, we anyway account this space now, + * before any data has been written to it, because this is about to + * guarantee fixed mount time, and this bud will anyway be read and + * scanned. + */ + c->bud_bytes += c->leb_size - bud->start; + + dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum, + bud->start, bud->jhead, c->bud_bytes); + spin_unlock(&c->buds_lock); +} + +/** + * ubifs_create_buds_lists - create journal head buds lists for remount rw. + * @c: UBIFS file-system description object + */ +void ubifs_create_buds_lists(struct ubifs_info *c) +{ + struct rb_node *p; + + spin_lock(&c->buds_lock); + p = rb_first(&c->buds); + while (p) { + struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb); + struct ubifs_jhead *jhead = &c->jheads[bud->jhead]; + + list_add_tail(&bud->list, &jhead->buds_list); + p = rb_next(p); + } + spin_unlock(&c->buds_lock); +} + +/** + * ubifs_add_bud_to_log - add a new bud to the log. + * @c: UBIFS file-system description object + * @jhead: journal head the bud belongs to + * @lnum: LEB number of the bud + * @offs: starting offset of the bud + * + * This function writes reference node for the new bud LEB @lnum it to the log, + * and adds it to the buds tress. It also makes sure that log size does not + * exceed the 'c->max_bud_bytes' limit. Returns zero in case of success, + * %-EAGAIN if commit is required, and a negative error codes in case of + * failure. + */ +int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) +{ + int err; + struct ubifs_bud *bud; + struct ubifs_ref_node *ref; + + bud = kmalloc(sizeof(struct ubifs_bud), GFP_NOFS); + if (!bud) + return -ENOMEM; + ref = kzalloc(c->ref_node_alsz, GFP_NOFS); + if (!ref) { + kfree(bud); + return -ENOMEM; + } + + mutex_lock(&c->log_mutex); + + if (c->ro_media) { + err = -EROFS; + goto out_unlock; + } + + /* Make sure we have enough space in the log */ + if (empty_log_bytes(c) - c->ref_node_alsz < c->min_log_bytes) { + dbg_log("not enough log space - %lld, required %d", + empty_log_bytes(c), c->min_log_bytes); + ubifs_commit_required(c); + err = -EAGAIN; + goto out_unlock; + } + + /* + * Make sure the the amount of space in buds will not exceed + * 'c->max_bud_bytes' limit, because we want to guarantee mount time + * limits. + * + * It is not necessary to hold @c->buds_lock when reading @c->bud_bytes + * because we are holding @c->log_mutex. All @c->bud_bytes take place + * when both @c->log_mutex and @c->bud_bytes are locked. + */ + if (c->bud_bytes + c->leb_size - offs > c->max_bud_bytes) { + dbg_log("bud bytes %lld (%lld max), require commit", + c->bud_bytes, c->max_bud_bytes); + ubifs_commit_required(c); + err = -EAGAIN; + goto out_unlock; + } + + /* + * If the journal is full enough - start background commit. Note, it is + * OK to read 'c->cmt_state' without spinlock because integer reads + * are atomic in the kernel. + */ + if (c->bud_bytes >= c->bg_bud_bytes && + c->cmt_state == COMMIT_RESTING) { + dbg_log("bud bytes %lld (%lld max), initiate BG commit", + c->bud_bytes, c->max_bud_bytes); + ubifs_request_bg_commit(c); + } + + bud->lnum = lnum; + bud->start = offs; + bud->jhead = jhead; + + ref->ch.node_type = UBIFS_REF_NODE; + ref->lnum = cpu_to_le32(bud->lnum); + ref->offs = cpu_to_le32(bud->start); + ref->jhead = cpu_to_le32(jhead); + + if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { + c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); + c->lhead_offs = 0; + } + + if (c->lhead_offs == 0) { + /* Must ensure next log LEB has been unmapped */ + err = ubifs_leb_unmap(c, c->lhead_lnum); + if (err) + goto out_unlock; + } + + if (bud->start == 0) { + /* + * Before writing the LEB reference which refers an empty LEB + * to the log, we have to make sure it is mapped, because + * otherwise we'd risk to refer an LEB with garbage in case of + * an unclean reboot, because the target LEB might have been + * unmapped, but not yet physically erased. + */ + err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM); + if (err) + goto out_unlock; + } + + dbg_log("write ref LEB %d:%d", + c->lhead_lnum, c->lhead_offs); + err = ubifs_write_node(c, ref, UBIFS_REF_NODE_SZ, c->lhead_lnum, + c->lhead_offs, UBI_SHORTTERM); + if (err) + goto out_unlock; + + c->lhead_offs += c->ref_node_alsz; + + ubifs_add_bud(c, bud); + + mutex_unlock(&c->log_mutex); + kfree(ref); + return 0; + +out_unlock: + mutex_unlock(&c->log_mutex); + kfree(ref); + kfree(bud); + return err; +} + +/** + * remove_buds - remove used buds. + * @c: UBIFS file-system description object + * + * This function removes use buds from the buds tree. It does not remove the + * buds which are pointed to by journal heads. + */ +static void remove_buds(struct ubifs_info *c) +{ + struct rb_node *p; + + ubifs_assert(list_empty(&c->old_buds)); + c->cmt_bud_bytes = 0; + spin_lock(&c->buds_lock); + p = rb_first(&c->buds); + while (p) { + struct rb_node *p1 = p; + struct ubifs_bud *bud; + struct ubifs_wbuf *wbuf; + + p = rb_next(p); + bud = rb_entry(p1, struct ubifs_bud, rb); + wbuf = &c->jheads[bud->jhead].wbuf; + + if (wbuf->lnum == bud->lnum) { + /* + * Do not remove buds which are pointed to by journal + * heads (non-closed buds). + */ + c->cmt_bud_bytes += wbuf->offs - bud->start; + dbg_log("preserve %d:%d, jhead %d, bud bytes %d, " + "cmt_bud_bytes %lld", bud->lnum, bud->start, + bud->jhead, wbuf->offs - bud->start, + c->cmt_bud_bytes); + bud->start = wbuf->offs; + } else { + c->cmt_bud_bytes += c->leb_size - bud->start; + dbg_log("remove %d:%d, jhead %d, bud bytes %d, " + "cmt_bud_bytes %lld", bud->lnum, bud->start, + bud->jhead, c->leb_size - bud->start, + c->cmt_bud_bytes); + rb_erase(p1, &c->buds); + list_del(&bud->list); + /* + * If the commit does not finish, the recovery will need + * to replay the journal, in which case the old buds + * must be unchanged. Do not release them until post + * commit i.e. do not allow them to be garbage + * collected. + */ + list_add(&bud->list, &c->old_buds); + } + } + spin_unlock(&c->buds_lock); +} + +/** + * ubifs_log_start_commit - start commit. + * @c: UBIFS file-system description object + * @ltail_lnum: return new log tail LEB number + * + * The commit operation starts with writing "commit start" node to the log and + * reference nodes for all journal heads which will define new journal after + * the commit has been finished. The commit start and reference nodes are + * written in one go to the nearest empty log LEB (hence, when commit is + * finished UBIFS may safely unmap all the previous log LEBs). This function + * returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) +{ + void *buf; + struct ubifs_cs_node *cs; + struct ubifs_ref_node *ref; + int err, i, max_len, len; + + err = dbg_check_bud_bytes(c); + if (err) + return err; + + max_len = UBIFS_CS_NODE_SZ + c->jhead_cnt * UBIFS_REF_NODE_SZ; + max_len = ALIGN(max_len, c->min_io_size); + buf = cs = kmalloc(max_len, GFP_NOFS); + if (!buf) + return -ENOMEM; + + cs->ch.node_type = UBIFS_CS_NODE; + cs->cmt_no = cpu_to_le64(c->cmt_no + 1); + ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0); + + /* + * Note, we do not lock 'c->log_mutex' because this is the commit start + * phase and we are exclusively using the log. And we do not lock + * write-buffer because nobody can write to the file-system at this + * phase. + */ + + len = UBIFS_CS_NODE_SZ; + for (i = 0; i < c->jhead_cnt; i++) { + int lnum = c->jheads[i].wbuf.lnum; + int offs = c->jheads[i].wbuf.offs; + + if (lnum == -1 || offs == c->leb_size) + continue; + + dbg_log("add ref to LEB %d:%d for jhead %d", lnum, offs, i); + ref = buf + len; + ref->ch.node_type = UBIFS_REF_NODE; + ref->lnum = cpu_to_le32(lnum); + ref->offs = cpu_to_le32(offs); + ref->jhead = cpu_to_le32(i); + + ubifs_prepare_node(c, ref, UBIFS_REF_NODE_SZ, 0); + len += UBIFS_REF_NODE_SZ; + } + + ubifs_pad(c, buf + len, ALIGN(len, c->min_io_size) - len); + + /* Switch to the next log LEB */ + if (c->lhead_offs) { + c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); + c->lhead_offs = 0; + } + + if (c->lhead_offs == 0) { + /* Must ensure next LEB has been unmapped */ + err = ubifs_leb_unmap(c, c->lhead_lnum); + if (err) + goto out; + } + + len = ALIGN(len, c->min_io_size); + dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len); + err = ubifs_leb_write(c, c->lhead_lnum, cs, 0, len, UBI_SHORTTERM); + if (err) + goto out; + + *ltail_lnum = c->lhead_lnum; + + c->lhead_offs += len; + if (c->lhead_offs == c->leb_size) { + c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); + c->lhead_offs = 0; + } + + remove_buds(c); + + /* + * We have started the commit and now users may use the rest of the log + * for new writes. + */ + c->min_log_bytes = 0; + +out: + kfree(buf); + return err; +} + +/** + * ubifs_log_end_commit - end commit. + * @c: UBIFS file-system description object + * @ltail_lnum: new log tail LEB number + * + * This function is called on when the commit operation was finished. It + * moves log tail to new position and unmaps LEBs which contain obsolete data. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum) +{ + int err; + + /* + * At this phase we have to lock 'c->log_mutex' because UBIFS allows FS + * writes during commit. Its only short "commit" start phase when + * writers are blocked. + */ + mutex_lock(&c->log_mutex); + + dbg_log("old tail was LEB %d:0, new tail is LEB %d:0", + c->ltail_lnum, ltail_lnum); + + c->ltail_lnum = ltail_lnum; + /* + * The commit is finished and from now on it must be guaranteed that + * there is always enough space for the next commit. + */ + c->min_log_bytes = c->leb_size; + + spin_lock(&c->buds_lock); + c->bud_bytes -= c->cmt_bud_bytes; + spin_unlock(&c->buds_lock); + + err = dbg_check_bud_bytes(c); + + mutex_unlock(&c->log_mutex); + return err; +} + +/** + * ubifs_log_post_commit - things to do after commit is completed. + * @c: UBIFS file-system description object + * @old_ltail_lnum: old log tail LEB number + * + * Release buds only after commit is completed, because they must be unchanged + * if recovery is needed. + * + * Unmap log LEBs only after commit is completed, because they may be needed for + * recovery. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum) +{ + int lnum, err = 0; + + while (!list_empty(&c->old_buds)) { + struct ubifs_bud *bud; + + bud = list_entry(c->old_buds.next, struct ubifs_bud, list); + err = ubifs_return_leb(c, bud->lnum); + if (err) + return err; + list_del(&bud->list); + kfree(bud); + } + mutex_lock(&c->log_mutex); + for (lnum = old_ltail_lnum; lnum != c->ltail_lnum; + lnum = next_log_lnum(c, lnum)) { + dbg_log("unmap log LEB %d", lnum); + err = ubifs_leb_unmap(c, lnum); + if (err) + goto out; + } +out: + mutex_unlock(&c->log_mutex); + return err; +} + +/** + * struct done_ref - references that have been done. + * @rb: rb-tree node + * @lnum: LEB number + */ +struct done_ref { + struct rb_node rb; + int lnum; +}; + +/** + * done_already - determine if a reference has been done already. + * @done_tree: rb-tree to store references that have been done + * @lnum: LEB number of reference + * + * This function returns %1 if the reference has been done, %0 if not, otherwise + * a negative error code is returned. + */ +static int done_already(struct rb_root *done_tree, int lnum) +{ + struct rb_node **p = &done_tree->rb_node, *parent = NULL; + struct done_ref *dr; + + while (*p) { + parent = *p; + dr = rb_entry(parent, struct done_ref, rb); + if (lnum < dr->lnum) + p = &(*p)->rb_left; + else if (lnum > dr->lnum) + p = &(*p)->rb_right; + else + return 1; + } + + dr = kzalloc(sizeof(struct done_ref), GFP_NOFS); + if (!dr) + return -ENOMEM; + + dr->lnum = lnum; + + rb_link_node(&dr->rb, parent, p); + rb_insert_color(&dr->rb, done_tree); + + return 0; +} + +/** + * destroy_done_tree - destroy the done tree. + * @done_tree: done tree to destroy + */ +static void destroy_done_tree(struct rb_root *done_tree) +{ + struct rb_node *this = done_tree->rb_node; + struct done_ref *dr; + + while (this) { + if (this->rb_left) { + this = this->rb_left; + continue; + } else if (this->rb_right) { + this = this->rb_right; + continue; + } + dr = rb_entry(this, struct done_ref, rb); + this = rb_parent(this); + if (this) { + if (this->rb_left == &dr->rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + kfree(dr); + } +} + +/** + * add_node - add a node to the consolidated log. + * @c: UBIFS file-system description object + * @buf: buffer to which to add + * @lnum: LEB number to which to write is passed and returned here + * @offs: offset to where to write is passed and returned here + * @node: node to add + * + * This function returns %0 on success and a negative error code on failure. + */ +static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs, + void *node) +{ + struct ubifs_ch *ch = node; + int len = le32_to_cpu(ch->len), remains = c->leb_size - *offs; + + if (len > remains) { + int sz = ALIGN(*offs, c->min_io_size), err; + + ubifs_pad(c, buf + *offs, sz - *offs); + err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM); + if (err) + return err; + *lnum = next_log_lnum(c, *lnum); + *offs = 0; + } + memcpy(buf + *offs, node, len); + *offs += ALIGN(len, 8); + return 0; +} + +/** + * ubifs_consolidate_log - consolidate the log. + * @c: UBIFS file-system description object + * + * Repeated failed commits could cause the log to be full, but at least 1 LEB is + * needed for commit. This function rewrites the reference nodes in the log + * omitting duplicates, and failed CS nodes, and leaving no gaps. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_consolidate_log(struct ubifs_info *c) +{ + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + struct rb_root done_tree = RB_ROOT; + int lnum, err, first = 1, write_lnum, offs = 0; + void *buf; + + dbg_rcvry("log tail LEB %d, log head LEB %d", c->ltail_lnum, + c->lhead_lnum); + buf = vmalloc(c->leb_size); + if (!buf) + return -ENOMEM; + lnum = c->ltail_lnum; + write_lnum = lnum; + while (1) { + sleb = ubifs_scan(c, lnum, 0, c->sbuf); + if (IS_ERR(sleb)) { + err = PTR_ERR(sleb); + goto out_free; + } + list_for_each_entry(snod, &sleb->nodes, list) { + switch (snod->type) { + case UBIFS_REF_NODE: { + struct ubifs_ref_node *ref = snod->node; + int ref_lnum = le32_to_cpu(ref->lnum); + + err = done_already(&done_tree, ref_lnum); + if (err < 0) + goto out_scan; + if (err != 1) { + err = add_node(c, buf, &write_lnum, + &offs, snod->node); + if (err) + goto out_scan; + } + break; + } + case UBIFS_CS_NODE: + if (!first) + break; + err = add_node(c, buf, &write_lnum, &offs, + snod->node); + if (err) + goto out_scan; + first = 0; + break; + } + } + ubifs_scan_destroy(sleb); + if (lnum == c->lhead_lnum) + break; + lnum = next_log_lnum(c, lnum); + } + if (offs) { + int sz = ALIGN(offs, c->min_io_size); + + ubifs_pad(c, buf + offs, sz - offs); + err = ubifs_leb_change(c, write_lnum, buf, sz, UBI_SHORTTERM); + if (err) + goto out_free; + offs = ALIGN(offs, c->min_io_size); + } + destroy_done_tree(&done_tree); + vfree(buf); + if (write_lnum == c->lhead_lnum) { + ubifs_err("log is too full"); + return -EINVAL; + } + /* Unmap remaining LEBs */ + lnum = write_lnum; + do { + lnum = next_log_lnum(c, lnum); + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } while (lnum != c->lhead_lnum); + c->lhead_lnum = write_lnum; + c->lhead_offs = offs; + dbg_rcvry("new log head at %d:%d", c->lhead_lnum, c->lhead_offs); + return 0; + +out_scan: + ubifs_scan_destroy(sleb); +out_free: + destroy_done_tree(&done_tree); + vfree(buf); + return err; +} + +#ifdef CONFIG_UBIFS_FS_DEBUG + +/** + * dbg_check_bud_bytes - make sure bud bytes calculation are all right. + * @c: UBIFS file-system description object + * + * This function makes sure the amount of flash space used by closed buds + * ('c->bud_bytes' is correct). Returns zero in case of success and %-EINVAL in + * case of failure. + */ +static int dbg_check_bud_bytes(struct ubifs_info *c) +{ + int i, err = 0; + struct ubifs_bud *bud; + long long bud_bytes = 0; + + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + return 0; + + spin_lock(&c->buds_lock); + for (i = 0; i < c->jhead_cnt; i++) + list_for_each_entry(bud, &c->jheads[i].buds_list, list) + bud_bytes += c->leb_size - bud->start; + + if (c->bud_bytes != bud_bytes) { + ubifs_err("bad bud_bytes %lld, calculated %lld", + c->bud_bytes, bud_bytes); + err = -EINVAL; + } + spin_unlock(&c->buds_lock); + + return err; +} + +#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c new file mode 100644 index 0000000..2ba93da --- /dev/null +++ b/fs/ubifs/lprops.c @@ -0,0 +1,1357 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements the functions that access LEB properties and their + * categories. LEBs are categorized based on the needs of UBIFS, and the + * categories are stored as either heaps or lists to provide a fast way of + * finding a LEB in a particular category. For example, UBIFS may need to find + * an empty LEB for the journal, or a very dirty LEB for garbage collection. + */ + +#include "ubifs.h" + +/** + * get_heap_comp_val - get the LEB properties value for heap comparisons. + * @lprops: LEB properties + * @cat: LEB category + */ +static int get_heap_comp_val(struct ubifs_lprops *lprops, int cat) +{ + switch (cat) { + case LPROPS_FREE: + return lprops->free; + case LPROPS_DIRTY_IDX: + return lprops->free + lprops->dirty; + default: + return lprops->dirty; + } +} + +/** + * move_up_lpt_heap - move a new heap entry up as far as possible. + * @c: UBIFS file-system description object + * @heap: LEB category heap + * @lprops: LEB properties to move + * @cat: LEB category + * + * New entries to a heap are added at the bottom and then moved up until the + * parent's value is greater. In the case of LPT's category heaps, the value + * is either the amount of free space or the amount of dirty space, depending + * on the category. + */ +static void move_up_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, + struct ubifs_lprops *lprops, int cat) +{ + int val1, val2, hpos; + + hpos = lprops->hpos; + if (!hpos) + return; /* Already top of the heap */ + val1 = get_heap_comp_val(lprops, cat); + /* Compare to parent and, if greater, move up the heap */ + do { + int ppos = (hpos - 1) / 2; + + val2 = get_heap_comp_val(heap->arr[ppos], cat); + if (val2 >= val1) + return; + /* Greater than parent so move up */ + heap->arr[ppos]->hpos = hpos; + heap->arr[hpos] = heap->arr[ppos]; + heap->arr[ppos] = lprops; + lprops->hpos = ppos; + hpos = ppos; + } while (hpos); +} + +/** + * adjust_lpt_heap - move a changed heap entry up or down the heap. + * @c: UBIFS file-system description object + * @heap: LEB category heap + * @lprops: LEB properties to move + * @hpos: heap position of @lprops + * @cat: LEB category + * + * Changed entries in a heap are moved up or down until the parent's value is + * greater. In the case of LPT's category heaps, the value is either the amount + * of free space or the amount of dirty space, depending on the category. + */ +static void adjust_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, + struct ubifs_lprops *lprops, int hpos, int cat) +{ + int val1, val2, val3, cpos; + + val1 = get_heap_comp_val(lprops, cat); + /* Compare to parent and, if greater than parent, move up the heap */ + if (hpos) { + int ppos = (hpos - 1) / 2; + + val2 = get_heap_comp_val(heap->arr[ppos], cat); + if (val1 > val2) { + /* Greater than parent so move up */ + while (1) { + heap->arr[ppos]->hpos = hpos; + heap->arr[hpos] = heap->arr[ppos]; + heap->arr[ppos] = lprops; + lprops->hpos = ppos; + hpos = ppos; + if (!hpos) + return; + ppos = (hpos - 1) / 2; + val2 = get_heap_comp_val(heap->arr[ppos], cat); + if (val1 <= val2) + return; + /* Still greater than parent so keep going */ + } + } + } + /* Not greater than parent, so compare to children */ + while (1) { + /* Compare to left child */ + cpos = hpos * 2 + 1; + if (cpos >= heap->cnt) + return; + val2 = get_heap_comp_val(heap->arr[cpos], cat); + if (val1 < val2) { + /* Less than left child, so promote biggest child */ + if (cpos + 1 < heap->cnt) { + val3 = get_heap_comp_val(heap->arr[cpos + 1], + cat); + if (val3 > val2) + cpos += 1; /* Right child is bigger */ + } + heap->arr[cpos]->hpos = hpos; + heap->arr[hpos] = heap->arr[cpos]; + heap->arr[cpos] = lprops; + lprops->hpos = cpos; + hpos = cpos; + continue; + } + /* Compare to right child */ + cpos += 1; + if (cpos >= heap->cnt) + return; + val3 = get_heap_comp_val(heap->arr[cpos], cat); + if (val1 < val3) { + /* Less than right child, so promote right child */ + heap->arr[cpos]->hpos = hpos; + heap->arr[hpos] = heap->arr[cpos]; + heap->arr[cpos] = lprops; + lprops->hpos = cpos; + hpos = cpos; + continue; + } + return; + } +} + +/** + * add_to_lpt_heap - add LEB properties to a LEB category heap. + * @c: UBIFS file-system description object + * @lprops: LEB properties to add + * @cat: LEB category + * + * This function returns %1 if @lprops is added to the heap for LEB category + * @cat, otherwise %0 is returned because the heap is full. + */ +static int add_to_lpt_heap(struct ubifs_info *c, struct ubifs_lprops *lprops, + int cat) +{ + struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1]; + + if (heap->cnt >= heap->max_cnt) { + const int b = LPT_HEAP_SZ / 2 - 1; + int cpos, val1, val2; + + /* Compare to some other LEB on the bottom of heap */ + /* Pick a position kind of randomly */ + cpos = (((size_t)lprops >> 4) & b) + b; + ubifs_assert(cpos >= b); + ubifs_assert(cpos < LPT_HEAP_SZ); + ubifs_assert(cpos < heap->cnt); + + val1 = get_heap_comp_val(lprops, cat); + val2 = get_heap_comp_val(heap->arr[cpos], cat); + if (val1 > val2) { + struct ubifs_lprops *lp; + + lp = heap->arr[cpos]; + lp->flags &= ~LPROPS_CAT_MASK; + lp->flags |= LPROPS_UNCAT; + list_add(&lp->list, &c->uncat_list); + lprops->hpos = cpos; + heap->arr[cpos] = lprops; + move_up_lpt_heap(c, heap, lprops, cat); + dbg_check_heap(c, heap, cat, lprops->hpos); + return 1; /* Added to heap */ + } + dbg_check_heap(c, heap, cat, -1); + return 0; /* Not added to heap */ + } else { + lprops->hpos = heap->cnt++; + heap->arr[lprops->hpos] = lprops; + move_up_lpt_heap(c, heap, lprops, cat); + dbg_check_heap(c, heap, cat, lprops->hpos); + return 1; /* Added to heap */ + } +} + +/** + * remove_from_lpt_heap - remove LEB properties from a LEB category heap. + * @c: UBIFS file-system description object + * @lprops: LEB properties to remove + * @cat: LEB category + */ +static void remove_from_lpt_heap(struct ubifs_info *c, + struct ubifs_lprops *lprops, int cat) +{ + struct ubifs_lpt_heap *heap; + int hpos = lprops->hpos; + + heap = &c->lpt_heap[cat - 1]; + ubifs_assert(hpos >= 0 && hpos < heap->cnt); + ubifs_assert(heap->arr[hpos] == lprops); + heap->cnt -= 1; + if (hpos < heap->cnt) { + heap->arr[hpos] = heap->arr[heap->cnt]; + heap->arr[hpos]->hpos = hpos; + adjust_lpt_heap(c, heap, heap->arr[hpos], hpos, cat); + } + dbg_check_heap(c, heap, cat, -1); +} + +/** + * lpt_heap_replace - replace lprops in a category heap. + * @c: UBIFS file-system description object + * @old_lprops: LEB properties to replace + * @new_lprops: LEB properties with which to replace + * @cat: LEB category + * + * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode) + * and the lprops that the pnode contains. When that happens, references in + * the category heaps to those lprops must be updated to point to the new + * lprops. This function does that. + */ +static void lpt_heap_replace(struct ubifs_info *c, + struct ubifs_lprops *old_lprops, + struct ubifs_lprops *new_lprops, int cat) +{ + struct ubifs_lpt_heap *heap; + int hpos = new_lprops->hpos; + + heap = &c->lpt_heap[cat - 1]; + heap->arr[hpos] = new_lprops; +} + +/** + * ubifs_add_to_cat - add LEB properties to a category list or heap. + * @c: UBIFS file-system description object + * @lprops: LEB properties to add + * @cat: LEB category to which to add + * + * LEB properties are categorized to enable fast find operations. + */ +void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, + int cat) +{ + switch (cat) { + case LPROPS_DIRTY: + case LPROPS_DIRTY_IDX: + case LPROPS_FREE: + if (add_to_lpt_heap(c, lprops, cat)) + break; + /* No more room on heap so make it uncategorized */ + cat = LPROPS_UNCAT; + /* Fall through */ + case LPROPS_UNCAT: + list_add(&lprops->list, &c->uncat_list); + break; + case LPROPS_EMPTY: + list_add(&lprops->list, &c->empty_list); + break; + case LPROPS_FREEABLE: + list_add(&lprops->list, &c->freeable_list); + c->freeable_cnt += 1; + break; + case LPROPS_FRDI_IDX: + list_add(&lprops->list, &c->frdi_idx_list); + break; + default: + ubifs_assert(0); + } + lprops->flags &= ~LPROPS_CAT_MASK; + lprops->flags |= cat; +} + +/** + * ubifs_remove_from_cat - remove LEB properties from a category list or heap. + * @c: UBIFS file-system description object + * @lprops: LEB properties to remove + * @cat: LEB category from which to remove + * + * LEB properties are categorized to enable fast find operations. + */ +static void ubifs_remove_from_cat(struct ubifs_info *c, + struct ubifs_lprops *lprops, int cat) +{ + switch (cat) { + case LPROPS_DIRTY: + case LPROPS_DIRTY_IDX: + case LPROPS_FREE: + remove_from_lpt_heap(c, lprops, cat); + break; + case LPROPS_FREEABLE: + c->freeable_cnt -= 1; + ubifs_assert(c->freeable_cnt >= 0); + /* Fall through */ + case LPROPS_UNCAT: + case LPROPS_EMPTY: + case LPROPS_FRDI_IDX: + ubifs_assert(!list_empty(&lprops->list)); + list_del(&lprops->list); + break; + default: + ubifs_assert(0); + } +} + +/** + * ubifs_replace_cat - replace lprops in a category list or heap. + * @c: UBIFS file-system description object + * @old_lprops: LEB properties to replace + * @new_lprops: LEB properties with which to replace + * + * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode) + * and the lprops that the pnode contains. When that happens, references in + * category lists and heaps must be replaced. This function does that. + */ +void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, + struct ubifs_lprops *new_lprops) +{ + int cat; + + cat = new_lprops->flags & LPROPS_CAT_MASK; + switch (cat) { + case LPROPS_DIRTY: + case LPROPS_DIRTY_IDX: + case LPROPS_FREE: + lpt_heap_replace(c, old_lprops, new_lprops, cat); + break; + case LPROPS_UNCAT: + case LPROPS_EMPTY: + case LPROPS_FREEABLE: + case LPROPS_FRDI_IDX: + list_replace(&old_lprops->list, &new_lprops->list); + break; + default: + ubifs_assert(0); + } +} + +/** + * ubifs_ensure_cat - ensure LEB properties are categorized. + * @c: UBIFS file-system description object + * @lprops: LEB properties + * + * A LEB may have fallen off of the bottom of a heap, and ended up as + * uncategorized even though it has enough space for us now. If that is the case + * this function will put the LEB back onto a heap. + */ +void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops) +{ + int cat = lprops->flags & LPROPS_CAT_MASK; + + if (cat != LPROPS_UNCAT) + return; + cat = ubifs_categorize_lprops(c, lprops); + if (cat == LPROPS_UNCAT) + return; + ubifs_remove_from_cat(c, lprops, LPROPS_UNCAT); + ubifs_add_to_cat(c, lprops, cat); +} + +/** + * ubifs_categorize_lprops - categorize LEB properties. + * @c: UBIFS file-system description object + * @lprops: LEB properties to categorize + * + * LEB properties are categorized to enable fast find operations. This function + * returns the LEB category to which the LEB properties belong. Note however + * that if the LEB category is stored as a heap and the heap is full, the + * LEB properties may have their category changed to %LPROPS_UNCAT. + */ +int ubifs_categorize_lprops(const struct ubifs_info *c, + const struct ubifs_lprops *lprops) +{ + if (lprops->flags & LPROPS_TAKEN) + return LPROPS_UNCAT; + + if (lprops->free == c->leb_size) { + ubifs_assert(!(lprops->flags & LPROPS_INDEX)); + return LPROPS_EMPTY; + } + + if (lprops->free + lprops->dirty == c->leb_size) { + if (lprops->flags & LPROPS_INDEX) + return LPROPS_FRDI_IDX; + else + return LPROPS_FREEABLE; + } + + if (lprops->flags & LPROPS_INDEX) { + if (lprops->dirty + lprops->free >= c->min_idx_node_sz) + return LPROPS_DIRTY_IDX; + } else { + if (lprops->dirty >= c->dead_wm && + lprops->dirty > lprops->free) + return LPROPS_DIRTY; + if (lprops->free > 0) + return LPROPS_FREE; + } + + return LPROPS_UNCAT; +} + +/** + * change_category - change LEB properties category. + * @c: UBIFS file-system description object + * @lprops: LEB properties to recategorize + * + * LEB properties are categorized to enable fast find operations. When the LEB + * properties change they must be recategorized. + */ +static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) +{ + int old_cat = lprops->flags & LPROPS_CAT_MASK; + int new_cat = ubifs_categorize_lprops(c, lprops); + + if (old_cat == new_cat) { + struct ubifs_lpt_heap *heap = &c->lpt_heap[new_cat - 1]; + + /* lprops on a heap now must be moved up or down */ + if (new_cat < 1 || new_cat > LPROPS_HEAP_CNT) + return; /* Not on a heap */ + heap = &c->lpt_heap[new_cat - 1]; + adjust_lpt_heap(c, heap, lprops, lprops->hpos, new_cat); + } else { + ubifs_remove_from_cat(c, lprops, old_cat); + ubifs_add_to_cat(c, lprops, new_cat); + } +} + +/** + * ubifs_get_lprops - get reference to LEB properties. + * @c: the UBIFS file-system description object + * + * This function locks lprops. Lprops have to be unlocked by + * 'ubifs_release_lprops()'. + */ +void ubifs_get_lprops(struct ubifs_info *c) +{ + mutex_lock(&c->lp_mutex); +} + +/** + * calc_dark - calculate LEB dark space size. + * @c: the UBIFS file-system description object + * @spc: amount of free and dirty space in the LEB + * + * This function calculates amount of dark space in an LEB which has @spc bytes + * of free and dirty space. Returns the calculations result. + * + * Dark space is the space which is not always usable - it depends on which + * nodes are written in which order. E.g., if an LEB has only 512 free bytes, + * it is dark space, because it cannot fit a large data node. So UBIFS cannot + * count on this LEB and treat these 512 bytes as usable because it is not true + * if, for example, only big chunks of uncompressible data will be written to + * the FS. + */ +static int calc_dark(struct ubifs_info *c, int spc) +{ + ubifs_assert(!(spc & 7)); + + if (spc < c->dark_wm) + return spc; + + /* + * If we have slightly more space then the dark space watermark, we can + * anyway safely assume it we'll be able to write a node of the + * smallest size there. + */ + if (spc - c->dark_wm < MIN_WRITE_SZ) + return spc - MIN_WRITE_SZ; + + return c->dark_wm; +} + +/** + * is_lprops_dirty - determine if LEB properties are dirty. + * @c: the UBIFS file-system description object + * @lprops: LEB properties to test + */ +static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops) +{ + struct ubifs_pnode *pnode; + int pos; + + pos = (lprops->lnum - c->main_first) & (UBIFS_LPT_FANOUT - 1); + pnode = (struct ubifs_pnode *)container_of(lprops - pos, + struct ubifs_pnode, + lprops[0]); + return !test_bit(COW_ZNODE, &pnode->flags) && + test_bit(DIRTY_CNODE, &pnode->flags); +} + +/** + * ubifs_change_lp - change LEB properties. + * @c: the UBIFS file-system description object + * @lp: LEB properties to change + * @free: new free space amount + * @dirty: new dirty space amount + * @flags: new flags + * @idx_gc_cnt: change to the count of idx_gc list + * + * This function changes LEB properties. This function does not change a LEB + * property (@free, @dirty or @flag) if the value passed is %LPROPS_NC. + * + * This function returns a pointer to the updated LEB properties on success + * and a negative error code on failure. N.B. the LEB properties may have had to + * be copied (due to COW) and consequently the pointer returned may not be the + * same as the pointer passed. + */ +const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, + const struct ubifs_lprops *lp, + int free, int dirty, int flags, + int idx_gc_cnt) +{ + /* + * This is the only function that is allowed to change lprops, so we + * discard the const qualifier. + */ + struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp; + + dbg_lp("LEB %d, free %d, dirty %d, flags %d", + lprops->lnum, free, dirty, flags); + + ubifs_assert(mutex_is_locked(&c->lp_mutex)); + ubifs_assert(c->lst.empty_lebs >= 0 && + c->lst.empty_lebs <= c->main_lebs); + ubifs_assert(c->freeable_cnt >= 0); + ubifs_assert(c->freeable_cnt <= c->main_lebs); + ubifs_assert(c->lst.taken_empty_lebs >= 0); + ubifs_assert(c->lst.taken_empty_lebs <= c->lst.empty_lebs); + ubifs_assert(!(c->lst.total_free & 7) && !(c->lst.total_dirty & 7)); + ubifs_assert(!(c->lst.total_dead & 7) && !(c->lst.total_dark & 7)); + ubifs_assert(!(c->lst.total_used & 7)); + ubifs_assert(free == LPROPS_NC || free >= 0); + ubifs_assert(dirty == LPROPS_NC || dirty >= 0); + + if (!is_lprops_dirty(c, lprops)) { + lprops = ubifs_lpt_lookup_dirty(c, lprops->lnum); + if (IS_ERR(lprops)) + return lprops; + } else + ubifs_assert(lprops == ubifs_lpt_lookup_dirty(c, lprops->lnum)); + + ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7)); + + spin_lock(&c->space_lock); + + if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size) + c->lst.taken_empty_lebs -= 1; + + if (!(lprops->flags & LPROPS_INDEX)) { + int old_spc; + + old_spc = lprops->free + lprops->dirty; + if (old_spc < c->dead_wm) + c->lst.total_dead -= old_spc; + else + c->lst.total_dark -= calc_dark(c, old_spc); + + c->lst.total_used -= c->leb_size - old_spc; + } + + if (free != LPROPS_NC) { + free = ALIGN(free, 8); + c->lst.total_free += free - lprops->free; + + /* Increase or decrease empty LEBs counter if needed */ + if (free == c->leb_size) { + if (lprops->free != c->leb_size) + c->lst.empty_lebs += 1; + } else if (lprops->free == c->leb_size) + c->lst.empty_lebs -= 1; + lprops->free = free; + } + + if (dirty != LPROPS_NC) { + dirty = ALIGN(dirty, 8); + c->lst.total_dirty += dirty - lprops->dirty; + lprops->dirty = dirty; + } + + if (flags != LPROPS_NC) { + /* Take care about indexing LEBs counter if needed */ + if ((lprops->flags & LPROPS_INDEX)) { + if (!(flags & LPROPS_INDEX)) + c->lst.idx_lebs -= 1; + } else if (flags & LPROPS_INDEX) + c->lst.idx_lebs += 1; + lprops->flags = flags; + } + + if (!(lprops->flags & LPROPS_INDEX)) { + int new_spc; + + new_spc = lprops->free + lprops->dirty; + if (new_spc < c->dead_wm) + c->lst.total_dead += new_spc; + else + c->lst.total_dark += calc_dark(c, new_spc); + + c->lst.total_used += c->leb_size - new_spc; + } + + if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size) + c->lst.taken_empty_lebs += 1; + + change_category(c, lprops); + + c->idx_gc_cnt += idx_gc_cnt; + + spin_unlock(&c->space_lock); + + return lprops; +} + +/** + * ubifs_release_lprops - release lprops lock. + * @c: the UBIFS file-system description object + * + * This function has to be called after each 'ubifs_get_lprops()' call to + * unlock lprops. + */ +void ubifs_release_lprops(struct ubifs_info *c) +{ + ubifs_assert(mutex_is_locked(&c->lp_mutex)); + ubifs_assert(c->lst.empty_lebs >= 0 && + c->lst.empty_lebs <= c->main_lebs); + + mutex_unlock(&c->lp_mutex); +} + +/** + * ubifs_get_lp_stats - get lprops statistics. + * @c: UBIFS file-system description object + * @st: return statistics + */ +void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *st) +{ + spin_lock(&c->space_lock); + memcpy(st, &c->lst, sizeof(struct ubifs_lp_stats)); + spin_unlock(&c->space_lock); +} + +/** + * ubifs_change_one_lp - change LEB properties. + * @c: the UBIFS file-system description object + * @lnum: LEB to change properties for + * @free: amount of free space + * @dirty: amount of dirty space + * @flags_set: flags to set + * @flags_clean: flags to clean + * @idx_gc_cnt: change to the count of idx_gc list + * + * This function changes properties of LEB @lnum. It is a helper wrapper over + * 'ubifs_change_lp()' which hides lprops get/release. The arguments are the + * same as in case of 'ubifs_change_lp()'. Returns zero in case of success and + * a negative error code in case of failure. + */ +int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, + int flags_set, int flags_clean, int idx_gc_cnt) +{ + int err = 0, flags; + const struct ubifs_lprops *lp; + + ubifs_get_lprops(c); + + lp = ubifs_lpt_lookup_dirty(c, lnum); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } + + flags = (lp->flags | flags_set) & ~flags_clean; + lp = ubifs_change_lp(c, lp, free, dirty, flags, idx_gc_cnt); + if (IS_ERR(lp)) + err = PTR_ERR(lp); + +out: + ubifs_release_lprops(c); + return err; +} + +/** + * ubifs_update_one_lp - update LEB properties. + * @c: the UBIFS file-system description object + * @lnum: LEB to change properties for + * @free: amount of free space + * @dirty: amount of dirty space to add + * @flags_set: flags to set + * @flags_clean: flags to clean + * + * This function is the same as 'ubifs_change_one_lp()' but @dirty is added to + * current dirty space, not substitutes it. + */ +int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, + int flags_set, int flags_clean) +{ + int err = 0, flags; + const struct ubifs_lprops *lp; + + ubifs_get_lprops(c); + + lp = ubifs_lpt_lookup_dirty(c, lnum); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } + + flags = (lp->flags | flags_set) & ~flags_clean; + lp = ubifs_change_lp(c, lp, free, lp->dirty + dirty, flags, 0); + if (IS_ERR(lp)) + err = PTR_ERR(lp); + +out: + ubifs_release_lprops(c); + return err; +} + +/** + * ubifs_read_one_lp - read LEB properties. + * @c: the UBIFS file-system description object + * @lnum: LEB to read properties for + * @lp: where to store read properties + * + * This helper function reads properties of a LEB @lnum and stores them in @lp. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp) +{ + int err = 0; + const struct ubifs_lprops *lpp; + + ubifs_get_lprops(c); + + lpp = ubifs_lpt_lookup(c, lnum); + if (IS_ERR(lpp)) { + err = PTR_ERR(lpp); + goto out; + } + + memcpy(lp, lpp, sizeof(struct ubifs_lprops)); + +out: + ubifs_release_lprops(c); + return err; +} + +/** + * ubifs_fast_find_free - try to find a LEB with free space quickly. + * @c: the UBIFS file-system description object + * + * This function returns LEB properties for a LEB with free space or %NULL if + * the function is unable to find a LEB quickly. + */ +const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + struct ubifs_lpt_heap *heap; + + ubifs_assert(mutex_is_locked(&c->lp_mutex)); + + heap = &c->lpt_heap[LPROPS_FREE - 1]; + if (heap->cnt == 0) + return NULL; + + lprops = heap->arr[0]; + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert(!(lprops->flags & LPROPS_INDEX)); + return lprops; +} + +/** + * ubifs_fast_find_empty - try to find an empty LEB quickly. + * @c: the UBIFS file-system description object + * + * This function returns LEB properties for an empty LEB or %NULL if the + * function is unable to find an empty LEB quickly. + */ +const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + + ubifs_assert(mutex_is_locked(&c->lp_mutex)); + + if (list_empty(&c->empty_list)) + return NULL; + + lprops = list_entry(c->empty_list.next, struct ubifs_lprops, list); + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert(!(lprops->flags & LPROPS_INDEX)); + ubifs_assert(lprops->free == c->leb_size); + return lprops; +} + +/** + * ubifs_fast_find_freeable - try to find a freeable LEB quickly. + * @c: the UBIFS file-system description object + * + * This function returns LEB properties for a freeable LEB or %NULL if the + * function is unable to find a freeable LEB quickly. + */ +const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + + ubifs_assert(mutex_is_locked(&c->lp_mutex)); + + if (list_empty(&c->freeable_list)) + return NULL; + + lprops = list_entry(c->freeable_list.next, struct ubifs_lprops, list); + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert(!(lprops->flags & LPROPS_INDEX)); + ubifs_assert(lprops->free + lprops->dirty == c->leb_size); + ubifs_assert(c->freeable_cnt > 0); + return lprops; +} + +/** + * ubifs_fast_find_frdi_idx - try to find a freeable index LEB quickly. + * @c: the UBIFS file-system description object + * + * This function returns LEB properties for a freeable index LEB or %NULL if the + * function is unable to find a freeable index LEB quickly. + */ +const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + + ubifs_assert(mutex_is_locked(&c->lp_mutex)); + + if (list_empty(&c->frdi_idx_list)) + return NULL; + + lprops = list_entry(c->frdi_idx_list.next, struct ubifs_lprops, list); + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert((lprops->flags & LPROPS_INDEX)); + ubifs_assert(lprops->free + lprops->dirty == c->leb_size); + return lprops; +} + +#ifdef CONFIG_UBIFS_FS_DEBUG + +/** + * dbg_check_cats - check category heaps and lists. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +int dbg_check_cats(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + struct list_head *pos; + int i, cat; + + if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) + return 0; + + list_for_each_entry(lprops, &c->empty_list, list) { + if (lprops->free != c->leb_size) { + ubifs_err("non-empty LEB %d on empty list " + "(free %d dirty %d flags %d)", lprops->lnum, + lprops->free, lprops->dirty, lprops->flags); + return -EINVAL; + } + if (lprops->flags & LPROPS_TAKEN) { + ubifs_err("taken LEB %d on empty list " + "(free %d dirty %d flags %d)", lprops->lnum, + lprops->free, lprops->dirty, lprops->flags); + return -EINVAL; + } + } + + i = 0; + list_for_each_entry(lprops, &c->freeable_list, list) { + if (lprops->free + lprops->dirty != c->leb_size) { + ubifs_err("non-freeable LEB %d on freeable list " + "(free %d dirty %d flags %d)", lprops->lnum, + lprops->free, lprops->dirty, lprops->flags); + return -EINVAL; + } + if (lprops->flags & LPROPS_TAKEN) { + ubifs_err("taken LEB %d on freeable list " + "(free %d dirty %d flags %d)", lprops->lnum, + lprops->free, lprops->dirty, lprops->flags); + return -EINVAL; + } + i += 1; + } + if (i != c->freeable_cnt) { + ubifs_err("freeable list count %d expected %d", i, + c->freeable_cnt); + return -EINVAL; + } + + i = 0; + list_for_each(pos, &c->idx_gc) + i += 1; + if (i != c->idx_gc_cnt) { + ubifs_err("idx_gc list count %d expected %d", i, + c->idx_gc_cnt); + return -EINVAL; + } + + list_for_each_entry(lprops, &c->frdi_idx_list, list) { + if (lprops->free + lprops->dirty != c->leb_size) { + ubifs_err("non-freeable LEB %d on frdi_idx list " + "(free %d dirty %d flags %d)", lprops->lnum, + lprops->free, lprops->dirty, lprops->flags); + return -EINVAL; + } + if (lprops->flags & LPROPS_TAKEN) { + ubifs_err("taken LEB %d on frdi_idx list " + "(free %d dirty %d flags %d)", lprops->lnum, + lprops->free, lprops->dirty, lprops->flags); + return -EINVAL; + } + if (!(lprops->flags & LPROPS_INDEX)) { + ubifs_err("non-index LEB %d on frdi_idx list " + "(free %d dirty %d flags %d)", lprops->lnum, + lprops->free, lprops->dirty, lprops->flags); + return -EINVAL; + } + } + + for (cat = 1; cat <= LPROPS_HEAP_CNT; cat++) { + struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1]; + + for (i = 0; i < heap->cnt; i++) { + lprops = heap->arr[i]; + if (!lprops) { + ubifs_err("null ptr in LPT heap cat %d", cat); + return -EINVAL; + } + if (lprops->hpos != i) { + ubifs_err("bad ptr in LPT heap cat %d", cat); + return -EINVAL; + } + if (lprops->flags & LPROPS_TAKEN) { + ubifs_err("taken LEB in LPT heap cat %d", cat); + return -EINVAL; + } + } + } + + return 0; +} + +void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, + int add_pos) +{ + int i = 0, j, err = 0; + + if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) + return; + + for (i = 0; i < heap->cnt; i++) { + struct ubifs_lprops *lprops = heap->arr[i]; + struct ubifs_lprops *lp; + + if (i != add_pos) + if ((lprops->flags & LPROPS_CAT_MASK) != cat) { + err = 1; + goto out; + } + if (lprops->hpos != i) { + err = 2; + goto out; + } + lp = ubifs_lpt_lookup(c, lprops->lnum); + if (IS_ERR(lp)) { + err = 3; + goto out; + } + if (lprops != lp) { + dbg_msg("lprops %zx lp %zx lprops->lnum %d lp->lnum %d", + (size_t)lprops, (size_t)lp, lprops->lnum, + lp->lnum); + err = 4; + goto out; + } + for (j = 0; j < i; j++) { + lp = heap->arr[j]; + if (lp == lprops) { + err = 5; + goto out; + } + if (lp->lnum == lprops->lnum) { + err = 6; + goto out; + } + } + } +out: + if (err) { + dbg_msg("failed cat %d hpos %d err %d", cat, i, err); + dbg_dump_stack(); + dbg_dump_heap(c, heap, cat); + } +} + +/** + * struct scan_check_data - data provided to scan callback function. + * @lst: LEB properties statistics + * @err: error code + */ +struct scan_check_data { + struct ubifs_lp_stats lst; + int err; +}; + +/** + * scan_check_cb - scan callback. + * @c: the UBIFS file-system description object + * @lp: LEB properties to scan + * @in_tree: whether the LEB properties are in main memory + * @data: information passed to and from the caller of the scan + * + * This function returns a code that indicates whether the scan should continue + * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree + * in main memory (%LPT_SCAN_ADD), or whether the scan should stop + * (%LPT_SCAN_STOP). + */ +static int scan_check_cb(struct ubifs_info *c, + const struct ubifs_lprops *lp, int in_tree, + struct scan_check_data *data) +{ + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + struct ubifs_lp_stats *lst = &data->lst; + int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty; + + cat = lp->flags & LPROPS_CAT_MASK; + if (cat != LPROPS_UNCAT) { + cat = ubifs_categorize_lprops(c, lp); + if (cat != (lp->flags & LPROPS_CAT_MASK)) { + ubifs_err("bad LEB category %d expected %d", + (lp->flags & LPROPS_CAT_MASK), cat); + goto out; + } + } + + /* Check lp is on its category list (if it has one) */ + if (in_tree) { + struct list_head *list = NULL; + + switch (cat) { + case LPROPS_EMPTY: + list = &c->empty_list; + break; + case LPROPS_FREEABLE: + list = &c->freeable_list; + break; + case LPROPS_FRDI_IDX: + list = &c->frdi_idx_list; + break; + case LPROPS_UNCAT: + list = &c->uncat_list; + break; + } + if (list) { + struct ubifs_lprops *lprops; + int found = 0; + + list_for_each_entry(lprops, list, list) { + if (lprops == lp) { + found = 1; + break; + } + } + if (!found) { + ubifs_err("bad LPT list (category %d)", cat); + goto out; + } + } + } + + /* Check lp is on its category heap (if it has one) */ + if (in_tree && cat > 0 && cat <= LPROPS_HEAP_CNT) { + struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1]; + + if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || + lp != heap->arr[lp->hpos]) { + ubifs_err("bad LPT heap (category %d)", cat); + goto out; + } + } + + sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); + if (IS_ERR(sleb)) { + /* + * After an unclean unmount, empty and freeable LEBs + * may contain garbage. + */ + if (lp->free == c->leb_size) { + ubifs_err("scan errors were in empty LEB " + "- continuing checking"); + lst->empty_lebs += 1; + lst->total_free += c->leb_size; + lst->total_dark += calc_dark(c, c->leb_size); + return LPT_SCAN_CONTINUE; + } + + if (lp->free + lp->dirty == c->leb_size && + !(lp->flags & LPROPS_INDEX)) { + ubifs_err("scan errors were in freeable LEB " + "- continuing checking"); + lst->total_free += lp->free; + lst->total_dirty += lp->dirty; + lst->total_dark += calc_dark(c, c->leb_size); + return LPT_SCAN_CONTINUE; + } + data->err = PTR_ERR(sleb); + return LPT_SCAN_STOP; + } + + is_idx = -1; + list_for_each_entry(snod, &sleb->nodes, list) { + int found, level = 0; + + cond_resched(); + + if (is_idx == -1) + is_idx = (snod->type == UBIFS_IDX_NODE) ? 1 : 0; + + if (is_idx && snod->type != UBIFS_IDX_NODE) { + ubifs_err("indexing node in data LEB %d:%d", + lnum, snod->offs); + goto out_destroy; + } + + if (snod->type == UBIFS_IDX_NODE) { + struct ubifs_idx_node *idx = snod->node; + + key_read(c, ubifs_idx_key(c, idx), &snod->key); + level = le16_to_cpu(idx->level); + } + + found = ubifs_tnc_has_node(c, &snod->key, level, lnum, + snod->offs, is_idx); + if (found) { + if (found < 0) + goto out_destroy; + used += ALIGN(snod->len, 8); + } + } + + free = c->leb_size - sleb->endpt; + dirty = sleb->endpt - used; + + if (free > c->leb_size || free < 0 || dirty > c->leb_size || + dirty < 0) { + ubifs_err("bad calculated accounting for LEB %d: " + "free %d, dirty %d", lnum, free, dirty); + goto out_destroy; + } + + if (lp->free + lp->dirty == c->leb_size && + free + dirty == c->leb_size) + if ((is_idx && !(lp->flags & LPROPS_INDEX)) || + (!is_idx && free == c->leb_size) || + lp->free == c->leb_size) { + /* + * Empty or freeable LEBs could contain index + * nodes from an uncompleted commit due to an + * unclean unmount. Or they could be empty for + * the same reason. Or it may simply not have been + * unmapped. + */ + free = lp->free; + dirty = lp->dirty; + is_idx = 0; + } + + if (is_idx && lp->free + lp->dirty == free + dirty && + lnum != c->ihead_lnum) { + /* + * After an unclean unmount, an index LEB could have a different + * amount of free space than the value recorded by lprops. That + * is because the in-the-gaps method may use free space or + * create free space (as a side-effect of using ubi_leb_change + * and not writing the whole LEB). The incorrect free space + * value is not a problem because the index is only ever + * allocated empty LEBs, so there will never be an attempt to + * write to the free space at the end of an index LEB - except + * by the in-the-gaps method for which it is not a problem. + */ + free = lp->free; + dirty = lp->dirty; + } + + if (lp->free != free || lp->dirty != dirty) + goto out_print; + + if (is_idx && !(lp->flags & LPROPS_INDEX)) { + if (free == c->leb_size) + /* Free but not unmapped LEB, it's fine */ + is_idx = 0; + else { + ubifs_err("indexing node without indexing " + "flag"); + goto out_print; + } + } + + if (!is_idx && (lp->flags & LPROPS_INDEX)) { + ubifs_err("data node with indexing flag"); + goto out_print; + } + + if (free == c->leb_size) + lst->empty_lebs += 1; + + if (is_idx) + lst->idx_lebs += 1; + + if (!(lp->flags & LPROPS_INDEX)) + lst->total_used += c->leb_size - free - dirty; + lst->total_free += free; + lst->total_dirty += dirty; + + if (!(lp->flags & LPROPS_INDEX)) { + int spc = free + dirty; + + if (spc < c->dead_wm) + lst->total_dead += spc; + else + lst->total_dark += calc_dark(c, spc); + } + + ubifs_scan_destroy(sleb); + + return LPT_SCAN_CONTINUE; + +out_print: + ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " + "should be free %d, dirty %d", + lnum, lp->free, lp->dirty, lp->flags, free, dirty); + dbg_dump_leb(c, lnum); +out_destroy: + ubifs_scan_destroy(sleb); +out: + data->err = -EINVAL; + return LPT_SCAN_STOP; +} + +/** + * dbg_check_lprops - check all LEB properties. + * @c: UBIFS file-system description object + * + * This function checks all LEB properties and makes sure they are all correct. + * It returns zero if everything is fine, %-EINVAL if there is an inconsistency + * and other negative error codes in case of other errors. This function is + * called while the file system is locked (because of commit start), so no + * additional locking is required. Note that locking the LPT mutex would cause + * a circular lock dependency with the TNC mutex. + */ +int dbg_check_lprops(struct ubifs_info *c) +{ + int i, err; + struct scan_check_data data; + struct ubifs_lp_stats *lst = &data.lst; + + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + return 0; + + /* + * As we are going to scan the media, the write buffers have to be + * synchronized. + */ + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + return err; + } + + memset(lst, 0, sizeof(struct ubifs_lp_stats)); + + data.err = 0; + err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, + (ubifs_lpt_scan_callback)scan_check_cb, + &data); + if (err && err != -ENOSPC) + goto out; + if (data.err) { + err = data.err; + goto out; + } + + if (lst->empty_lebs != c->lst.empty_lebs || + lst->idx_lebs != c->lst.idx_lebs || + lst->total_free != c->lst.total_free || + lst->total_dirty != c->lst.total_dirty || + lst->total_used != c->lst.total_used) { + ubifs_err("bad overall accounting"); + ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " + "total_free %lld, total_dirty %lld, total_used %lld", + lst->empty_lebs, lst->idx_lebs, lst->total_free, + lst->total_dirty, lst->total_used); + ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " + "total_free %lld, total_dirty %lld, total_used %lld", + c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, + c->lst.total_dirty, c->lst.total_used); + err = -EINVAL; + goto out; + } + + if (lst->total_dead != c->lst.total_dead || + lst->total_dark != c->lst.total_dark) { + ubifs_err("bad dead/dark space accounting"); + ubifs_err("calculated: total_dead %lld, total_dark %lld", + lst->total_dead, lst->total_dark); + ubifs_err("read from lprops: total_dead %lld, total_dark %lld", + c->lst.total_dead, c->lst.total_dark); + err = -EINVAL; + goto out; + } + + err = dbg_check_cats(c); +out: + return err; +} + +#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c new file mode 100644 index 0000000..9ff2463 --- /dev/null +++ b/fs/ubifs/lpt.c @@ -0,0 +1,2243 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements the LEB properties tree (LPT) area. The LPT area + * contains the LEB properties tree, a table of LPT area eraseblocks (ltab), and + * (for the "big" model) a table of saved LEB numbers (lsave). The LPT area sits + * between the log and the orphan area. + * + * The LPT area is like a miniature self-contained file system. It is required + * that it never runs out of space, is fast to access and update, and scales + * logarithmically. The LEB properties tree is implemented as a wandering tree + * much like the TNC, and the LPT area has its own garbage collection. + * + * The LPT has two slightly different forms called the "small model" and the + * "big model". The small model is used when the entire LEB properties table + * can be written into a single eraseblock. In that case, garbage collection + * consists of just writing the whole table, which therefore makes all other + * eraseblocks reusable. In the case of the big model, dirty eraseblocks are + * selected for garbage collection, which consists are marking the nodes in + * that LEB as dirty, and then only the dirty nodes are written out. Also, in + * the case of the big model, a table of LEB numbers is saved so that the entire + * LPT does not to be scanned looking for empty eraseblocks when UBIFS is first + * mounted. + */ + +#include +#include "ubifs.h" + +/** + * do_calc_lpt_geom - calculate sizes for the LPT area. + * @c: the UBIFS file-system description object + * + * Calculate the sizes of LPT bit fields, nodes, and tree, based on the + * properties of the flash and whether LPT is "big" (c->big_lpt). + */ +static void do_calc_lpt_geom(struct ubifs_info *c) +{ + int i, n, bits, per_leb_wastage, max_pnode_cnt; + long long sz, tot_wastage; + + n = c->main_lebs + c->max_leb_cnt - c->leb_cnt; + max_pnode_cnt = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT); + + c->lpt_hght = 1; + n = UBIFS_LPT_FANOUT; + while (n < max_pnode_cnt) { + c->lpt_hght += 1; + n <<= UBIFS_LPT_FANOUT_SHIFT; + } + + c->pnode_cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT); + + n = DIV_ROUND_UP(c->pnode_cnt, UBIFS_LPT_FANOUT); + c->nnode_cnt = n; + for (i = 1; i < c->lpt_hght; i++) { + n = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT); + c->nnode_cnt += n; + } + + c->space_bits = fls(c->leb_size) - 3; + c->lpt_lnum_bits = fls(c->lpt_lebs); + c->lpt_offs_bits = fls(c->leb_size - 1); + c->lpt_spc_bits = fls(c->leb_size); + + n = DIV_ROUND_UP(c->max_leb_cnt, UBIFS_LPT_FANOUT); + c->pcnt_bits = fls(n - 1); + + c->lnum_bits = fls(c->max_leb_cnt - 1); + + bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + + (c->big_lpt ? c->pcnt_bits : 0) + + (c->space_bits * 2 + 1) * UBIFS_LPT_FANOUT; + c->pnode_sz = (bits + 7) / 8; + + bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + + (c->big_lpt ? c->pcnt_bits : 0) + + (c->lpt_lnum_bits + c->lpt_offs_bits) * UBIFS_LPT_FANOUT; + c->nnode_sz = (bits + 7) / 8; + + bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + + c->lpt_lebs * c->lpt_spc_bits * 2; + c->ltab_sz = (bits + 7) / 8; + + bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + + c->lnum_bits * c->lsave_cnt; + c->lsave_sz = (bits + 7) / 8; + + /* Calculate the minimum LPT size */ + c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; + c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz; + c->lpt_sz += c->ltab_sz; + c->lpt_sz += c->lsave_sz; + + /* Add wastage */ + sz = c->lpt_sz; + per_leb_wastage = max_t(int, c->pnode_sz, c->nnode_sz); + sz += per_leb_wastage; + tot_wastage = per_leb_wastage; + while (sz > c->leb_size) { + sz += per_leb_wastage; + sz -= c->leb_size; + tot_wastage += per_leb_wastage; + } + tot_wastage += ALIGN(sz, c->min_io_size) - sz; + c->lpt_sz += tot_wastage; +} + +/** + * ubifs_calc_lpt_geom - calculate and check sizes for the LPT area. + * @c: the UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_calc_lpt_geom(struct ubifs_info *c) +{ + int lebs_needed; + uint64_t sz; + + do_calc_lpt_geom(c); + + /* Verify that lpt_lebs is big enough */ + sz = c->lpt_sz * 2; /* Must have at least 2 times the size */ + sz += c->leb_size - 1; + do_div(sz, c->leb_size); + lebs_needed = sz; + if (lebs_needed > c->lpt_lebs) { + ubifs_err("too few LPT LEBs"); + return -EINVAL; + } + + /* Verify that ltab fits in a single LEB (since ltab is a single node */ + if (c->ltab_sz > c->leb_size) { + ubifs_err("LPT ltab too big"); + return -EINVAL; + } + + c->check_lpt_free = c->big_lpt; + + return 0; +} + +/** + * calc_dflt_lpt_geom - calculate default LPT geometry. + * @c: the UBIFS file-system description object + * @main_lebs: number of main area LEBs is passed and returned here + * @big_lpt: whether the LPT area is "big" is returned here + * + * The size of the LPT area depends on parameters that themselves are dependent + * on the size of the LPT area. This function, successively recalculates the LPT + * area geometry until the parameters and resultant geometry are consistent. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs, + int *big_lpt) +{ + int i, lebs_needed; + uint64_t sz; + + /* Start by assuming the minimum number of LPT LEBs */ + c->lpt_lebs = UBIFS_MIN_LPT_LEBS; + c->main_lebs = *main_lebs - c->lpt_lebs; + if (c->main_lebs <= 0) + return -EINVAL; + + /* And assume we will use the small LPT model */ + c->big_lpt = 0; + + /* + * Calculate the geometry based on assumptions above and then see if it + * makes sense + */ + do_calc_lpt_geom(c); + + /* Small LPT model must have lpt_sz < leb_size */ + if (c->lpt_sz > c->leb_size) { + /* Nope, so try again using big LPT model */ + c->big_lpt = 1; + do_calc_lpt_geom(c); + } + + /* Now check there are enough LPT LEBs */ + for (i = 0; i < 64 ; i++) { + sz = c->lpt_sz * 4; /* Allow 4 times the size */ + sz += c->leb_size - 1; + do_div(sz, c->leb_size); + lebs_needed = sz; + if (lebs_needed > c->lpt_lebs) { + /* Not enough LPT LEBs so try again with more */ + c->lpt_lebs = lebs_needed; + c->main_lebs = *main_lebs - c->lpt_lebs; + if (c->main_lebs <= 0) + return -EINVAL; + do_calc_lpt_geom(c); + continue; + } + if (c->ltab_sz > c->leb_size) { + ubifs_err("LPT ltab too big"); + return -EINVAL; + } + *main_lebs = c->main_lebs; + *big_lpt = c->big_lpt; + return 0; + } + return -EINVAL; +} + +/** + * pack_bits - pack bit fields end-to-end. + * @addr: address at which to pack (passed and next address returned) + * @pos: bit position at which to pack (passed and next position returned) + * @val: value to pack + * @nrbits: number of bits of value to pack (1-32) + */ +static void pack_bits(uint8_t **addr, int *pos, uint32_t val, int nrbits) +{ + uint8_t *p = *addr; + int b = *pos; + + ubifs_assert(nrbits > 0); + ubifs_assert(nrbits <= 32); + ubifs_assert(*pos >= 0); + ubifs_assert(*pos < 8); + ubifs_assert((val >> nrbits) == 0 || nrbits == 32); + if (b) { + *p |= ((uint8_t)val) << b; + nrbits += b; + if (nrbits > 8) { + *++p = (uint8_t)(val >>= (8 - b)); + if (nrbits > 16) { + *++p = (uint8_t)(val >>= 8); + if (nrbits > 24) { + *++p = (uint8_t)(val >>= 8); + if (nrbits > 32) + *++p = (uint8_t)(val >>= 8); + } + } + } + } else { + *p = (uint8_t)val; + if (nrbits > 8) { + *++p = (uint8_t)(val >>= 8); + if (nrbits > 16) { + *++p = (uint8_t)(val >>= 8); + if (nrbits > 24) + *++p = (uint8_t)(val >>= 8); + } + } + } + b = nrbits & 7; + if (b == 0) + p++; + *addr = p; + *pos = b; +} + +/** + * ubifs_unpack_bits - unpack bit fields. + * @addr: address at which to unpack (passed and next address returned) + * @pos: bit position at which to unpack (passed and next position returned) + * @nrbits: number of bits of value to unpack (1-32) + * + * This functions returns the value unpacked. + */ +uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits) +{ + const int k = 32 - nrbits; + uint8_t *p = *addr; + int b = *pos; + uint32_t val; + + ubifs_assert(nrbits > 0); + ubifs_assert(nrbits <= 32); + ubifs_assert(*pos >= 0); + ubifs_assert(*pos < 8); + if (b) { + val = p[1] | ((uint32_t)p[2] << 8) | ((uint32_t)p[3] << 16) | + ((uint32_t)p[4] << 24); + val <<= (8 - b); + val |= *p >> b; + nrbits += b; + } else + val = p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | + ((uint32_t)p[3] << 24); + val <<= k; + val >>= k; + b = nrbits & 7; + p += nrbits / 8; + *addr = p; + *pos = b; + ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32); + return val; +} + +/** + * ubifs_pack_pnode - pack all the bit fields of a pnode. + * @c: UBIFS file-system description object + * @buf: buffer into which to pack + * @pnode: pnode to pack + */ +void ubifs_pack_pnode(struct ubifs_info *c, void *buf, + struct ubifs_pnode *pnode) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0; + uint16_t crc; + + pack_bits(&addr, &pos, UBIFS_LPT_PNODE, UBIFS_LPT_TYPE_BITS); + if (c->big_lpt) + pack_bits(&addr, &pos, pnode->num, c->pcnt_bits); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + pack_bits(&addr, &pos, pnode->lprops[i].free >> 3, + c->space_bits); + pack_bits(&addr, &pos, pnode->lprops[i].dirty >> 3, + c->space_bits); + if (pnode->lprops[i].flags & LPROPS_INDEX) + pack_bits(&addr, &pos, 1, 1); + else + pack_bits(&addr, &pos, 0, 1); + } + crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, + c->pnode_sz - UBIFS_LPT_CRC_BYTES); + addr = buf; + pos = 0; + pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); +} + +/** + * ubifs_pack_nnode - pack all the bit fields of a nnode. + * @c: UBIFS file-system description object + * @buf: buffer into which to pack + * @nnode: nnode to pack + */ +void ubifs_pack_nnode(struct ubifs_info *c, void *buf, + struct ubifs_nnode *nnode) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0; + uint16_t crc; + + pack_bits(&addr, &pos, UBIFS_LPT_NNODE, UBIFS_LPT_TYPE_BITS); + if (c->big_lpt) + pack_bits(&addr, &pos, nnode->num, c->pcnt_bits); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + int lnum = nnode->nbranch[i].lnum; + + if (lnum == 0) + lnum = c->lpt_last + 1; + pack_bits(&addr, &pos, lnum - c->lpt_first, c->lpt_lnum_bits); + pack_bits(&addr, &pos, nnode->nbranch[i].offs, + c->lpt_offs_bits); + } + crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, + c->nnode_sz - UBIFS_LPT_CRC_BYTES); + addr = buf; + pos = 0; + pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); +} + +/** + * ubifs_pack_ltab - pack the LPT's own lprops table. + * @c: UBIFS file-system description object + * @buf: buffer into which to pack + * @ltab: LPT's own lprops table to pack + */ +void ubifs_pack_ltab(struct ubifs_info *c, void *buf, + struct ubifs_lpt_lprops *ltab) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0; + uint16_t crc; + + pack_bits(&addr, &pos, UBIFS_LPT_LTAB, UBIFS_LPT_TYPE_BITS); + for (i = 0; i < c->lpt_lebs; i++) { + pack_bits(&addr, &pos, ltab[i].free, c->lpt_spc_bits); + pack_bits(&addr, &pos, ltab[i].dirty, c->lpt_spc_bits); + } + crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, + c->ltab_sz - UBIFS_LPT_CRC_BYTES); + addr = buf; + pos = 0; + pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); +} + +/** + * ubifs_pack_lsave - pack the LPT's save table. + * @c: UBIFS file-system description object + * @buf: buffer into which to pack + * @lsave: LPT's save table to pack + */ +void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0; + uint16_t crc; + + pack_bits(&addr, &pos, UBIFS_LPT_LSAVE, UBIFS_LPT_TYPE_BITS); + for (i = 0; i < c->lsave_cnt; i++) + pack_bits(&addr, &pos, lsave[i], c->lnum_bits); + crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, + c->lsave_sz - UBIFS_LPT_CRC_BYTES); + addr = buf; + pos = 0; + pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); +} + +/** + * ubifs_add_lpt_dirt - add dirty space to LPT LEB properties. + * @c: UBIFS file-system description object + * @lnum: LEB number to which to add dirty space + * @dirty: amount of dirty space to add + */ +void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty) +{ + if (!dirty || !lnum) + return; + dbg_lp("LEB %d add %d to %d", + lnum, dirty, c->ltab[lnum - c->lpt_first].dirty); + ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); + c->ltab[lnum - c->lpt_first].dirty += dirty; +} + +/** + * set_ltab - set LPT LEB properties. + * @c: UBIFS file-system description object + * @lnum: LEB number + * @free: amount of free space + * @dirty: amount of dirty space + */ +static void set_ltab(struct ubifs_info *c, int lnum, int free, int dirty) +{ + dbg_lp("LEB %d free %d dirty %d to %d %d", + lnum, c->ltab[lnum - c->lpt_first].free, + c->ltab[lnum - c->lpt_first].dirty, free, dirty); + ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); + c->ltab[lnum - c->lpt_first].free = free; + c->ltab[lnum - c->lpt_first].dirty = dirty; +} + +/** + * ubifs_add_nnode_dirt - add dirty space to LPT LEB properties. + * @c: UBIFS file-system description object + * @nnode: nnode for which to add dirt + */ +void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode) +{ + struct ubifs_nnode *np = nnode->parent; + + if (np) + ubifs_add_lpt_dirt(c, np->nbranch[nnode->iip].lnum, + c->nnode_sz); + else { + ubifs_add_lpt_dirt(c, c->lpt_lnum, c->nnode_sz); + if (!(c->lpt_drty_flgs & LTAB_DIRTY)) { + c->lpt_drty_flgs |= LTAB_DIRTY; + ubifs_add_lpt_dirt(c, c->ltab_lnum, c->ltab_sz); + } + } +} + +/** + * add_pnode_dirt - add dirty space to LPT LEB properties. + * @c: UBIFS file-system description object + * @pnode: pnode for which to add dirt + */ +static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode) +{ + ubifs_add_lpt_dirt(c, pnode->parent->nbranch[pnode->iip].lnum, + c->pnode_sz); +} + +/** + * calc_nnode_num - calculate nnode number. + * @row: the row in the tree (root is zero) + * @col: the column in the row (leftmost is zero) + * + * The nnode number is a number that uniquely identifies a nnode and can be used + * easily to traverse the tree from the root to that nnode. + * + * This function calculates and returns the nnode number for the nnode at @row + * and @col. + */ +static int calc_nnode_num(int row, int col) +{ + int num, bits; + + num = 1; + while (row--) { + bits = (col & (UBIFS_LPT_FANOUT - 1)); + col >>= UBIFS_LPT_FANOUT_SHIFT; + num <<= UBIFS_LPT_FANOUT_SHIFT; + num |= bits; + } + return num; +} + +/** + * calc_nnode_num_from_parent - calculate nnode number. + * @c: UBIFS file-system description object + * @parent: parent nnode + * @iip: index in parent + * + * The nnode number is a number that uniquely identifies a nnode and can be used + * easily to traverse the tree from the root to that nnode. + * + * This function calculates and returns the nnode number based on the parent's + * nnode number and the index in parent. + */ +static int calc_nnode_num_from_parent(struct ubifs_info *c, + struct ubifs_nnode *parent, int iip) +{ + int num, shft; + + if (!parent) + return 1; + shft = (c->lpt_hght - parent->level) * UBIFS_LPT_FANOUT_SHIFT; + num = parent->num ^ (1 << shft); + num |= (UBIFS_LPT_FANOUT + iip) << shft; + return num; +} + +/** + * calc_pnode_num_from_parent - calculate pnode number. + * @c: UBIFS file-system description object + * @parent: parent nnode + * @iip: index in parent + * + * The pnode number is a number that uniquely identifies a pnode and can be used + * easily to traverse the tree from the root to that pnode. + * + * This function calculates and returns the pnode number based on the parent's + * nnode number and the index in parent. + */ +static int calc_pnode_num_from_parent(struct ubifs_info *c, + struct ubifs_nnode *parent, int iip) +{ + int i, n = c->lpt_hght - 1, pnum = parent->num, num = 0; + + for (i = 0; i < n; i++) { + num <<= UBIFS_LPT_FANOUT_SHIFT; + num |= pnum & (UBIFS_LPT_FANOUT - 1); + pnum >>= UBIFS_LPT_FANOUT_SHIFT; + } + num <<= UBIFS_LPT_FANOUT_SHIFT; + num |= iip; + return num; +} + +/** + * ubifs_create_dflt_lpt - create default LPT. + * @c: UBIFS file-system description object + * @main_lebs: number of main area LEBs is passed and returned here + * @lpt_first: LEB number of first LPT LEB + * @lpt_lebs: number of LEBs for LPT is passed and returned here + * @big_lpt: use big LPT model is passed and returned here + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, + int *lpt_lebs, int *big_lpt) +{ + int lnum, err = 0, node_sz, iopos, i, j, cnt, len, alen, row; + int blnum, boffs, bsz, bcnt; + struct ubifs_pnode *pnode = NULL; + struct ubifs_nnode *nnode = NULL; + void *buf = NULL, *p; + struct ubifs_lpt_lprops *ltab = NULL; + int *lsave = NULL; + + err = calc_dflt_lpt_geom(c, main_lebs, big_lpt); + if (err) + return err; + *lpt_lebs = c->lpt_lebs; + + /* Needed by 'ubifs_pack_nnode()' and 'set_ltab()' */ + c->lpt_first = lpt_first; + /* Needed by 'set_ltab()' */ + c->lpt_last = lpt_first + c->lpt_lebs - 1; + /* Needed by 'ubifs_pack_lsave()' */ + c->main_first = c->leb_cnt - *main_lebs; + + lsave = kmalloc(sizeof(int) * c->lsave_cnt, GFP_KERNEL); + pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_KERNEL); + nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_KERNEL); + buf = vmalloc(c->leb_size); + ltab = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs); + if (!pnode || !nnode || !buf || !ltab || !lsave) { + err = -ENOMEM; + goto out; + } + + ubifs_assert(!c->ltab); + c->ltab = ltab; /* Needed by set_ltab */ + + /* Initialize LPT's own lprops */ + for (i = 0; i < c->lpt_lebs; i++) { + ltab[i].free = c->leb_size; + ltab[i].dirty = 0; + ltab[i].tgc = 0; + ltab[i].cmt = 0; + } + + lnum = lpt_first; + p = buf; + /* Number of leaf nodes (pnodes) */ + cnt = c->pnode_cnt; + + /* + * The first pnode contains the LEB properties for the LEBs that contain + * the root inode node and the root index node of the index tree. + */ + node_sz = ALIGN(ubifs_idx_node_sz(c, 1), 8); + iopos = ALIGN(node_sz, c->min_io_size); + pnode->lprops[0].free = c->leb_size - iopos; + pnode->lprops[0].dirty = iopos - node_sz; + pnode->lprops[0].flags = LPROPS_INDEX; + + node_sz = UBIFS_INO_NODE_SZ; + iopos = ALIGN(node_sz, c->min_io_size); + pnode->lprops[1].free = c->leb_size - iopos; + pnode->lprops[1].dirty = iopos - node_sz; + + for (i = 2; i < UBIFS_LPT_FANOUT; i++) + pnode->lprops[i].free = c->leb_size; + + /* Add first pnode */ + ubifs_pack_pnode(c, p, pnode); + p += c->pnode_sz; + len = c->pnode_sz; + pnode->num += 1; + + /* Reset pnode values for remaining pnodes */ + pnode->lprops[0].free = c->leb_size; + pnode->lprops[0].dirty = 0; + pnode->lprops[0].flags = 0; + + pnode->lprops[1].free = c->leb_size; + pnode->lprops[1].dirty = 0; + + /* + * To calculate the internal node branches, we keep information about + * the level below. + */ + blnum = lnum; /* LEB number of level below */ + boffs = 0; /* Offset of level below */ + bcnt = cnt; /* Number of nodes in level below */ + bsz = c->pnode_sz; /* Size of nodes in level below */ + + /* Add all remaining pnodes */ + for (i = 1; i < cnt; i++) { + if (len + c->pnode_sz > c->leb_size) { + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, alen - len); + memset(p, 0xff, alen - len); + err = ubi_leb_change(c->ubi, lnum++, buf, alen, + UBI_SHORTTERM); + if (err) + goto out; + p = buf; + len = 0; + } + ubifs_pack_pnode(c, p, pnode); + p += c->pnode_sz; + len += c->pnode_sz; + /* + * pnodes are simply numbered left to right starting at zero, + * which means the pnode number can be used easily to traverse + * down the tree to the corresponding pnode. + */ + pnode->num += 1; + } + + row = 0; + for (i = UBIFS_LPT_FANOUT; cnt > i; i <<= UBIFS_LPT_FANOUT_SHIFT) + row += 1; + /* Add all nnodes, one level at a time */ + while (1) { + /* Number of internal nodes (nnodes) at next level */ + cnt = DIV_ROUND_UP(cnt, UBIFS_LPT_FANOUT); + for (i = 0; i < cnt; i++) { + if (len + c->nnode_sz > c->leb_size) { + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, + alen - len); + memset(p, 0xff, alen - len); + err = ubi_leb_change(c->ubi, lnum++, buf, alen, + UBI_SHORTTERM); + if (err) + goto out; + p = buf; + len = 0; + } + /* Only 1 nnode at this level, so it is the root */ + if (cnt == 1) { + c->lpt_lnum = lnum; + c->lpt_offs = len; + } + /* Set branches to the level below */ + for (j = 0; j < UBIFS_LPT_FANOUT; j++) { + if (bcnt) { + if (boffs + bsz > c->leb_size) { + blnum += 1; + boffs = 0; + } + nnode->nbranch[j].lnum = blnum; + nnode->nbranch[j].offs = boffs; + boffs += bsz; + bcnt--; + } else { + nnode->nbranch[j].lnum = 0; + nnode->nbranch[j].offs = 0; + } + } + nnode->num = calc_nnode_num(row, i); + ubifs_pack_nnode(c, p, nnode); + p += c->nnode_sz; + len += c->nnode_sz; + } + /* Only 1 nnode at this level, so it is the root */ + if (cnt == 1) + break; + /* Update the information about the level below */ + bcnt = cnt; + bsz = c->nnode_sz; + row -= 1; + } + + if (*big_lpt) { + /* Need to add LPT's save table */ + if (len + c->lsave_sz > c->leb_size) { + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, alen - len); + memset(p, 0xff, alen - len); + err = ubi_leb_change(c->ubi, lnum++, buf, alen, + UBI_SHORTTERM); + if (err) + goto out; + p = buf; + len = 0; + } + + c->lsave_lnum = lnum; + c->lsave_offs = len; + + for (i = 0; i < c->lsave_cnt && i < *main_lebs; i++) + lsave[i] = c->main_first + i; + for (; i < c->lsave_cnt; i++) + lsave[i] = c->main_first; + + ubifs_pack_lsave(c, p, lsave); + p += c->lsave_sz; + len += c->lsave_sz; + } + + /* Need to add LPT's own LEB properties table */ + if (len + c->ltab_sz > c->leb_size) { + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, alen - len); + memset(p, 0xff, alen - len); + err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM); + if (err) + goto out; + p = buf; + len = 0; + } + + c->ltab_lnum = lnum; + c->ltab_offs = len; + + /* Update ltab before packing it */ + len += c->ltab_sz; + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, alen - len); + + ubifs_pack_ltab(c, p, ltab); + p += c->ltab_sz; + + /* Write remaining buffer */ + memset(p, 0xff, alen - len); + err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM); + if (err) + goto out; + + c->nhead_lnum = lnum; + c->nhead_offs = ALIGN(len, c->min_io_size); + + dbg_lp("space_bits %d", c->space_bits); + dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits); + dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits); + dbg_lp("lpt_spc_bits %d", c->lpt_spc_bits); + dbg_lp("pcnt_bits %d", c->pcnt_bits); + dbg_lp("lnum_bits %d", c->lnum_bits); + dbg_lp("pnode_sz %d", c->pnode_sz); + dbg_lp("nnode_sz %d", c->nnode_sz); + dbg_lp("ltab_sz %d", c->ltab_sz); + dbg_lp("lsave_sz %d", c->lsave_sz); + dbg_lp("lsave_cnt %d", c->lsave_cnt); + dbg_lp("lpt_hght %d", c->lpt_hght); + dbg_lp("big_lpt %d", c->big_lpt); + dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); + dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); + dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); + if (c->big_lpt) + dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); +out: + c->ltab = NULL; + kfree(lsave); + vfree(ltab); + vfree(buf); + kfree(nnode); + kfree(pnode); + return err; +} + +/** + * update_cats - add LEB properties of a pnode to LEB category lists and heaps. + * @c: UBIFS file-system description object + * @pnode: pnode + * + * When a pnode is loaded into memory, the LEB properties it contains are added, + * by this function, to the LEB category lists and heaps. + */ +static void update_cats(struct ubifs_info *c, struct ubifs_pnode *pnode) +{ + int i; + + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + int cat = pnode->lprops[i].flags & LPROPS_CAT_MASK; + int lnum = pnode->lprops[i].lnum; + + if (!lnum) + return; + ubifs_add_to_cat(c, &pnode->lprops[i], cat); + } +} + +/** + * replace_cats - add LEB properties of a pnode to LEB category lists and heaps. + * @c: UBIFS file-system description object + * @old_pnode: pnode copied + * @new_pnode: pnode copy + * + * During commit it is sometimes necessary to copy a pnode + * (see dirty_cow_pnode). When that happens, references in + * category lists and heaps must be replaced. This function does that. + */ +static void replace_cats(struct ubifs_info *c, struct ubifs_pnode *old_pnode, + struct ubifs_pnode *new_pnode) +{ + int i; + + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + if (!new_pnode->lprops[i].lnum) + return; + ubifs_replace_cat(c, &old_pnode->lprops[i], + &new_pnode->lprops[i]); + } +} + +/** + * check_lpt_crc - check LPT node crc is correct. + * @c: UBIFS file-system description object + * @buf: buffer containing node + * @len: length of node + * + * This function returns %0 on success and a negative error code on failure. + */ +static int check_lpt_crc(void *buf, int len) +{ + int pos = 0; + uint8_t *addr = buf; + uint16_t crc, calc_crc; + + crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS); + calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, + len - UBIFS_LPT_CRC_BYTES); + if (crc != calc_crc) { + ubifs_err("invalid crc in LPT node: crc %hx calc %hx", crc, + calc_crc); + dbg_dump_stack(); + return -EINVAL; + } + return 0; +} + +/** + * check_lpt_type - check LPT node type is correct. + * @c: UBIFS file-system description object + * @addr: address of type bit field is passed and returned updated here + * @pos: position of type bit field is passed and returned updated here + * @type: expected type + * + * This function returns %0 on success and a negative error code on failure. + */ +static int check_lpt_type(uint8_t **addr, int *pos, int type) +{ + int node_type; + + node_type = ubifs_unpack_bits(addr, pos, UBIFS_LPT_TYPE_BITS); + if (node_type != type) { + ubifs_err("invalid type (%d) in LPT node type %d", node_type, + type); + dbg_dump_stack(); + return -EINVAL; + } + return 0; +} + +/** + * unpack_pnode - unpack a pnode. + * @c: UBIFS file-system description object + * @buf: buffer containing packed pnode to unpack + * @pnode: pnode structure to fill + * + * This function returns %0 on success and a negative error code on failure. + */ +static int unpack_pnode(struct ubifs_info *c, void *buf, + struct ubifs_pnode *pnode) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0, err; + + err = check_lpt_type(&addr, &pos, UBIFS_LPT_PNODE); + if (err) + return err; + if (c->big_lpt) + pnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_lprops * const lprops = &pnode->lprops[i]; + + lprops->free = ubifs_unpack_bits(&addr, &pos, c->space_bits); + lprops->free <<= 3; + lprops->dirty = ubifs_unpack_bits(&addr, &pos, c->space_bits); + lprops->dirty <<= 3; + + if (ubifs_unpack_bits(&addr, &pos, 1)) + lprops->flags = LPROPS_INDEX; + else + lprops->flags = 0; + lprops->flags |= ubifs_categorize_lprops(c, lprops); + } + err = check_lpt_crc(buf, c->pnode_sz); + return err; +} + +/** + * unpack_nnode - unpack a nnode. + * @c: UBIFS file-system description object + * @buf: buffer containing packed nnode to unpack + * @nnode: nnode structure to fill + * + * This function returns %0 on success and a negative error code on failure. + */ +static int unpack_nnode(struct ubifs_info *c, void *buf, + struct ubifs_nnode *nnode) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0, err; + + err = check_lpt_type(&addr, &pos, UBIFS_LPT_NNODE); + if (err) + return err; + if (c->big_lpt) + nnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + int lnum; + + lnum = ubifs_unpack_bits(&addr, &pos, c->lpt_lnum_bits) + + c->lpt_first; + if (lnum == c->lpt_last + 1) + lnum = 0; + nnode->nbranch[i].lnum = lnum; + nnode->nbranch[i].offs = ubifs_unpack_bits(&addr, &pos, + c->lpt_offs_bits); + } + err = check_lpt_crc(buf, c->nnode_sz); + return err; +} + +/** + * unpack_ltab - unpack the LPT's own lprops table. + * @c: UBIFS file-system description object + * @buf: buffer from which to unpack + * + * This function returns %0 on success and a negative error code on failure. + */ +static int unpack_ltab(struct ubifs_info *c, void *buf) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0, err; + + err = check_lpt_type(&addr, &pos, UBIFS_LPT_LTAB); + if (err) + return err; + for (i = 0; i < c->lpt_lebs; i++) { + int free = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits); + int dirty = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits); + + if (free < 0 || free > c->leb_size || dirty < 0 || + dirty > c->leb_size || free + dirty > c->leb_size) + return -EINVAL; + + c->ltab[i].free = free; + c->ltab[i].dirty = dirty; + c->ltab[i].tgc = 0; + c->ltab[i].cmt = 0; + } + err = check_lpt_crc(buf, c->ltab_sz); + return err; +} + +/** + * unpack_lsave - unpack the LPT's save table. + * @c: UBIFS file-system description object + * @buf: buffer from which to unpack + * + * This function returns %0 on success and a negative error code on failure. + */ +static int unpack_lsave(struct ubifs_info *c, void *buf) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0, err; + + err = check_lpt_type(&addr, &pos, UBIFS_LPT_LSAVE); + if (err) + return err; + for (i = 0; i < c->lsave_cnt; i++) { + int lnum = ubifs_unpack_bits(&addr, &pos, c->lnum_bits); + + if (lnum < c->main_first || lnum >= c->leb_cnt) + return -EINVAL; + c->lsave[i] = lnum; + } + err = check_lpt_crc(buf, c->lsave_sz); + return err; +} + +/** + * validate_nnode - validate a nnode. + * @c: UBIFS file-system description object + * @nnode: nnode to validate + * @parent: parent nnode (or NULL for the root nnode) + * @iip: index in parent + * + * This function returns %0 on success and a negative error code on failure. + */ +static int validate_nnode(struct ubifs_info *c, struct ubifs_nnode *nnode, + struct ubifs_nnode *parent, int iip) +{ + int i, lvl, max_offs; + + if (c->big_lpt) { + int num = calc_nnode_num_from_parent(c, parent, iip); + + if (nnode->num != num) + return -EINVAL; + } + lvl = parent ? parent->level - 1 : c->lpt_hght; + if (lvl < 1) + return -EINVAL; + if (lvl == 1) + max_offs = c->leb_size - c->pnode_sz; + else + max_offs = c->leb_size - c->nnode_sz; + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + int lnum = nnode->nbranch[i].lnum; + int offs = nnode->nbranch[i].offs; + + if (lnum == 0) { + if (offs != 0) + return -EINVAL; + continue; + } + if (lnum < c->lpt_first || lnum > c->lpt_last) + return -EINVAL; + if (offs < 0 || offs > max_offs) + return -EINVAL; + } + return 0; +} + +/** + * validate_pnode - validate a pnode. + * @c: UBIFS file-system description object + * @pnode: pnode to validate + * @parent: parent nnode + * @iip: index in parent + * + * This function returns %0 on success and a negative error code on failure. + */ +static int validate_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, + struct ubifs_nnode *parent, int iip) +{ + int i; + + if (c->big_lpt) { + int num = calc_pnode_num_from_parent(c, parent, iip); + + if (pnode->num != num) + return -EINVAL; + } + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + int free = pnode->lprops[i].free; + int dirty = pnode->lprops[i].dirty; + + if (free < 0 || free > c->leb_size || free % c->min_io_size || + (free & 7)) + return -EINVAL; + if (dirty < 0 || dirty > c->leb_size || (dirty & 7)) + return -EINVAL; + if (dirty + free > c->leb_size) + return -EINVAL; + } + return 0; +} + +/** + * set_pnode_lnum - set LEB numbers on a pnode. + * @c: UBIFS file-system description object + * @pnode: pnode to update + * + * This function calculates the LEB numbers for the LEB properties it contains + * based on the pnode number. + */ +static void set_pnode_lnum(struct ubifs_info *c, struct ubifs_pnode *pnode) +{ + int i, lnum; + + lnum = (pnode->num << UBIFS_LPT_FANOUT_SHIFT) + c->main_first; + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + if (lnum >= c->leb_cnt) + return; + pnode->lprops[i].lnum = lnum++; + } +} + +/** + * ubifs_read_nnode - read a nnode from flash and link it to the tree in memory. + * @c: UBIFS file-system description object + * @parent: parent nnode (or NULL for the root) + * @iip: index in parent + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) +{ + struct ubifs_nbranch *branch = NULL; + struct ubifs_nnode *nnode = NULL; + void *buf = c->lpt_nod_buf; + int err, lnum, offs; + + if (parent) { + branch = &parent->nbranch[iip]; + lnum = branch->lnum; + offs = branch->offs; + } else { + lnum = c->lpt_lnum; + offs = c->lpt_offs; + } + nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_NOFS); + if (!nnode) { + err = -ENOMEM; + goto out; + } + if (lnum == 0) { + /* + * This nnode was not written which just means that the LEB + * properties in the subtree below it describe empty LEBs. We + * make the nnode as though we had read it, which in fact means + * doing almost nothing. + */ + if (c->big_lpt) + nnode->num = calc_nnode_num_from_parent(c, parent, iip); + } else { + err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); + if (err) + goto out; + err = unpack_nnode(c, buf, nnode); + if (err) + goto out; + } + err = validate_nnode(c, nnode, parent, iip); + if (err) + goto out; + if (!c->big_lpt) + nnode->num = calc_nnode_num_from_parent(c, parent, iip); + if (parent) { + branch->nnode = nnode; + nnode->level = parent->level - 1; + } else { + c->nroot = nnode; + nnode->level = c->lpt_hght; + } + nnode->parent = parent; + nnode->iip = iip; + return 0; + +out: + ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs); + kfree(nnode); + return err; +} + +/** + * read_pnode - read a pnode from flash and link it to the tree in memory. + * @c: UBIFS file-system description object + * @parent: parent nnode + * @iip: index in parent + * + * This function returns %0 on success and a negative error code on failure. + */ +static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) +{ + struct ubifs_nbranch *branch; + struct ubifs_pnode *pnode = NULL; + void *buf = c->lpt_nod_buf; + int err, lnum, offs; + + branch = &parent->nbranch[iip]; + lnum = branch->lnum; + offs = branch->offs; + pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS); + if (!pnode) { + err = -ENOMEM; + goto out; + } + if (lnum == 0) { + /* + * This pnode was not written which just means that the LEB + * properties in it describe empty LEBs. We make the pnode as + * though we had read it. + */ + int i; + + if (c->big_lpt) + pnode->num = calc_pnode_num_from_parent(c, parent, iip); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_lprops * const lprops = &pnode->lprops[i]; + + lprops->free = c->leb_size; + lprops->flags = ubifs_categorize_lprops(c, lprops); + } + } else { + err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz); + if (err) + goto out; + err = unpack_pnode(c, buf, pnode); + if (err) + goto out; + } + err = validate_pnode(c, pnode, parent, iip); + if (err) + goto out; + if (!c->big_lpt) + pnode->num = calc_pnode_num_from_parent(c, parent, iip); + branch->pnode = pnode; + pnode->parent = parent; + pnode->iip = iip; + set_pnode_lnum(c, pnode); + c->pnodes_have += 1; + return 0; + +out: + ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); + dbg_dump_pnode(c, pnode, parent, iip); + dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); + kfree(pnode); + return err; +} + +/** + * read_ltab - read LPT's own lprops table. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +static int read_ltab(struct ubifs_info *c) +{ + int err; + void *buf; + + buf = vmalloc(c->ltab_sz); + if (!buf) + return -ENOMEM; + err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz); + if (err) + goto out; + err = unpack_ltab(c, buf); +out: + vfree(buf); + return err; +} + +/** + * read_lsave - read LPT's save table. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +static int read_lsave(struct ubifs_info *c) +{ + int err, i; + void *buf; + + buf = vmalloc(c->lsave_sz); + if (!buf) + return -ENOMEM; + err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz); + if (err) + goto out; + err = unpack_lsave(c, buf); + if (err) + goto out; + for (i = 0; i < c->lsave_cnt; i++) { + int lnum = c->lsave[i]; + + /* + * Due to automatic resizing, the values in the lsave table + * could be beyond the volume size - just ignore them. + */ + if (lnum >= c->leb_cnt) + continue; + ubifs_lpt_lookup(c, lnum); + } +out: + vfree(buf); + return err; +} + +/** + * ubifs_get_nnode - get a nnode. + * @c: UBIFS file-system description object + * @parent: parent nnode (or NULL for the root) + * @iip: index in parent + * + * This function returns a pointer to the nnode on success or a negative error + * code on failure. + */ +struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c, + struct ubifs_nnode *parent, int iip) +{ + struct ubifs_nbranch *branch; + struct ubifs_nnode *nnode; + int err; + + branch = &parent->nbranch[iip]; + nnode = branch->nnode; + if (nnode) + return nnode; + err = ubifs_read_nnode(c, parent, iip); + if (err) + return ERR_PTR(err); + return branch->nnode; +} + +/** + * ubifs_get_pnode - get a pnode. + * @c: UBIFS file-system description object + * @parent: parent nnode + * @iip: index in parent + * + * This function returns a pointer to the pnode on success or a negative error + * code on failure. + */ +struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c, + struct ubifs_nnode *parent, int iip) +{ + struct ubifs_nbranch *branch; + struct ubifs_pnode *pnode; + int err; + + branch = &parent->nbranch[iip]; + pnode = branch->pnode; + if (pnode) + return pnode; + err = read_pnode(c, parent, iip); + if (err) + return ERR_PTR(err); + update_cats(c, branch->pnode); + return branch->pnode; +} + +/** + * ubifs_lpt_lookup - lookup LEB properties in the LPT. + * @c: UBIFS file-system description object + * @lnum: LEB number to lookup + * + * This function returns a pointer to the LEB properties on success or a + * negative error code on failure. + */ +struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum) +{ + int err, i, h, iip, shft; + struct ubifs_nnode *nnode; + struct ubifs_pnode *pnode; + + if (!c->nroot) { + err = ubifs_read_nnode(c, NULL, 0); + if (err) + return ERR_PTR(err); + } + nnode = c->nroot; + i = lnum - c->main_first; + shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; + for (h = 1; h < c->lpt_hght; h++) { + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + nnode = ubifs_get_nnode(c, nnode, iip); + if (IS_ERR(nnode)) + return ERR_PTR(PTR_ERR(nnode)); + } + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + pnode = ubifs_get_pnode(c, nnode, iip); + if (IS_ERR(pnode)) + return ERR_PTR(PTR_ERR(pnode)); + iip = (i & (UBIFS_LPT_FANOUT - 1)); + dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, + pnode->lprops[iip].free, pnode->lprops[iip].dirty, + pnode->lprops[iip].flags); + return &pnode->lprops[iip]; +} + +/** + * dirty_cow_nnode - ensure a nnode is not being committed. + * @c: UBIFS file-system description object + * @nnode: nnode to check + * + * Returns dirtied nnode on success or negative error code on failure. + */ +static struct ubifs_nnode *dirty_cow_nnode(struct ubifs_info *c, + struct ubifs_nnode *nnode) +{ + struct ubifs_nnode *n; + int i; + + if (!test_bit(COW_CNODE, &nnode->flags)) { + /* nnode is not being committed */ + if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { + c->dirty_nn_cnt += 1; + ubifs_add_nnode_dirt(c, nnode); + } + return nnode; + } + + /* nnode is being committed, so copy it */ + n = kmalloc(sizeof(struct ubifs_nnode), GFP_NOFS); + if (unlikely(!n)) + return ERR_PTR(-ENOMEM); + + memcpy(n, nnode, sizeof(struct ubifs_nnode)); + n->cnext = NULL; + __set_bit(DIRTY_CNODE, &n->flags); + __clear_bit(COW_CNODE, &n->flags); + + /* The children now have new parent */ + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_nbranch *branch = &n->nbranch[i]; + + if (branch->cnode) + branch->cnode->parent = n; + } + + ubifs_assert(!test_bit(OBSOLETE_CNODE, &nnode->flags)); + __set_bit(OBSOLETE_CNODE, &nnode->flags); + + c->dirty_nn_cnt += 1; + ubifs_add_nnode_dirt(c, nnode); + if (nnode->parent) + nnode->parent->nbranch[n->iip].nnode = n; + else + c->nroot = n; + return n; +} + +/** + * dirty_cow_pnode - ensure a pnode is not being committed. + * @c: UBIFS file-system description object + * @pnode: pnode to check + * + * Returns dirtied pnode on success or negative error code on failure. + */ +static struct ubifs_pnode *dirty_cow_pnode(struct ubifs_info *c, + struct ubifs_pnode *pnode) +{ + struct ubifs_pnode *p; + + if (!test_bit(COW_CNODE, &pnode->flags)) { + /* pnode is not being committed */ + if (!test_and_set_bit(DIRTY_CNODE, &pnode->flags)) { + c->dirty_pn_cnt += 1; + add_pnode_dirt(c, pnode); + } + return pnode; + } + + /* pnode is being committed, so copy it */ + p = kmalloc(sizeof(struct ubifs_pnode), GFP_NOFS); + if (unlikely(!p)) + return ERR_PTR(-ENOMEM); + + memcpy(p, pnode, sizeof(struct ubifs_pnode)); + p->cnext = NULL; + __set_bit(DIRTY_CNODE, &p->flags); + __clear_bit(COW_CNODE, &p->flags); + replace_cats(c, pnode, p); + + ubifs_assert(!test_bit(OBSOLETE_CNODE, &pnode->flags)); + __set_bit(OBSOLETE_CNODE, &pnode->flags); + + c->dirty_pn_cnt += 1; + add_pnode_dirt(c, pnode); + pnode->parent->nbranch[p->iip].pnode = p; + return p; +} + +/** + * ubifs_lpt_lookup_dirty - lookup LEB properties in the LPT. + * @c: UBIFS file-system description object + * @lnum: LEB number to lookup + * + * This function returns a pointer to the LEB properties on success or a + * negative error code on failure. + */ +struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum) +{ + int err, i, h, iip, shft; + struct ubifs_nnode *nnode; + struct ubifs_pnode *pnode; + + if (!c->nroot) { + err = ubifs_read_nnode(c, NULL, 0); + if (err) + return ERR_PTR(err); + } + nnode = c->nroot; + nnode = dirty_cow_nnode(c, nnode); + if (IS_ERR(nnode)) + return ERR_PTR(PTR_ERR(nnode)); + i = lnum - c->main_first; + shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; + for (h = 1; h < c->lpt_hght; h++) { + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + nnode = ubifs_get_nnode(c, nnode, iip); + if (IS_ERR(nnode)) + return ERR_PTR(PTR_ERR(nnode)); + nnode = dirty_cow_nnode(c, nnode); + if (IS_ERR(nnode)) + return ERR_PTR(PTR_ERR(nnode)); + } + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + pnode = ubifs_get_pnode(c, nnode, iip); + if (IS_ERR(pnode)) + return ERR_PTR(PTR_ERR(pnode)); + pnode = dirty_cow_pnode(c, pnode); + if (IS_ERR(pnode)) + return ERR_PTR(PTR_ERR(pnode)); + iip = (i & (UBIFS_LPT_FANOUT - 1)); + dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, + pnode->lprops[iip].free, pnode->lprops[iip].dirty, + pnode->lprops[iip].flags); + ubifs_assert(test_bit(DIRTY_CNODE, &pnode->flags)); + return &pnode->lprops[iip]; +} + +/** + * lpt_init_rd - initialize the LPT for reading. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +static int lpt_init_rd(struct ubifs_info *c) +{ + int err, i; + + c->ltab = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs); + if (!c->ltab) + return -ENOMEM; + + i = max_t(int, c->nnode_sz, c->pnode_sz); + c->lpt_nod_buf = kmalloc(i, GFP_KERNEL); + if (!c->lpt_nod_buf) + return -ENOMEM; + + for (i = 0; i < LPROPS_HEAP_CNT; i++) { + c->lpt_heap[i].arr = kmalloc(sizeof(void *) * LPT_HEAP_SZ, + GFP_KERNEL); + if (!c->lpt_heap[i].arr) + return -ENOMEM; + c->lpt_heap[i].cnt = 0; + c->lpt_heap[i].max_cnt = LPT_HEAP_SZ; + } + + c->dirty_idx.arr = kmalloc(sizeof(void *) * LPT_HEAP_SZ, GFP_KERNEL); + if (!c->dirty_idx.arr) + return -ENOMEM; + c->dirty_idx.cnt = 0; + c->dirty_idx.max_cnt = LPT_HEAP_SZ; + + err = read_ltab(c); + if (err) + return err; + + dbg_lp("space_bits %d", c->space_bits); + dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits); + dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits); + dbg_lp("lpt_spc_bits %d", c->lpt_spc_bits); + dbg_lp("pcnt_bits %d", c->pcnt_bits); + dbg_lp("lnum_bits %d", c->lnum_bits); + dbg_lp("pnode_sz %d", c->pnode_sz); + dbg_lp("nnode_sz %d", c->nnode_sz); + dbg_lp("ltab_sz %d", c->ltab_sz); + dbg_lp("lsave_sz %d", c->lsave_sz); + dbg_lp("lsave_cnt %d", c->lsave_cnt); + dbg_lp("lpt_hght %d", c->lpt_hght); + dbg_lp("big_lpt %d", c->big_lpt); + dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); + dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); + dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); + if (c->big_lpt) + dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); + + return 0; +} + +/** + * lpt_init_wr - initialize the LPT for writing. + * @c: UBIFS file-system description object + * + * 'lpt_init_rd()' must have been called already. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int lpt_init_wr(struct ubifs_info *c) +{ + int err, i; + + c->ltab_cmt = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs); + if (!c->ltab_cmt) + return -ENOMEM; + + c->lpt_buf = vmalloc(c->leb_size); + if (!c->lpt_buf) + return -ENOMEM; + + if (c->big_lpt) { + c->lsave = kmalloc(sizeof(int) * c->lsave_cnt, GFP_NOFS); + if (!c->lsave) + return -ENOMEM; + err = read_lsave(c); + if (err) + return err; + } + + for (i = 0; i < c->lpt_lebs; i++) + if (c->ltab[i].free == c->leb_size) { + err = ubifs_leb_unmap(c, i + c->lpt_first); + if (err) + return err; + } + + return 0; +} + +/** + * ubifs_lpt_init - initialize the LPT. + * @c: UBIFS file-system description object + * @rd: whether to initialize lpt for reading + * @wr: whether to initialize lpt for writing + * + * For mounting 'rw', @rd and @wr are both true. For mounting 'ro', @rd is true + * and @wr is false. For mounting from 'ro' to 'rw', @rd is false and @wr is + * true. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr) +{ + int err; + + if (rd) { + err = lpt_init_rd(c); + if (err) + return err; + } + + if (wr) { + err = lpt_init_wr(c); + if (err) + return err; + } + + return 0; +} + +/** + * struct lpt_scan_node - somewhere to put nodes while we scan LPT. + * @nnode: where to keep a nnode + * @pnode: where to keep a pnode + * @cnode: where to keep a cnode + * @in_tree: is the node in the tree in memory + * @ptr.nnode: pointer to the nnode (if it is an nnode) which may be here or in + * the tree + * @ptr.pnode: ditto for pnode + * @ptr.cnode: ditto for cnode + */ +struct lpt_scan_node { + union { + struct ubifs_nnode nnode; + struct ubifs_pnode pnode; + struct ubifs_cnode cnode; + }; + int in_tree; + union { + struct ubifs_nnode *nnode; + struct ubifs_pnode *pnode; + struct ubifs_cnode *cnode; + } ptr; +}; + +/** + * scan_get_nnode - for the scan, get a nnode from either the tree or flash. + * @c: the UBIFS file-system description object + * @path: where to put the nnode + * @parent: parent of the nnode + * @iip: index in parent of the nnode + * + * This function returns a pointer to the nnode on success or a negative error + * code on failure. + */ +static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c, + struct lpt_scan_node *path, + struct ubifs_nnode *parent, int iip) +{ + struct ubifs_nbranch *branch; + struct ubifs_nnode *nnode; + void *buf = c->lpt_nod_buf; + int err; + + branch = &parent->nbranch[iip]; + nnode = branch->nnode; + if (nnode) { + path->in_tree = 1; + path->ptr.nnode = nnode; + return nnode; + } + nnode = &path->nnode; + path->in_tree = 0; + path->ptr.nnode = nnode; + memset(nnode, 0, sizeof(struct ubifs_nnode)); + if (branch->lnum == 0) { + /* + * This nnode was not written which just means that the LEB + * properties in the subtree below it describe empty LEBs. We + * make the nnode as though we had read it, which in fact means + * doing almost nothing. + */ + if (c->big_lpt) + nnode->num = calc_nnode_num_from_parent(c, parent, iip); + } else { + err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, + c->nnode_sz); + if (err) + return ERR_PTR(err); + err = unpack_nnode(c, buf, nnode); + if (err) + return ERR_PTR(err); + } + err = validate_nnode(c, nnode, parent, iip); + if (err) + return ERR_PTR(err); + if (!c->big_lpt) + nnode->num = calc_nnode_num_from_parent(c, parent, iip); + nnode->level = parent->level - 1; + nnode->parent = parent; + nnode->iip = iip; + return nnode; +} + +/** + * scan_get_pnode - for the scan, get a pnode from either the tree or flash. + * @c: the UBIFS file-system description object + * @path: where to put the pnode + * @parent: parent of the pnode + * @iip: index in parent of the pnode + * + * This function returns a pointer to the pnode on success or a negative error + * code on failure. + */ +static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c, + struct lpt_scan_node *path, + struct ubifs_nnode *parent, int iip) +{ + struct ubifs_nbranch *branch; + struct ubifs_pnode *pnode; + void *buf = c->lpt_nod_buf; + int err; + + branch = &parent->nbranch[iip]; + pnode = branch->pnode; + if (pnode) { + path->in_tree = 1; + path->ptr.pnode = pnode; + return pnode; + } + pnode = &path->pnode; + path->in_tree = 0; + path->ptr.pnode = pnode; + memset(pnode, 0, sizeof(struct ubifs_pnode)); + if (branch->lnum == 0) { + /* + * This pnode was not written which just means that the LEB + * properties in it describe empty LEBs. We make the pnode as + * though we had read it. + */ + int i; + + if (c->big_lpt) + pnode->num = calc_pnode_num_from_parent(c, parent, iip); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_lprops * const lprops = &pnode->lprops[i]; + + lprops->free = c->leb_size; + lprops->flags = ubifs_categorize_lprops(c, lprops); + } + } else { + ubifs_assert(branch->lnum >= c->lpt_first && + branch->lnum <= c->lpt_last); + ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size); + err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, + c->pnode_sz); + if (err) + return ERR_PTR(err); + err = unpack_pnode(c, buf, pnode); + if (err) + return ERR_PTR(err); + } + err = validate_pnode(c, pnode, parent, iip); + if (err) + return ERR_PTR(err); + if (!c->big_lpt) + pnode->num = calc_pnode_num_from_parent(c, parent, iip); + pnode->parent = parent; + pnode->iip = iip; + set_pnode_lnum(c, pnode); + return pnode; +} + +/** + * ubifs_lpt_scan_nolock - scan the LPT. + * @c: the UBIFS file-system description object + * @start_lnum: LEB number from which to start scanning + * @end_lnum: LEB number at which to stop scanning + * @scan_cb: callback function called for each lprops + * @data: data to be passed to the callback function + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum, + ubifs_lpt_scan_callback scan_cb, void *data) +{ + int err = 0, i, h, iip, shft; + struct ubifs_nnode *nnode; + struct ubifs_pnode *pnode; + struct lpt_scan_node *path; + + if (start_lnum == -1) { + start_lnum = end_lnum + 1; + if (start_lnum >= c->leb_cnt) + start_lnum = c->main_first; + } + + ubifs_assert(start_lnum >= c->main_first && start_lnum < c->leb_cnt); + ubifs_assert(end_lnum >= c->main_first && end_lnum < c->leb_cnt); + + if (!c->nroot) { + err = ubifs_read_nnode(c, NULL, 0); + if (err) + return err; + } + + path = kmalloc(sizeof(struct lpt_scan_node) * (c->lpt_hght + 1), + GFP_NOFS); + if (!path) + return -ENOMEM; + + path[0].ptr.nnode = c->nroot; + path[0].in_tree = 1; +again: + /* Descend to the pnode containing start_lnum */ + nnode = c->nroot; + i = start_lnum - c->main_first; + shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; + for (h = 1; h < c->lpt_hght; h++) { + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + nnode = scan_get_nnode(c, path + h, nnode, iip); + if (IS_ERR(nnode)) { + err = PTR_ERR(nnode); + goto out; + } + } + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + pnode = scan_get_pnode(c, path + h, nnode, iip); + if (IS_ERR(pnode)) { + err = PTR_ERR(pnode); + goto out; + } + iip = (i & (UBIFS_LPT_FANOUT - 1)); + + /* Loop for each lprops */ + while (1) { + struct ubifs_lprops *lprops = &pnode->lprops[iip]; + int ret, lnum = lprops->lnum; + + ret = scan_cb(c, lprops, path[h].in_tree, data); + if (ret < 0) { + err = ret; + goto out; + } + if (ret & LPT_SCAN_ADD) { + /* Add all the nodes in path to the tree in memory */ + for (h = 1; h < c->lpt_hght; h++) { + const size_t sz = sizeof(struct ubifs_nnode); + struct ubifs_nnode *parent; + + if (path[h].in_tree) + continue; + nnode = kmalloc(sz, GFP_NOFS); + if (!nnode) { + err = -ENOMEM; + goto out; + } + memcpy(nnode, &path[h].nnode, sz); + parent = nnode->parent; + parent->nbranch[nnode->iip].nnode = nnode; + path[h].ptr.nnode = nnode; + path[h].in_tree = 1; + path[h + 1].cnode.parent = nnode; + } + if (path[h].in_tree) + ubifs_ensure_cat(c, lprops); + else { + const size_t sz = sizeof(struct ubifs_pnode); + struct ubifs_nnode *parent; + + pnode = kmalloc(sz, GFP_NOFS); + if (!pnode) { + err = -ENOMEM; + goto out; + } + memcpy(pnode, &path[h].pnode, sz); + parent = pnode->parent; + parent->nbranch[pnode->iip].pnode = pnode; + path[h].ptr.pnode = pnode; + path[h].in_tree = 1; + update_cats(c, pnode); + c->pnodes_have += 1; + } + err = dbg_check_lpt_nodes(c, (struct ubifs_cnode *) + c->nroot, 0, 0); + if (err) + goto out; + err = dbg_check_cats(c); + if (err) + goto out; + } + if (ret & LPT_SCAN_STOP) { + err = 0; + break; + } + /* Get the next lprops */ + if (lnum == end_lnum) { + /* + * We got to the end without finding what we were + * looking for + */ + err = -ENOSPC; + goto out; + } + if (lnum + 1 >= c->leb_cnt) { + /* Wrap-around to the beginning */ + start_lnum = c->main_first; + goto again; + } + if (iip + 1 < UBIFS_LPT_FANOUT) { + /* Next lprops is in the same pnode */ + iip += 1; + continue; + } + /* We need to get the next pnode. Go up until we can go right */ + iip = pnode->iip; + while (1) { + h -= 1; + ubifs_assert(h >= 0); + nnode = path[h].ptr.nnode; + if (iip + 1 < UBIFS_LPT_FANOUT) + break; + iip = nnode->iip; + } + /* Go right */ + iip += 1; + /* Descend to the pnode */ + h += 1; + for (; h < c->lpt_hght; h++) { + nnode = scan_get_nnode(c, path + h, nnode, iip); + if (IS_ERR(nnode)) { + err = PTR_ERR(nnode); + goto out; + } + iip = 0; + } + pnode = scan_get_pnode(c, path + h, nnode, iip); + if (IS_ERR(pnode)) { + err = PTR_ERR(pnode); + goto out; + } + iip = 0; + } +out: + kfree(path); + return err; +} + +#ifdef CONFIG_UBIFS_FS_DEBUG + +/** + * dbg_chk_pnode - check a pnode. + * @c: the UBIFS file-system description object + * @pnode: pnode to check + * @col: pnode column + * + * This function returns %0 on success and a negative error code on failure. + */ +static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, + int col) +{ + int i; + + if (pnode->num != col) { + dbg_err("pnode num %d expected %d parent num %d iip %d", + pnode->num, col, pnode->parent->num, pnode->iip); + return -EINVAL; + } + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_lprops *lp, *lprops = &pnode->lprops[i]; + int lnum = (pnode->num << UBIFS_LPT_FANOUT_SHIFT) + i + + c->main_first; + int found, cat = lprops->flags & LPROPS_CAT_MASK; + struct ubifs_lpt_heap *heap; + struct list_head *list = NULL; + + if (lnum >= c->leb_cnt) + continue; + if (lprops->lnum != lnum) { + dbg_err("bad LEB number %d expected %d", + lprops->lnum, lnum); + return -EINVAL; + } + if (lprops->flags & LPROPS_TAKEN) { + if (cat != LPROPS_UNCAT) { + dbg_err("LEB %d taken but not uncat %d", + lprops->lnum, cat); + return -EINVAL; + } + continue; + } + if (lprops->flags & LPROPS_INDEX) { + switch (cat) { + case LPROPS_UNCAT: + case LPROPS_DIRTY_IDX: + case LPROPS_FRDI_IDX: + break; + default: + dbg_err("LEB %d index but cat %d", + lprops->lnum, cat); + return -EINVAL; + } + } else { + switch (cat) { + case LPROPS_UNCAT: + case LPROPS_DIRTY: + case LPROPS_FREE: + case LPROPS_EMPTY: + case LPROPS_FREEABLE: + break; + default: + dbg_err("LEB %d not index but cat %d", + lprops->lnum, cat); + return -EINVAL; + } + } + switch (cat) { + case LPROPS_UNCAT: + list = &c->uncat_list; + break; + case LPROPS_EMPTY: + list = &c->empty_list; + break; + case LPROPS_FREEABLE: + list = &c->freeable_list; + break; + case LPROPS_FRDI_IDX: + list = &c->frdi_idx_list; + break; + } + found = 0; + switch (cat) { + case LPROPS_DIRTY: + case LPROPS_DIRTY_IDX: + case LPROPS_FREE: + heap = &c->lpt_heap[cat - 1]; + if (lprops->hpos < heap->cnt && + heap->arr[lprops->hpos] == lprops) + found = 1; + break; + case LPROPS_UNCAT: + case LPROPS_EMPTY: + case LPROPS_FREEABLE: + case LPROPS_FRDI_IDX: + list_for_each_entry(lp, list, list) + if (lprops == lp) { + found = 1; + break; + } + break; + } + if (!found) { + dbg_err("LEB %d cat %d not found in cat heap/list", + lprops->lnum, cat); + return -EINVAL; + } + switch (cat) { + case LPROPS_EMPTY: + if (lprops->free != c->leb_size) { + dbg_err("LEB %d cat %d free %d dirty %d", + lprops->lnum, cat, lprops->free, + lprops->dirty); + return -EINVAL; + } + case LPROPS_FREEABLE: + case LPROPS_FRDI_IDX: + if (lprops->free + lprops->dirty != c->leb_size) { + dbg_err("LEB %d cat %d free %d dirty %d", + lprops->lnum, cat, lprops->free, + lprops->dirty); + return -EINVAL; + } + } + } + return 0; +} + +/** + * dbg_check_lpt_nodes - check nnodes and pnodes. + * @c: the UBIFS file-system description object + * @cnode: next cnode (nnode or pnode) to check + * @row: row of cnode (root is zero) + * @col: column of cnode (leftmost is zero) + * + * This function returns %0 on success and a negative error code on failure. + */ +int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, + int row, int col) +{ + struct ubifs_nnode *nnode, *nn; + struct ubifs_cnode *cn; + int num, iip = 0, err; + + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + return 0; + + while (cnode) { + ubifs_assert(row >= 0); + nnode = cnode->parent; + if (cnode->level) { + /* cnode is a nnode */ + num = calc_nnode_num(row, col); + if (cnode->num != num) { + dbg_err("nnode num %d expected %d " + "parent num %d iip %d", cnode->num, num, + (nnode ? nnode->num : 0), cnode->iip); + return -EINVAL; + } + nn = (struct ubifs_nnode *)cnode; + while (iip < UBIFS_LPT_FANOUT) { + cn = nn->nbranch[iip].cnode; + if (cn) { + /* Go down */ + row += 1; + col <<= UBIFS_LPT_FANOUT_SHIFT; + col += iip; + iip = 0; + cnode = cn; + break; + } + /* Go right */ + iip += 1; + } + if (iip < UBIFS_LPT_FANOUT) + continue; + } else { + struct ubifs_pnode *pnode; + + /* cnode is a pnode */ + pnode = (struct ubifs_pnode *)cnode; + err = dbg_chk_pnode(c, pnode, col); + if (err) + return err; + } + /* Go up and to the right */ + row -= 1; + col >>= UBIFS_LPT_FANOUT_SHIFT; + iip = cnode->iip + 1; + cnode = (struct ubifs_cnode *)nnode; + } + return 0; +} + +#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c new file mode 100644 index 0000000..5f0b83e --- /dev/null +++ b/fs/ubifs/lpt_commit.c @@ -0,0 +1,1648 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements commit-related functionality of the LEB properties + * subsystem. + */ + +#include +#include "ubifs.h" + +/** + * first_dirty_cnode - find first dirty cnode. + * @c: UBIFS file-system description object + * @nnode: nnode at which to start + * + * This function returns the first dirty cnode or %NULL if there is not one. + */ +static struct ubifs_cnode *first_dirty_cnode(struct ubifs_nnode *nnode) +{ + ubifs_assert(nnode); + while (1) { + int i, cont = 0; + + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_cnode *cnode; + + cnode = nnode->nbranch[i].cnode; + if (cnode && + test_bit(DIRTY_CNODE, &cnode->flags)) { + if (cnode->level == 0) + return cnode; + nnode = (struct ubifs_nnode *)cnode; + cont = 1; + break; + } + } + if (!cont) + return (struct ubifs_cnode *)nnode; + } +} + +/** + * next_dirty_cnode - find next dirty cnode. + * @cnode: cnode from which to begin searching + * + * This function returns the next dirty cnode or %NULL if there is not one. + */ +static struct ubifs_cnode *next_dirty_cnode(struct ubifs_cnode *cnode) +{ + struct ubifs_nnode *nnode; + int i; + + ubifs_assert(cnode); + nnode = cnode->parent; + if (!nnode) + return NULL; + for (i = cnode->iip + 1; i < UBIFS_LPT_FANOUT; i++) { + cnode = nnode->nbranch[i].cnode; + if (cnode && test_bit(DIRTY_CNODE, &cnode->flags)) { + if (cnode->level == 0) + return cnode; /* cnode is a pnode */ + /* cnode is a nnode */ + return first_dirty_cnode((struct ubifs_nnode *)cnode); + } + } + return (struct ubifs_cnode *)nnode; +} + +/** + * get_cnodes_to_commit - create list of dirty cnodes to commit. + * @c: UBIFS file-system description object + * + * This function returns the number of cnodes to commit. + */ +static int get_cnodes_to_commit(struct ubifs_info *c) +{ + struct ubifs_cnode *cnode, *cnext; + int cnt = 0; + + if (!c->nroot) + return 0; + + if (!test_bit(DIRTY_CNODE, &c->nroot->flags)) + return 0; + + c->lpt_cnext = first_dirty_cnode(c->nroot); + cnode = c->lpt_cnext; + if (!cnode) + return 0; + cnt += 1; + while (1) { + ubifs_assert(!test_bit(COW_ZNODE, &cnode->flags)); + __set_bit(COW_ZNODE, &cnode->flags); + cnext = next_dirty_cnode(cnode); + if (!cnext) { + cnode->cnext = c->lpt_cnext; + break; + } + cnode->cnext = cnext; + cnode = cnext; + cnt += 1; + } + dbg_cmt("committing %d cnodes", cnt); + dbg_lp("committing %d cnodes", cnt); + ubifs_assert(cnt == c->dirty_nn_cnt + c->dirty_pn_cnt); + return cnt; +} + +/** + * upd_ltab - update LPT LEB properties. + * @c: UBIFS file-system description object + * @lnum: LEB number + * @free: amount of free space + * @dirty: amount of dirty space to add + */ +static void upd_ltab(struct ubifs_info *c, int lnum, int free, int dirty) +{ + dbg_lp("LEB %d free %d dirty %d to %d +%d", + lnum, c->ltab[lnum - c->lpt_first].free, + c->ltab[lnum - c->lpt_first].dirty, free, dirty); + ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); + c->ltab[lnum - c->lpt_first].free = free; + c->ltab[lnum - c->lpt_first].dirty += dirty; +} + +/** + * alloc_lpt_leb - allocate an LPT LEB that is empty. + * @c: UBIFS file-system description object + * @lnum: LEB number is passed and returned here + * + * This function finds the next empty LEB in the ltab starting from @lnum. If a + * an empty LEB is found it is returned in @lnum and the function returns %0. + * Otherwise the function returns -ENOSPC. Note however, that LPT is designed + * never to run out of space. + */ +static int alloc_lpt_leb(struct ubifs_info *c, int *lnum) +{ + int i, n; + + n = *lnum - c->lpt_first + 1; + for (i = n; i < c->lpt_lebs; i++) { + if (c->ltab[i].tgc || c->ltab[i].cmt) + continue; + if (c->ltab[i].free == c->leb_size) { + c->ltab[i].cmt = 1; + *lnum = i + c->lpt_first; + return 0; + } + } + + for (i = 0; i < n; i++) { + if (c->ltab[i].tgc || c->ltab[i].cmt) + continue; + if (c->ltab[i].free == c->leb_size) { + c->ltab[i].cmt = 1; + *lnum = i + c->lpt_first; + return 0; + } + } + dbg_err("last LEB %d", *lnum); + dump_stack(); + return -ENOSPC; +} + +/** + * layout_cnodes - layout cnodes for commit. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +static int layout_cnodes(struct ubifs_info *c) +{ + int lnum, offs, len, alen, done_lsave, done_ltab, err; + struct ubifs_cnode *cnode; + + cnode = c->lpt_cnext; + if (!cnode) + return 0; + lnum = c->nhead_lnum; + offs = c->nhead_offs; + /* Try to place lsave and ltab nicely */ + done_lsave = !c->big_lpt; + done_ltab = 0; + if (!done_lsave && offs + c->lsave_sz <= c->leb_size) { + done_lsave = 1; + c->lsave_lnum = lnum; + c->lsave_offs = offs; + offs += c->lsave_sz; + } + + if (offs + c->ltab_sz <= c->leb_size) { + done_ltab = 1; + c->ltab_lnum = lnum; + c->ltab_offs = offs; + offs += c->ltab_sz; + } + + do { + if (cnode->level) { + len = c->nnode_sz; + c->dirty_nn_cnt -= 1; + } else { + len = c->pnode_sz; + c->dirty_pn_cnt -= 1; + } + while (offs + len > c->leb_size) { + alen = ALIGN(offs, c->min_io_size); + upd_ltab(c, lnum, c->leb_size - alen, alen - offs); + err = alloc_lpt_leb(c, &lnum); + if (err) + return err; + offs = 0; + ubifs_assert(lnum >= c->lpt_first && + lnum <= c->lpt_last); + /* Try to place lsave and ltab nicely */ + if (!done_lsave) { + done_lsave = 1; + c->lsave_lnum = lnum; + c->lsave_offs = offs; + offs += c->lsave_sz; + continue; + } + if (!done_ltab) { + done_ltab = 1; + c->ltab_lnum = lnum; + c->ltab_offs = offs; + offs += c->ltab_sz; + continue; + } + break; + } + if (cnode->parent) { + cnode->parent->nbranch[cnode->iip].lnum = lnum; + cnode->parent->nbranch[cnode->iip].offs = offs; + } else { + c->lpt_lnum = lnum; + c->lpt_offs = offs; + } + offs += len; + cnode = cnode->cnext; + } while (cnode && cnode != c->lpt_cnext); + + /* Make sure to place LPT's save table */ + if (!done_lsave) { + if (offs + c->lsave_sz > c->leb_size) { + alen = ALIGN(offs, c->min_io_size); + upd_ltab(c, lnum, c->leb_size - alen, alen - offs); + err = alloc_lpt_leb(c, &lnum); + if (err) + return err; + offs = 0; + ubifs_assert(lnum >= c->lpt_first && + lnum <= c->lpt_last); + } + done_lsave = 1; + c->lsave_lnum = lnum; + c->lsave_offs = offs; + offs += c->lsave_sz; + } + + /* Make sure to place LPT's own lprops table */ + if (!done_ltab) { + if (offs + c->ltab_sz > c->leb_size) { + alen = ALIGN(offs, c->min_io_size); + upd_ltab(c, lnum, c->leb_size - alen, alen - offs); + err = alloc_lpt_leb(c, &lnum); + if (err) + return err; + offs = 0; + ubifs_assert(lnum >= c->lpt_first && + lnum <= c->lpt_last); + } + done_ltab = 1; + c->ltab_lnum = lnum; + c->ltab_offs = offs; + offs += c->ltab_sz; + } + + alen = ALIGN(offs, c->min_io_size); + upd_ltab(c, lnum, c->leb_size - alen, alen - offs); + return 0; +} + +/** + * realloc_lpt_leb - allocate an LPT LEB that is empty. + * @c: UBIFS file-system description object + * @lnum: LEB number is passed and returned here + * + * This function duplicates exactly the results of the function alloc_lpt_leb. + * It is used during end commit to reallocate the same LEB numbers that were + * allocated by alloc_lpt_leb during start commit. + * + * This function finds the next LEB that was allocated by the alloc_lpt_leb + * function starting from @lnum. If a LEB is found it is returned in @lnum and + * the function returns %0. Otherwise the function returns -ENOSPC. + * Note however, that LPT is designed never to run out of space. + */ +static int realloc_lpt_leb(struct ubifs_info *c, int *lnum) +{ + int i, n; + + n = *lnum - c->lpt_first + 1; + for (i = n; i < c->lpt_lebs; i++) + if (c->ltab[i].cmt) { + c->ltab[i].cmt = 0; + *lnum = i + c->lpt_first; + return 0; + } + + for (i = 0; i < n; i++) + if (c->ltab[i].cmt) { + c->ltab[i].cmt = 0; + *lnum = i + c->lpt_first; + return 0; + } + dbg_err("last LEB %d", *lnum); + dump_stack(); + return -ENOSPC; +} + +/** + * write_cnodes - write cnodes for commit. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +static int write_cnodes(struct ubifs_info *c) +{ + int lnum, offs, len, from, err, wlen, alen, done_ltab, done_lsave; + struct ubifs_cnode *cnode; + void *buf = c->lpt_buf; + + cnode = c->lpt_cnext; + if (!cnode) + return 0; + lnum = c->nhead_lnum; + offs = c->nhead_offs; + from = offs; + /* Ensure empty LEB is unmapped */ + if (offs == 0) { + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + /* Try to place lsave and ltab nicely */ + done_lsave = !c->big_lpt; + done_ltab = 0; + if (!done_lsave && offs + c->lsave_sz <= c->leb_size) { + done_lsave = 1; + ubifs_pack_lsave(c, buf + offs, c->lsave); + offs += c->lsave_sz; + } + + if (offs + c->ltab_sz <= c->leb_size) { + done_ltab = 1; + ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); + offs += c->ltab_sz; + } + + /* Loop for each cnode */ + do { + if (cnode->level) + len = c->nnode_sz; + else + len = c->pnode_sz; + while (offs + len > c->leb_size) { + wlen = offs - from; + if (wlen) { + alen = ALIGN(wlen, c->min_io_size); + memset(buf + offs, 0xff, alen - wlen); + err = ubifs_leb_write(c, lnum, buf + from, from, + alen, UBI_SHORTTERM); + if (err) + return err; + } + err = realloc_lpt_leb(c, &lnum); + if (err) + return err; + offs = 0; + from = 0; + ubifs_assert(lnum >= c->lpt_first && + lnum <= c->lpt_last); + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + /* Try to place lsave and ltab nicely */ + if (!done_lsave) { + done_lsave = 1; + ubifs_pack_lsave(c, buf + offs, c->lsave); + offs += c->lsave_sz; + continue; + } + if (!done_ltab) { + done_ltab = 1; + ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); + offs += c->ltab_sz; + continue; + } + break; + } + if (cnode->level) + ubifs_pack_nnode(c, buf + offs, + (struct ubifs_nnode *)cnode); + else + ubifs_pack_pnode(c, buf + offs, + (struct ubifs_pnode *)cnode); + /* + * The reason for the barriers is the same as in case of TNC. + * See comment in 'write_index()'. 'dirty_cow_nnode()' and + * 'dirty_cow_pnode()' are the functions for which this is + * important. + */ + clear_bit(DIRTY_CNODE, &cnode->flags); + smp_mb__before_clear_bit(); + clear_bit(COW_ZNODE, &cnode->flags); + smp_mb__after_clear_bit(); + offs += len; + cnode = cnode->cnext; + } while (cnode && cnode != c->lpt_cnext); + + /* Make sure to place LPT's save table */ + if (!done_lsave) { + if (offs + c->lsave_sz > c->leb_size) { + wlen = offs - from; + alen = ALIGN(wlen, c->min_io_size); + memset(buf + offs, 0xff, alen - wlen); + err = ubifs_leb_write(c, lnum, buf + from, from, alen, + UBI_SHORTTERM); + if (err) + return err; + err = realloc_lpt_leb(c, &lnum); + if (err) + return err; + offs = 0; + ubifs_assert(lnum >= c->lpt_first && + lnum <= c->lpt_last); + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + done_lsave = 1; + ubifs_pack_lsave(c, buf + offs, c->lsave); + offs += c->lsave_sz; + } + + /* Make sure to place LPT's own lprops table */ + if (!done_ltab) { + if (offs + c->ltab_sz > c->leb_size) { + wlen = offs - from; + alen = ALIGN(wlen, c->min_io_size); + memset(buf + offs, 0xff, alen - wlen); + err = ubifs_leb_write(c, lnum, buf + from, from, alen, + UBI_SHORTTERM); + if (err) + return err; + err = realloc_lpt_leb(c, &lnum); + if (err) + return err; + offs = 0; + ubifs_assert(lnum >= c->lpt_first && + lnum <= c->lpt_last); + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + done_ltab = 1; + ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); + offs += c->ltab_sz; + } + + /* Write remaining data in buffer */ + wlen = offs - from; + alen = ALIGN(wlen, c->min_io_size); + memset(buf + offs, 0xff, alen - wlen); + err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM); + if (err) + return err; + c->nhead_lnum = lnum; + c->nhead_offs = ALIGN(offs, c->min_io_size); + + dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); + dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); + dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); + if (c->big_lpt) + dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); + return 0; +} + +/** + * next_pnode - find next pnode. + * @c: UBIFS file-system description object + * @pnode: pnode + * + * This function returns the next pnode or %NULL if there are no more pnodes. + */ +static struct ubifs_pnode *next_pnode(struct ubifs_info *c, + struct ubifs_pnode *pnode) +{ + struct ubifs_nnode *nnode; + int iip; + + /* Try to go right */ + nnode = pnode->parent; + iip = pnode->iip + 1; + if (iip < UBIFS_LPT_FANOUT) { + /* We assume here that LEB zero is never an LPT LEB */ + if (nnode->nbranch[iip].lnum) + return ubifs_get_pnode(c, nnode, iip); + else + return NULL; + } + + /* Go up while can't go right */ + do { + iip = nnode->iip + 1; + nnode = nnode->parent; + if (!nnode) + return NULL; + /* We assume here that LEB zero is never an LPT LEB */ + } while (iip >= UBIFS_LPT_FANOUT || !nnode->nbranch[iip].lnum); + + /* Go right */ + nnode = ubifs_get_nnode(c, nnode, iip); + if (IS_ERR(nnode)) + return (void *)nnode; + + /* Go down to level 1 */ + while (nnode->level > 1) { + nnode = ubifs_get_nnode(c, nnode, 0); + if (IS_ERR(nnode)) + return (void *)nnode; + } + + return ubifs_get_pnode(c, nnode, 0); +} + +/** + * pnode_lookup - lookup a pnode in the LPT. + * @c: UBIFS file-system description object + * @i: pnode number (0 to main_lebs - 1) + * + * This function returns a pointer to the pnode on success or a negative + * error code on failure. + */ +static struct ubifs_pnode *pnode_lookup(struct ubifs_info *c, int i) +{ + int err, h, iip, shft; + struct ubifs_nnode *nnode; + + if (!c->nroot) { + err = ubifs_read_nnode(c, NULL, 0); + if (err) + return ERR_PTR(err); + } + i <<= UBIFS_LPT_FANOUT_SHIFT; + nnode = c->nroot; + shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; + for (h = 1; h < c->lpt_hght; h++) { + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + nnode = ubifs_get_nnode(c, nnode, iip); + if (IS_ERR(nnode)) + return ERR_PTR(PTR_ERR(nnode)); + } + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + return ubifs_get_pnode(c, nnode, iip); +} + +/** + * add_pnode_dirt - add dirty space to LPT LEB properties. + * @c: UBIFS file-system description object + * @pnode: pnode for which to add dirt + */ +static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode) +{ + ubifs_add_lpt_dirt(c, pnode->parent->nbranch[pnode->iip].lnum, + c->pnode_sz); +} + +/** + * do_make_pnode_dirty - mark a pnode dirty. + * @c: UBIFS file-system description object + * @pnode: pnode to mark dirty + */ +static void do_make_pnode_dirty(struct ubifs_info *c, struct ubifs_pnode *pnode) +{ + /* Assumes cnext list is empty i.e. not called during commit */ + if (!test_and_set_bit(DIRTY_CNODE, &pnode->flags)) { + struct ubifs_nnode *nnode; + + c->dirty_pn_cnt += 1; + add_pnode_dirt(c, pnode); + /* Mark parent and ancestors dirty too */ + nnode = pnode->parent; + while (nnode) { + if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { + c->dirty_nn_cnt += 1; + ubifs_add_nnode_dirt(c, nnode); + nnode = nnode->parent; + } else + break; + } + } +} + +/** + * make_tree_dirty - mark the entire LEB properties tree dirty. + * @c: UBIFS file-system description object + * + * This function is used by the "small" LPT model to cause the entire LEB + * properties tree to be written. The "small" LPT model does not use LPT + * garbage collection because it is more efficient to write the entire tree + * (because it is small). + * + * This function returns %0 on success and a negative error code on failure. + */ +static int make_tree_dirty(struct ubifs_info *c) +{ + struct ubifs_pnode *pnode; + + pnode = pnode_lookup(c, 0); + while (pnode) { + do_make_pnode_dirty(c, pnode); + pnode = next_pnode(c, pnode); + if (IS_ERR(pnode)) + return PTR_ERR(pnode); + } + return 0; +} + +/** + * need_write_all - determine if the LPT area is running out of free space. + * @c: UBIFS file-system description object + * + * This function returns %1 if the LPT area is running out of free space and %0 + * if it is not. + */ +static int need_write_all(struct ubifs_info *c) +{ + long long free = 0; + int i; + + for (i = 0; i < c->lpt_lebs; i++) { + if (i + c->lpt_first == c->nhead_lnum) + free += c->leb_size - c->nhead_offs; + else if (c->ltab[i].free == c->leb_size) + free += c->leb_size; + else if (c->ltab[i].free + c->ltab[i].dirty == c->leb_size) + free += c->leb_size; + } + /* Less than twice the size left */ + if (free <= c->lpt_sz * 2) + return 1; + return 0; +} + +/** + * lpt_tgc_start - start trivial garbage collection of LPT LEBs. + * @c: UBIFS file-system description object + * + * LPT trivial garbage collection is where a LPT LEB contains only dirty and + * free space and so may be reused as soon as the next commit is completed. + * This function is called during start commit to mark LPT LEBs for trivial GC. + */ +static void lpt_tgc_start(struct ubifs_info *c) +{ + int i; + + for (i = 0; i < c->lpt_lebs; i++) { + if (i + c->lpt_first == c->nhead_lnum) + continue; + if (c->ltab[i].dirty > 0 && + c->ltab[i].free + c->ltab[i].dirty == c->leb_size) { + c->ltab[i].tgc = 1; + c->ltab[i].free = c->leb_size; + c->ltab[i].dirty = 0; + dbg_lp("LEB %d", i + c->lpt_first); + } + } +} + +/** + * lpt_tgc_end - end trivial garbage collection of LPT LEBs. + * @c: UBIFS file-system description object + * + * LPT trivial garbage collection is where a LPT LEB contains only dirty and + * free space and so may be reused as soon as the next commit is completed. + * This function is called after the commit is completed (master node has been + * written) and unmaps LPT LEBs that were marked for trivial GC. + */ +static int lpt_tgc_end(struct ubifs_info *c) +{ + int i, err; + + for (i = 0; i < c->lpt_lebs; i++) + if (c->ltab[i].tgc) { + err = ubifs_leb_unmap(c, i + c->lpt_first); + if (err) + return err; + c->ltab[i].tgc = 0; + dbg_lp("LEB %d", i + c->lpt_first); + } + return 0; +} + +/** + * populate_lsave - fill the lsave array with important LEB numbers. + * @c: the UBIFS file-system description object + * + * This function is only called for the "big" model. It records a small number + * of LEB numbers of important LEBs. Important LEBs are ones that are (from + * most important to least important): empty, freeable, freeable index, dirty + * index, dirty or free. Upon mount, we read this list of LEB numbers and bring + * their pnodes into memory. That will stop us from having to scan the LPT + * straight away. For the "small" model we assume that scanning the LPT is no + * big deal. + */ +static void populate_lsave(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + struct ubifs_lpt_heap *heap; + int i, cnt = 0; + + ubifs_assert(c->big_lpt); + if (!(c->lpt_drty_flgs & LSAVE_DIRTY)) { + c->lpt_drty_flgs |= LSAVE_DIRTY; + ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); + } + list_for_each_entry(lprops, &c->empty_list, list) { + c->lsave[cnt++] = lprops->lnum; + if (cnt >= c->lsave_cnt) + return; + } + list_for_each_entry(lprops, &c->freeable_list, list) { + c->lsave[cnt++] = lprops->lnum; + if (cnt >= c->lsave_cnt) + return; + } + list_for_each_entry(lprops, &c->frdi_idx_list, list) { + c->lsave[cnt++] = lprops->lnum; + if (cnt >= c->lsave_cnt) + return; + } + heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; + for (i = 0; i < heap->cnt; i++) { + c->lsave[cnt++] = heap->arr[i]->lnum; + if (cnt >= c->lsave_cnt) + return; + } + heap = &c->lpt_heap[LPROPS_DIRTY - 1]; + for (i = 0; i < heap->cnt; i++) { + c->lsave[cnt++] = heap->arr[i]->lnum; + if (cnt >= c->lsave_cnt) + return; + } + heap = &c->lpt_heap[LPROPS_FREE - 1]; + for (i = 0; i < heap->cnt; i++) { + c->lsave[cnt++] = heap->arr[i]->lnum; + if (cnt >= c->lsave_cnt) + return; + } + /* Fill it up completely */ + while (cnt < c->lsave_cnt) + c->lsave[cnt++] = c->main_first; +} + +/** + * nnode_lookup - lookup a nnode in the LPT. + * @c: UBIFS file-system description object + * @i: nnode number + * + * This function returns a pointer to the nnode on success or a negative + * error code on failure. + */ +static struct ubifs_nnode *nnode_lookup(struct ubifs_info *c, int i) +{ + int err, iip; + struct ubifs_nnode *nnode; + + if (!c->nroot) { + err = ubifs_read_nnode(c, NULL, 0); + if (err) + return ERR_PTR(err); + } + nnode = c->nroot; + while (1) { + iip = i & (UBIFS_LPT_FANOUT - 1); + i >>= UBIFS_LPT_FANOUT_SHIFT; + if (!i) + break; + nnode = ubifs_get_nnode(c, nnode, iip); + if (IS_ERR(nnode)) + return nnode; + } + return nnode; +} + +/** + * make_nnode_dirty - find a nnode and, if found, make it dirty. + * @c: UBIFS file-system description object + * @node_num: nnode number of nnode to make dirty + * @lnum: LEB number where nnode was written + * @offs: offset where nnode was written + * + * This function is used by LPT garbage collection. LPT garbage collection is + * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection + * simply involves marking all the nodes in the LEB being garbage-collected as + * dirty. The dirty nodes are written next commit, after which the LEB is free + * to be reused. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int make_nnode_dirty(struct ubifs_info *c, int node_num, int lnum, + int offs) +{ + struct ubifs_nnode *nnode; + + nnode = nnode_lookup(c, node_num); + if (IS_ERR(nnode)) + return PTR_ERR(nnode); + if (nnode->parent) { + struct ubifs_nbranch *branch; + + branch = &nnode->parent->nbranch[nnode->iip]; + if (branch->lnum != lnum || branch->offs != offs) + return 0; /* nnode is obsolete */ + } else if (c->lpt_lnum != lnum || c->lpt_offs != offs) + return 0; /* nnode is obsolete */ + /* Assumes cnext list is empty i.e. not called during commit */ + if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { + c->dirty_nn_cnt += 1; + ubifs_add_nnode_dirt(c, nnode); + /* Mark parent and ancestors dirty too */ + nnode = nnode->parent; + while (nnode) { + if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { + c->dirty_nn_cnt += 1; + ubifs_add_nnode_dirt(c, nnode); + nnode = nnode->parent; + } else + break; + } + } + return 0; +} + +/** + * make_pnode_dirty - find a pnode and, if found, make it dirty. + * @c: UBIFS file-system description object + * @node_num: pnode number of pnode to make dirty + * @lnum: LEB number where pnode was written + * @offs: offset where pnode was written + * + * This function is used by LPT garbage collection. LPT garbage collection is + * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection + * simply involves marking all the nodes in the LEB being garbage-collected as + * dirty. The dirty nodes are written next commit, after which the LEB is free + * to be reused. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int make_pnode_dirty(struct ubifs_info *c, int node_num, int lnum, + int offs) +{ + struct ubifs_pnode *pnode; + struct ubifs_nbranch *branch; + + pnode = pnode_lookup(c, node_num); + if (IS_ERR(pnode)) + return PTR_ERR(pnode); + branch = &pnode->parent->nbranch[pnode->iip]; + if (branch->lnum != lnum || branch->offs != offs) + return 0; + do_make_pnode_dirty(c, pnode); + return 0; +} + +/** + * make_ltab_dirty - make ltab node dirty. + * @c: UBIFS file-system description object + * @lnum: LEB number where ltab was written + * @offs: offset where ltab was written + * + * This function is used by LPT garbage collection. LPT garbage collection is + * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection + * simply involves marking all the nodes in the LEB being garbage-collected as + * dirty. The dirty nodes are written next commit, after which the LEB is free + * to be reused. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int make_ltab_dirty(struct ubifs_info *c, int lnum, int offs) +{ + if (lnum != c->ltab_lnum || offs != c->ltab_offs) + return 0; /* This ltab node is obsolete */ + if (!(c->lpt_drty_flgs & LTAB_DIRTY)) { + c->lpt_drty_flgs |= LTAB_DIRTY; + ubifs_add_lpt_dirt(c, c->ltab_lnum, c->ltab_sz); + } + return 0; +} + +/** + * make_lsave_dirty - make lsave node dirty. + * @c: UBIFS file-system description object + * @lnum: LEB number where lsave was written + * @offs: offset where lsave was written + * + * This function is used by LPT garbage collection. LPT garbage collection is + * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection + * simply involves marking all the nodes in the LEB being garbage-collected as + * dirty. The dirty nodes are written next commit, after which the LEB is free + * to be reused. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int make_lsave_dirty(struct ubifs_info *c, int lnum, int offs) +{ + if (lnum != c->lsave_lnum || offs != c->lsave_offs) + return 0; /* This lsave node is obsolete */ + if (!(c->lpt_drty_flgs & LSAVE_DIRTY)) { + c->lpt_drty_flgs |= LSAVE_DIRTY; + ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); + } + return 0; +} + +/** + * make_node_dirty - make node dirty. + * @c: UBIFS file-system description object + * @node_type: LPT node type + * @node_num: node number + * @lnum: LEB number where node was written + * @offs: offset where node was written + * + * This function is used by LPT garbage collection. LPT garbage collection is + * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection + * simply involves marking all the nodes in the LEB being garbage-collected as + * dirty. The dirty nodes are written next commit, after which the LEB is free + * to be reused. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int make_node_dirty(struct ubifs_info *c, int node_type, int node_num, + int lnum, int offs) +{ + switch (node_type) { + case UBIFS_LPT_NNODE: + return make_nnode_dirty(c, node_num, lnum, offs); + case UBIFS_LPT_PNODE: + return make_pnode_dirty(c, node_num, lnum, offs); + case UBIFS_LPT_LTAB: + return make_ltab_dirty(c, lnum, offs); + case UBIFS_LPT_LSAVE: + return make_lsave_dirty(c, lnum, offs); + } + return -EINVAL; +} + +/** + * get_lpt_node_len - return the length of a node based on its type. + * @c: UBIFS file-system description object + * @node_type: LPT node type + */ +static int get_lpt_node_len(struct ubifs_info *c, int node_type) +{ + switch (node_type) { + case UBIFS_LPT_NNODE: + return c->nnode_sz; + case UBIFS_LPT_PNODE: + return c->pnode_sz; + case UBIFS_LPT_LTAB: + return c->ltab_sz; + case UBIFS_LPT_LSAVE: + return c->lsave_sz; + } + return 0; +} + +/** + * get_pad_len - return the length of padding in a buffer. + * @c: UBIFS file-system description object + * @buf: buffer + * @len: length of buffer + */ +static int get_pad_len(struct ubifs_info *c, uint8_t *buf, int len) +{ + int offs, pad_len; + + if (c->min_io_size == 1) + return 0; + offs = c->leb_size - len; + pad_len = ALIGN(offs, c->min_io_size) - offs; + return pad_len; +} + +/** + * get_lpt_node_type - return type (and node number) of a node in a buffer. + * @c: UBIFS file-system description object + * @buf: buffer + * @node_num: node number is returned here + */ +static int get_lpt_node_type(struct ubifs_info *c, uint8_t *buf, int *node_num) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int pos = 0, node_type; + + node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS); + *node_num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); + return node_type; +} + +/** + * is_a_node - determine if a buffer contains a node. + * @c: UBIFS file-system description object + * @buf: buffer + * @len: length of buffer + * + * This function returns %1 if the buffer contains a node or %0 if it does not. + */ +static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int pos = 0, node_type, node_len; + uint16_t crc, calc_crc; + + node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS); + if (node_type == UBIFS_LPT_NOT_A_NODE) + return 0; + node_len = get_lpt_node_len(c, node_type); + if (!node_len || node_len > len) + return 0; + pos = 0; + addr = buf; + crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS); + calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, + node_len - UBIFS_LPT_CRC_BYTES); + if (crc != calc_crc) + return 0; + return 1; +} + + +/** + * lpt_gc_lnum - garbage collect a LPT LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number to garbage collect + * + * LPT garbage collection is used only for the "big" LPT model + * (c->big_lpt == 1). Garbage collection simply involves marking all the nodes + * in the LEB being garbage-collected as dirty. The dirty nodes are written + * next commit, after which the LEB is free to be reused. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int lpt_gc_lnum(struct ubifs_info *c, int lnum) +{ + int err, len = c->leb_size, node_type, node_num, node_len, offs; + void *buf = c->lpt_buf; + + dbg_lp("LEB %d", lnum); + err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); + if (err) { + ubifs_err("cannot read LEB %d, error %d", lnum, err); + return err; + } + while (1) { + if (!is_a_node(c, buf, len)) { + int pad_len; + + pad_len = get_pad_len(c, buf, len); + if (pad_len) { + buf += pad_len; + len -= pad_len; + continue; + } + return 0; + } + node_type = get_lpt_node_type(c, buf, &node_num); + node_len = get_lpt_node_len(c, node_type); + offs = c->leb_size - len; + ubifs_assert(node_len != 0); + mutex_lock(&c->lp_mutex); + err = make_node_dirty(c, node_type, node_num, lnum, offs); + mutex_unlock(&c->lp_mutex); + if (err) + return err; + buf += node_len; + len -= node_len; + } + return 0; +} + +/** + * lpt_gc - LPT garbage collection. + * @c: UBIFS file-system description object + * + * Select a LPT LEB for LPT garbage collection and call 'lpt_gc_lnum()'. + * Returns %0 on success and a negative error code on failure. + */ +static int lpt_gc(struct ubifs_info *c) +{ + int i, lnum = -1, dirty = 0; + + mutex_lock(&c->lp_mutex); + for (i = 0; i < c->lpt_lebs; i++) { + ubifs_assert(!c->ltab[i].tgc); + if (i + c->lpt_first == c->nhead_lnum || + c->ltab[i].free + c->ltab[i].dirty == c->leb_size) + continue; + if (c->ltab[i].dirty > dirty) { + dirty = c->ltab[i].dirty; + lnum = i + c->lpt_first; + } + } + mutex_unlock(&c->lp_mutex); + if (lnum == -1) + return -ENOSPC; + return lpt_gc_lnum(c, lnum); +} + +/** + * ubifs_lpt_start_commit - UBIFS commit starts. + * @c: the UBIFS file-system description object + * + * This function has to be called when UBIFS starts the commit operation. + * This function "freezes" all currently dirty LEB properties and does not + * change them anymore. Further changes are saved and tracked separately + * because they are not part of this commit. This function returns zero in case + * of success and a negative error code in case of failure. + */ +int ubifs_lpt_start_commit(struct ubifs_info *c) +{ + int err, cnt; + + dbg_lp(""); + + mutex_lock(&c->lp_mutex); + err = dbg_check_ltab(c); + if (err) + goto out; + + if (c->check_lpt_free) { + /* + * We ensure there is enough free space in + * ubifs_lpt_post_commit() by marking nodes dirty. That + * information is lost when we unmount, so we also need + * to check free space once after mounting also. + */ + c->check_lpt_free = 0; + while (need_write_all(c)) { + mutex_unlock(&c->lp_mutex); + err = lpt_gc(c); + if (err) + return err; + mutex_lock(&c->lp_mutex); + } + } + + lpt_tgc_start(c); + + if (!c->dirty_pn_cnt) { + dbg_cmt("no cnodes to commit"); + err = 0; + goto out; + } + + if (!c->big_lpt && need_write_all(c)) { + /* If needed, write everything */ + err = make_tree_dirty(c); + if (err) + goto out; + lpt_tgc_start(c); + } + + if (c->big_lpt) + populate_lsave(c); + + cnt = get_cnodes_to_commit(c); + ubifs_assert(cnt != 0); + + err = layout_cnodes(c); + if (err) + goto out; + + /* Copy the LPT's own lprops for end commit to write */ + memcpy(c->ltab_cmt, c->ltab, + sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs); + c->lpt_drty_flgs &= ~(LTAB_DIRTY | LSAVE_DIRTY); + +out: + mutex_unlock(&c->lp_mutex); + return err; +} + +/** + * free_obsolete_cnodes - free obsolete cnodes for commit end. + * @c: UBIFS file-system description object + */ +static void free_obsolete_cnodes(struct ubifs_info *c) +{ + struct ubifs_cnode *cnode, *cnext; + + cnext = c->lpt_cnext; + if (!cnext) + return; + do { + cnode = cnext; + cnext = cnode->cnext; + if (test_bit(OBSOLETE_CNODE, &cnode->flags)) + kfree(cnode); + else + cnode->cnext = NULL; + } while (cnext != c->lpt_cnext); + c->lpt_cnext = NULL; +} + +/** + * ubifs_lpt_end_commit - finish the commit operation. + * @c: the UBIFS file-system description object + * + * This function has to be called when the commit operation finishes. It + * flushes the changes which were "frozen" by 'ubifs_lprops_start_commit()' to + * the media. Returns zero in case of success and a negative error code in case + * of failure. + */ +int ubifs_lpt_end_commit(struct ubifs_info *c) +{ + int err; + + dbg_lp(""); + + if (!c->lpt_cnext) + return 0; + + err = write_cnodes(c); + if (err) + return err; + + mutex_lock(&c->lp_mutex); + free_obsolete_cnodes(c); + mutex_unlock(&c->lp_mutex); + + return 0; +} + +/** + * ubifs_lpt_post_commit - post commit LPT trivial GC and LPT GC. + * @c: UBIFS file-system description object + * + * LPT trivial GC is completed after a commit. Also LPT GC is done after a + * commit for the "big" LPT model. + */ +int ubifs_lpt_post_commit(struct ubifs_info *c) +{ + int err; + + mutex_lock(&c->lp_mutex); + err = lpt_tgc_end(c); + if (err) + goto out; + if (c->big_lpt) + while (need_write_all(c)) { + mutex_unlock(&c->lp_mutex); + err = lpt_gc(c); + if (err) + return err; + mutex_lock(&c->lp_mutex); + } +out: + mutex_unlock(&c->lp_mutex); + return err; +} + +/** + * first_nnode - find the first nnode in memory. + * @c: UBIFS file-system description object + * @hght: height of tree where nnode found is returned here + * + * This function returns a pointer to the nnode found or %NULL if no nnode is + * found. This function is a helper to 'ubifs_lpt_free()'. + */ +static struct ubifs_nnode *first_nnode(struct ubifs_info *c, int *hght) +{ + struct ubifs_nnode *nnode; + int h, i, found; + + nnode = c->nroot; + *hght = 0; + if (!nnode) + return NULL; + for (h = 1; h < c->lpt_hght; h++) { + found = 0; + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + if (nnode->nbranch[i].nnode) { + found = 1; + nnode = nnode->nbranch[i].nnode; + *hght = h; + break; + } + } + if (!found) + break; + } + return nnode; +} + +/** + * next_nnode - find the next nnode in memory. + * @c: UBIFS file-system description object + * @nnode: nnode from which to start. + * @hght: height of tree where nnode is, is passed and returned here + * + * This function returns a pointer to the nnode found or %NULL if no nnode is + * found. This function is a helper to 'ubifs_lpt_free()'. + */ +static struct ubifs_nnode *next_nnode(struct ubifs_info *c, + struct ubifs_nnode *nnode, int *hght) +{ + struct ubifs_nnode *parent; + int iip, h, i, found; + + parent = nnode->parent; + if (!parent) + return NULL; + if (nnode->iip == UBIFS_LPT_FANOUT - 1) { + *hght -= 1; + return parent; + } + for (iip = nnode->iip + 1; iip < UBIFS_LPT_FANOUT; iip++) { + nnode = parent->nbranch[iip].nnode; + if (nnode) + break; + } + if (!nnode) { + *hght -= 1; + return parent; + } + for (h = *hght + 1; h < c->lpt_hght; h++) { + found = 0; + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + if (nnode->nbranch[i].nnode) { + found = 1; + nnode = nnode->nbranch[i].nnode; + *hght = h; + break; + } + } + if (!found) + break; + } + return nnode; +} + +/** + * ubifs_lpt_free - free resources owned by the LPT. + * @c: UBIFS file-system description object + * @wr_only: free only resources used for writing + */ +void ubifs_lpt_free(struct ubifs_info *c, int wr_only) +{ + struct ubifs_nnode *nnode; + int i, hght; + + /* Free write-only things first */ + + free_obsolete_cnodes(c); /* Leftover from a failed commit */ + + vfree(c->ltab_cmt); + c->ltab_cmt = NULL; + vfree(c->lpt_buf); + c->lpt_buf = NULL; + kfree(c->lsave); + c->lsave = NULL; + + if (wr_only) + return; + + /* Now free the rest */ + + nnode = first_nnode(c, &hght); + while (nnode) { + for (i = 0; i < UBIFS_LPT_FANOUT; i++) + kfree(nnode->nbranch[i].nnode); + nnode = next_nnode(c, nnode, &hght); + } + for (i = 0; i < LPROPS_HEAP_CNT; i++) + kfree(c->lpt_heap[i].arr); + kfree(c->dirty_idx.arr); + kfree(c->nroot); + vfree(c->ltab); + kfree(c->lpt_nod_buf); +} + +#ifdef CONFIG_UBIFS_FS_DEBUG + +/** + * dbg_is_all_ff - determine if a buffer contains only 0xff bytes. + * @buf: buffer + * @len: buffer length + */ +static int dbg_is_all_ff(uint8_t *buf, int len) +{ + int i; + + for (i = 0; i < len; i++) + if (buf[i] != 0xff) + return 0; + return 1; +} + +/** + * dbg_is_nnode_dirty - determine if a nnode is dirty. + * @c: the UBIFS file-system description object + * @lnum: LEB number where nnode was written + * @offs: offset where nnode was written + */ +static int dbg_is_nnode_dirty(struct ubifs_info *c, int lnum, int offs) +{ + struct ubifs_nnode *nnode; + int hght; + + /* Entire tree is in memory so first_nnode / next_nnode are ok */ + nnode = first_nnode(c, &hght); + for (; nnode; nnode = next_nnode(c, nnode, &hght)) { + struct ubifs_nbranch *branch; + + cond_resched(); + if (nnode->parent) { + branch = &nnode->parent->nbranch[nnode->iip]; + if (branch->lnum != lnum || branch->offs != offs) + continue; + if (test_bit(DIRTY_CNODE, &nnode->flags)) + return 1; + return 0; + } else { + if (c->lpt_lnum != lnum || c->lpt_offs != offs) + continue; + if (test_bit(DIRTY_CNODE, &nnode->flags)) + return 1; + return 0; + } + } + return 1; +} + +/** + * dbg_is_pnode_dirty - determine if a pnode is dirty. + * @c: the UBIFS file-system description object + * @lnum: LEB number where pnode was written + * @offs: offset where pnode was written + */ +static int dbg_is_pnode_dirty(struct ubifs_info *c, int lnum, int offs) +{ + int i, cnt; + + cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT); + for (i = 0; i < cnt; i++) { + struct ubifs_pnode *pnode; + struct ubifs_nbranch *branch; + + cond_resched(); + pnode = pnode_lookup(c, i); + if (IS_ERR(pnode)) + return PTR_ERR(pnode); + branch = &pnode->parent->nbranch[pnode->iip]; + if (branch->lnum != lnum || branch->offs != offs) + continue; + if (test_bit(DIRTY_CNODE, &pnode->flags)) + return 1; + return 0; + } + return 1; +} + +/** + * dbg_is_ltab_dirty - determine if a ltab node is dirty. + * @c: the UBIFS file-system description object + * @lnum: LEB number where ltab node was written + * @offs: offset where ltab node was written + */ +static int dbg_is_ltab_dirty(struct ubifs_info *c, int lnum, int offs) +{ + if (lnum != c->ltab_lnum || offs != c->ltab_offs) + return 1; + return (c->lpt_drty_flgs & LTAB_DIRTY) != 0; +} + +/** + * dbg_is_lsave_dirty - determine if a lsave node is dirty. + * @c: the UBIFS file-system description object + * @lnum: LEB number where lsave node was written + * @offs: offset where lsave node was written + */ +static int dbg_is_lsave_dirty(struct ubifs_info *c, int lnum, int offs) +{ + if (lnum != c->lsave_lnum || offs != c->lsave_offs) + return 1; + return (c->lpt_drty_flgs & LSAVE_DIRTY) != 0; +} + +/** + * dbg_is_node_dirty - determine if a node is dirty. + * @c: the UBIFS file-system description object + * @node_type: node type + * @lnum: LEB number where node was written + * @offs: offset where node was written + */ +static int dbg_is_node_dirty(struct ubifs_info *c, int node_type, int lnum, + int offs) +{ + switch (node_type) { + case UBIFS_LPT_NNODE: + return dbg_is_nnode_dirty(c, lnum, offs); + case UBIFS_LPT_PNODE: + return dbg_is_pnode_dirty(c, lnum, offs); + case UBIFS_LPT_LTAB: + return dbg_is_ltab_dirty(c, lnum, offs); + case UBIFS_LPT_LSAVE: + return dbg_is_lsave_dirty(c, lnum, offs); + } + return 1; +} + +/** + * dbg_check_ltab_lnum - check the ltab for a LPT LEB number. + * @c: the UBIFS file-system description object + * @lnum: LEB number where node was written + * @offs: offset where node was written + * + * This function returns %0 on success and a negative error code on failure. + */ +static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) +{ + int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len; + int ret; + void *buf = c->dbg_buf; + + dbg_lp("LEB %d", lnum); + err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); + if (err) { + dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err); + return err; + } + while (1) { + if (!is_a_node(c, buf, len)) { + int i, pad_len; + + pad_len = get_pad_len(c, buf, len); + if (pad_len) { + buf += pad_len; + len -= pad_len; + dirty += pad_len; + continue; + } + if (!dbg_is_all_ff(buf, len)) { + dbg_msg("invalid empty space in LEB %d at %d", + lnum, c->leb_size - len); + err = -EINVAL; + } + i = lnum - c->lpt_first; + if (len != c->ltab[i].free) { + dbg_msg("invalid free space in LEB %d " + "(free %d, expected %d)", + lnum, len, c->ltab[i].free); + err = -EINVAL; + } + if (dirty != c->ltab[i].dirty) { + dbg_msg("invalid dirty space in LEB %d " + "(dirty %d, expected %d)", + lnum, dirty, c->ltab[i].dirty); + err = -EINVAL; + } + return err; + } + node_type = get_lpt_node_type(c, buf, &node_num); + node_len = get_lpt_node_len(c, node_type); + ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len); + if (ret == 1) + dirty += node_len; + buf += node_len; + len -= node_len; + } +} + +/** + * dbg_check_ltab - check the free and dirty space in the ltab. + * @c: the UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +int dbg_check_ltab(struct ubifs_info *c) +{ + int lnum, err, i, cnt; + + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + return 0; + + /* Bring the entire tree into memory */ + cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT); + for (i = 0; i < cnt; i++) { + struct ubifs_pnode *pnode; + + pnode = pnode_lookup(c, i); + if (IS_ERR(pnode)) + return PTR_ERR(pnode); + cond_resched(); + } + + /* Check nodes */ + err = dbg_check_lpt_nodes(c, (struct ubifs_cnode *)c->nroot, 0, 0); + if (err) + return err; + + /* Check each LEB */ + for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) { + err = dbg_check_ltab_lnum(c, lnum); + if (err) { + dbg_err("failed at LEB %d", lnum); + return err; + } + } + + dbg_lp("succeeded"); + return 0; +} + +#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c new file mode 100644 index 0000000..71d5493 --- /dev/null +++ b/fs/ubifs/master.c @@ -0,0 +1,387 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* This file implements reading and writing the master node */ + +#include "ubifs.h" + +/** + * scan_for_master - search the valid master node. + * @c: UBIFS file-system description object + * + * This function scans the master node LEBs and search for the latest master + * node. Returns zero in case of success and a negative error code in case of + * failure. + */ +static int scan_for_master(struct ubifs_info *c) +{ + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + int lnum, offs = 0, nodes_cnt; + + lnum = UBIFS_MST_LNUM; + + sleb = ubifs_scan(c, lnum, 0, c->sbuf); + if (IS_ERR(sleb)) + return PTR_ERR(sleb); + nodes_cnt = sleb->nodes_cnt; + if (nodes_cnt > 0) { + snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, + list); + if (snod->type != UBIFS_MST_NODE) + goto out; + memcpy(c->mst_node, snod->node, snod->len); + offs = snod->offs; + } + ubifs_scan_destroy(sleb); + + lnum += 1; + + sleb = ubifs_scan(c, lnum, 0, c->sbuf); + if (IS_ERR(sleb)) + return PTR_ERR(sleb); + if (sleb->nodes_cnt != nodes_cnt) + goto out; + if (!sleb->nodes_cnt) + goto out; + snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list); + if (snod->type != UBIFS_MST_NODE) + goto out; + if (snod->offs != offs) + goto out; + if (memcmp((void *)c->mst_node + UBIFS_CH_SZ, + (void *)snod->node + UBIFS_CH_SZ, + UBIFS_MST_NODE_SZ - UBIFS_CH_SZ)) + goto out; + c->mst_offs = offs; + ubifs_scan_destroy(sleb); + return 0; + +out: + ubifs_scan_destroy(sleb); + return -EINVAL; +} + +/** + * validate_master - validate master node. + * @c: UBIFS file-system description object + * + * This function validates data which was read from master node. Returns zero + * if the data is all right and %-EINVAL if not. + */ +static int validate_master(const struct ubifs_info *c) +{ + long long main_sz; + int err; + + if (c->max_sqnum >= SQNUM_WATERMARK) { + err = 1; + goto out; + } + + if (c->cmt_no >= c->max_sqnum) { + err = 2; + goto out; + } + + if (c->highest_inum >= INUM_WATERMARK) { + err = 3; + goto out; + } + + if (c->lhead_lnum < UBIFS_LOG_LNUM || + c->lhead_lnum >= UBIFS_LOG_LNUM + c->log_lebs || + c->lhead_offs < 0 || c->lhead_offs >= c->leb_size || + c->lhead_offs & (c->min_io_size - 1)) { + err = 4; + goto out; + } + + if (c->zroot.lnum >= c->leb_cnt || c->zroot.lnum < c->main_first || + c->zroot.offs >= c->leb_size || c->zroot.offs & 7) { + err = 5; + goto out; + } + + if (c->zroot.len < c->ranges[UBIFS_IDX_NODE].min_len || + c->zroot.len > c->ranges[UBIFS_IDX_NODE].max_len) { + err = 6; + goto out; + } + + if (c->gc_lnum >= c->leb_cnt || c->gc_lnum < c->main_first) { + err = 7; + goto out; + } + + if (c->ihead_lnum >= c->leb_cnt || c->ihead_lnum < c->main_first || + c->ihead_offs % c->min_io_size || c->ihead_offs < 0 || + c->ihead_offs > c->leb_size || c->ihead_offs & 7) { + err = 8; + goto out; + } + + main_sz = (long long)c->main_lebs * c->leb_size; + if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) { + err = 9; + goto out; + } + + if (c->lpt_lnum < c->lpt_first || c->lpt_lnum > c->lpt_last || + c->lpt_offs < 0 || c->lpt_offs + c->nnode_sz > c->leb_size) { + err = 10; + goto out; + } + + if (c->nhead_lnum < c->lpt_first || c->nhead_lnum > c->lpt_last || + c->nhead_offs < 0 || c->nhead_offs % c->min_io_size || + c->nhead_offs > c->leb_size) { + err = 11; + goto out; + } + + if (c->ltab_lnum < c->lpt_first || c->ltab_lnum > c->lpt_last || + c->ltab_offs < 0 || + c->ltab_offs + c->ltab_sz > c->leb_size) { + err = 12; + goto out; + } + + if (c->big_lpt && (c->lsave_lnum < c->lpt_first || + c->lsave_lnum > c->lpt_last || c->lsave_offs < 0 || + c->lsave_offs + c->lsave_sz > c->leb_size)) { + err = 13; + goto out; + } + + if (c->lscan_lnum < c->main_first || c->lscan_lnum >= c->leb_cnt) { + err = 14; + goto out; + } + + if (c->lst.empty_lebs < 0 || c->lst.empty_lebs > c->main_lebs - 2) { + err = 15; + goto out; + } + + if (c->lst.idx_lebs < 0 || c->lst.idx_lebs > c->main_lebs - 1) { + err = 16; + goto out; + } + + if (c->lst.total_free < 0 || c->lst.total_free > main_sz || + c->lst.total_free & 7) { + err = 17; + goto out; + } + + if (c->lst.total_dirty < 0 || (c->lst.total_dirty & 7)) { + err = 18; + goto out; + } + + if (c->lst.total_used < 0 || (c->lst.total_used & 7)) { + err = 19; + goto out; + } + + if (c->lst.total_free + c->lst.total_dirty + + c->lst.total_used > main_sz) { + err = 20; + goto out; + } + + if (c->lst.total_dead + c->lst.total_dark + + c->lst.total_used + c->old_idx_sz > main_sz) { + err = 21; + goto out; + } + + if (c->lst.total_dead < 0 || + c->lst.total_dead > c->lst.total_free + c->lst.total_dirty || + c->lst.total_dead & 7) { + err = 22; + goto out; + } + + if (c->lst.total_dark < 0 || + c->lst.total_dark > c->lst.total_free + c->lst.total_dirty || + c->lst.total_dark & 7) { + err = 23; + goto out; + } + + return 0; + +out: + ubifs_err("bad master node at offset %d error %d", c->mst_offs, err); + dbg_dump_node(c, c->mst_node); + return -EINVAL; +} + +/** + * ubifs_read_master - read master node. + * @c: UBIFS file-system description object + * + * This function finds and reads the master node during file-system mount. If + * the flash is empty, it creates default master node as well. Returns zero in + * case of success and a negative error code in case of failure. + */ +int ubifs_read_master(struct ubifs_info *c) +{ + int err, old_leb_cnt; + + c->mst_node = kzalloc(c->mst_node_alsz, GFP_KERNEL); + if (!c->mst_node) + return -ENOMEM; + + err = scan_for_master(c); + if (err) { + err = ubifs_recover_master_node(c); + if (err) + /* + * Note, we do not free 'c->mst_node' here because the + * unmount routine will take care of this. + */ + return err; + } + + /* Make sure that the recovery flag is clear */ + c->mst_node->flags &= cpu_to_le32(~UBIFS_MST_RCVRY); + + c->max_sqnum = le64_to_cpu(c->mst_node->ch.sqnum); + c->highest_inum = le64_to_cpu(c->mst_node->highest_inum); + c->cmt_no = le64_to_cpu(c->mst_node->cmt_no); + c->zroot.lnum = le32_to_cpu(c->mst_node->root_lnum); + c->zroot.offs = le32_to_cpu(c->mst_node->root_offs); + c->zroot.len = le32_to_cpu(c->mst_node->root_len); + c->lhead_lnum = le32_to_cpu(c->mst_node->log_lnum); + c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum); + c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum); + c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs); + c->old_idx_sz = le64_to_cpu(c->mst_node->index_size); + c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum); + c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs); + c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum); + c->nhead_offs = le32_to_cpu(c->mst_node->nhead_offs); + c->ltab_lnum = le32_to_cpu(c->mst_node->ltab_lnum); + c->ltab_offs = le32_to_cpu(c->mst_node->ltab_offs); + c->lsave_lnum = le32_to_cpu(c->mst_node->lsave_lnum); + c->lsave_offs = le32_to_cpu(c->mst_node->lsave_offs); + c->lscan_lnum = le32_to_cpu(c->mst_node->lscan_lnum); + c->lst.empty_lebs = le32_to_cpu(c->mst_node->empty_lebs); + c->lst.idx_lebs = le32_to_cpu(c->mst_node->idx_lebs); + old_leb_cnt = le32_to_cpu(c->mst_node->leb_cnt); + c->lst.total_free = le64_to_cpu(c->mst_node->total_free); + c->lst.total_dirty = le64_to_cpu(c->mst_node->total_dirty); + c->lst.total_used = le64_to_cpu(c->mst_node->total_used); + c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead); + c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark); + + c->calc_idx_sz = c->old_idx_sz; + + if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) + c->no_orphs = 1; + + if (old_leb_cnt != c->leb_cnt) { + /* The file system has been resized */ + int growth = c->leb_cnt - old_leb_cnt; + + if (c->leb_cnt < old_leb_cnt || + c->leb_cnt < UBIFS_MIN_LEB_CNT) { + ubifs_err("bad leb_cnt on master node"); + dbg_dump_node(c, c->mst_node); + return -EINVAL; + } + + dbg_mnt("Auto resizing (master) from %d LEBs to %d LEBs", + old_leb_cnt, c->leb_cnt); + c->lst.empty_lebs += growth; + c->lst.total_free += growth * (long long)c->leb_size; + c->lst.total_dark += growth * (long long)c->dark_wm; + + /* + * Reflect changes back onto the master node. N.B. the master + * node gets written immediately whenever mounting (or + * remounting) in read-write mode, so we do not need to write it + * here. + */ + c->mst_node->leb_cnt = cpu_to_le32(c->leb_cnt); + c->mst_node->empty_lebs = cpu_to_le32(c->lst.empty_lebs); + c->mst_node->total_free = cpu_to_le64(c->lst.total_free); + c->mst_node->total_dark = cpu_to_le64(c->lst.total_dark); + } + + err = validate_master(c); + if (err) + return err; + + err = dbg_old_index_check_init(c, &c->zroot); + + return err; +} + +/** + * ubifs_write_master - write master node. + * @c: UBIFS file-system description object + * + * This function writes the master node. The caller has to take the + * @c->mst_mutex lock before calling this function. Returns zero in case of + * success and a negative error code in case of failure. The master node is + * written twice to enable recovery. + */ +int ubifs_write_master(struct ubifs_info *c) +{ + int err, lnum, offs, len; + + if (c->ro_media) + return -EINVAL; + + lnum = UBIFS_MST_LNUM; + offs = c->mst_offs + c->mst_node_alsz; + len = UBIFS_MST_NODE_SZ; + + if (offs + UBIFS_MST_NODE_SZ > c->leb_size) { + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + offs = 0; + } + + c->mst_offs = offs; + c->mst_node->highest_inum = cpu_to_le64(c->highest_inum); + + err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM); + if (err) + return err; + + lnum += 1; + + if (offs == 0) { + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM); + + return err; +} diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h new file mode 100644 index 0000000..4beccfc --- /dev/null +++ b/fs/ubifs/misc.h @@ -0,0 +1,342 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file contains miscellaneous helper functions. + */ + +#ifndef __UBIFS_MISC_H__ +#define __UBIFS_MISC_H__ + +/** + * ubifs_zn_dirty - check if znode is dirty. + * @znode: znode to check + * + * This helper function returns %1 if @znode is dirty and %0 otherwise. + */ +static inline int ubifs_zn_dirty(const struct ubifs_znode *znode) +{ + return !!test_bit(DIRTY_ZNODE, &znode->flags); +} + +/** + * ubifs_wake_up_bgt - wake up background thread. + * @c: UBIFS file-system description object + */ +static inline void ubifs_wake_up_bgt(struct ubifs_info *c) +{ + if (c->bgt && !c->need_bgt) { + c->need_bgt = 1; + wake_up_process(c->bgt); + } +} + +/** + * ubifs_tnc_find_child - find next child in znode. + * @znode: znode to search at + * @start: the zbranch index to start at + * + * This helper function looks for znode child starting at index @start. Returns + * the child or %NULL if no children were found. + */ +static inline struct ubifs_znode * +ubifs_tnc_find_child(struct ubifs_znode *znode, int start) +{ + while (start < znode->child_cnt) { + if (znode->zbranch[start].znode) + return znode->zbranch[start].znode; + start += 1; + } + + return NULL; +} + +/** + * ubifs_inode - get UBIFS inode information by VFS 'struct inode' object. + * @inode: the VFS 'struct inode' pointer + */ +static inline struct ubifs_inode *ubifs_inode(const struct inode *inode) +{ + return container_of(inode, struct ubifs_inode, vfs_inode); +} + +/** + * ubifs_ro_mode - switch UBIFS to read read-only mode. + * @c: UBIFS file-system description object + * @err: error code which is the reason of switching to R/O mode + */ +static inline void ubifs_ro_mode(struct ubifs_info *c, int err) +{ + if (!c->ro_media) { + c->ro_media = 1; + ubifs_warn("switched to read-only mode, error %d", err); + dbg_dump_stack(); + } +} + +/** + * ubifs_compr_present - check if compressor was compiled in. + * @compr_type: compressor type to check + * + * This function returns %1 of compressor of type @compr_type is present, and + * %0 if not. + */ +static inline int ubifs_compr_present(int compr_type) +{ + ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT); + return !!ubifs_compressors[compr_type]->capi_name; +} + +/** + * ubifs_compr_name - get compressor name string by its type. + * @compr_type: compressor type + * + * This function returns compressor type string. + */ +static inline const char *ubifs_compr_name(int compr_type) +{ + ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT); + return ubifs_compressors[compr_type]->name; +} + +/** + * ubifs_wbuf_sync - synchronize write-buffer. + * @wbuf: write-buffer to synchronize + * + * This is the same as as 'ubifs_wbuf_sync_nolock()' but it does not assume + * that the write-buffer is already locked. + */ +static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf) +{ + int err; + + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + err = ubifs_wbuf_sync_nolock(wbuf); + mutex_unlock(&wbuf->io_mutex); + return err; +} + +/** + * ubifs_leb_unmap - unmap an LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number to unmap + * + * This function returns %0 on success and a negative error code on failure. + */ +static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum) +{ + int err; + + if (c->ro_media) + return -EROFS; + err = ubi_leb_unmap(c->ubi, lnum); + if (err) { + ubifs_err("unmap LEB %d failed, error %d", lnum, err); + return err; + } + + return 0; +} + +/** + * ubifs_leb_write - write to a LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number to write + * @buf: buffer to write from + * @offs: offset within LEB to write to + * @len: length to write + * @dtype: data type + * + * This function returns %0 on success and a negative error code on failure. + */ +static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum, + const void *buf, int offs, int len, int dtype) +{ + int err; + + if (c->ro_media) + return -EROFS; + err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); + if (err) { + ubifs_err("writing %d bytes at %d:%d, error %d", + len, lnum, offs, err); + return err; + } + + return 0; +} + +/** + * ubifs_leb_change - atomic LEB change. + * @c: UBIFS file-system description object + * @lnum: LEB number to write + * @buf: buffer to write from + * @len: length to write + * @dtype: data type + * + * This function returns %0 on success and a negative error code on failure. + */ +static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum, + const void *buf, int len, int dtype) +{ + int err; + + if (c->ro_media) + return -EROFS; + err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); + if (err) { + ubifs_err("changing %d bytes in LEB %d, error %d", + len, lnum, err); + return err; + } + + return 0; +} + +/** + * ubifs_encode_dev - encode device node IDs. + * @dev: UBIFS device node information + * @rdev: device IDs to encode + * + * This is a helper function which encodes major/minor numbers of a device node + * into UBIFS device node description. We use standard Linux "new" and "huge" + * encodings. + */ +static inline int ubifs_encode_dev(union ubifs_dev_desc *dev, dev_t rdev) +{ + if (new_valid_dev(rdev)) { + dev->new = cpu_to_le32(new_encode_dev(rdev)); + return sizeof(dev->new); + } else { + dev->huge = cpu_to_le64(huge_encode_dev(rdev)); + return sizeof(dev->huge); + } +} + +/** + * ubifs_add_dirt - add dirty space to LEB properties. + * @c: the UBIFS file-system description object + * @lnum: LEB to add dirty space for + * @dirty: dirty space to add + * + * This is a helper function which increased amount of dirty LEB space. Returns + * zero in case of success and a negative error code in case of failure. + */ +static inline int ubifs_add_dirt(struct ubifs_info *c, int lnum, int dirty) +{ + return ubifs_update_one_lp(c, lnum, LPROPS_NC, dirty, 0, 0); +} + +/** + * ubifs_return_leb - return LEB to lprops. + * @c: the UBIFS file-system description object + * @lnum: LEB to return + * + * This helper function cleans the "taken" flag of a logical eraseblock in the + * lprops. Returns zero in case of success and a negative error code in case of + * failure. + */ +static inline int ubifs_return_leb(struct ubifs_info *c, int lnum) +{ + return ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, + LPROPS_TAKEN, 0); +} + +/** + * ubifs_idx_node_sz - return index node size. + * @c: the UBIFS file-system description object + * @child_cnt: number of children of this index node + */ +static inline int ubifs_idx_node_sz(const struct ubifs_info *c, int child_cnt) +{ + return UBIFS_IDX_NODE_SZ + (UBIFS_BRANCH_SZ + c->key_len) * child_cnt; +} + +/** + * ubifs_idx_branch - return pointer to an index branch. + * @c: the UBIFS file-system description object + * @idx: index node + * @bnum: branch number + */ +static inline +struct ubifs_branch *ubifs_idx_branch(const struct ubifs_info *c, + const struct ubifs_idx_node *idx, + int bnum) +{ + return (struct ubifs_branch *)((void *)idx->branches + + (UBIFS_BRANCH_SZ + c->key_len) * bnum); +} + +/** + * ubifs_idx_key - return pointer to an index key. + * @c: the UBIFS file-system description object + * @idx: index node + */ +static inline void *ubifs_idx_key(const struct ubifs_info *c, + const struct ubifs_idx_node *idx) +{ + return (void *)((struct ubifs_branch *)idx->branches)->key; +} + +/** + * ubifs_reported_space - calculate reported free space. + * @c: the UBIFS file-system description object + * @free: amount of free space + * + * This function calculates amount of free space which will be reported to + * user-space. User-space application tend to expect that if the file-system + * (e.g., via the 'statfs()' call) reports that it has N bytes available, they + * are able to write a file of size N. UBIFS attaches node headers to each data + * node and it has to write indexind nodes as well. This introduces additional + * overhead, and UBIFS it has to report sligtly less free space to meet the + * above expectetion. + * + * This function assumes free space is made up of uncompressed data nodes and + * full index nodes (one per data node, doubled because we always allow enough + * space to write the index twice). + * + * Note, the calculation is pessimistic, which means that most of the time + * UBIFS reports less space than it actually has. + */ +static inline long long ubifs_reported_space(const struct ubifs_info *c, + uint64_t free) +{ + int divisor, factor; + + divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz << 1); + factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ; + do_div(free, divisor); + + return free * factor; +} + +/** + * ubifs_current_time - round current time to time granularity. + * @inode: inode + */ +static inline struct timespec ubifs_current_time(struct inode *inode) +{ + return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? + current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; +} + +#endif /* __UBIFS_MISC_H__ */ diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c new file mode 100644 index 0000000..3afeb92 --- /dev/null +++ b/fs/ubifs/orphan.c @@ -0,0 +1,958 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Author: Adrian Hunter + */ + +#include "ubifs.h" + +/* + * An orphan is an inode number whose inode node has been committed to the index + * with a link count of zero. That happens when an open file is deleted + * (unlinked) and then a commit is run. In the normal course of events the inode + * would be deleted when the file is closed. However in the case of an unclean + * unmount, orphans need to be accounted for. After an unclean unmount, the + * orphans' inodes must be deleted which means either scanning the entire index + * looking for them, or keeping a list on flash somewhere. This unit implements + * the latter approach. + * + * The orphan area is a fixed number of LEBs situated between the LPT area and + * the main area. The number of orphan area LEBs is specified when the file + * system is created. The minimum number is 1. The size of the orphan area + * should be so that it can hold the maximum number of orphans that are expected + * to ever exist at one time. + * + * The number of orphans that can fit in a LEB is: + * + * (c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64) + * + * For example: a 15872 byte LEB can fit 1980 orphans so 1 LEB may be enough. + * + * Orphans are accumulated in a rb-tree. When an inode's link count drops to + * zero, the inode number is added to the rb-tree. It is removed from the tree + * when the inode is deleted. Any new orphans that are in the orphan tree when + * the commit is run, are written to the orphan area in 1 or more orph nodes. + * If the orphan area is full, it is consolidated to make space. There is + * always enough space because validation prevents the user from creating more + * than the maximum number of orphans allowed. + */ + +#ifdef CONFIG_UBIFS_FS_DEBUG +static int dbg_check_orphans(struct ubifs_info *c); +#else +#define dbg_check_orphans(c) 0 +#endif + +/** + * ubifs_add_orphan - add an orphan. + * @c: UBIFS file-system description object + * @inum: orphan inode number + * + * Add an orphan. This function is called when an inodes link count drops to + * zero. + */ +int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) +{ + struct ubifs_orphan *orphan, *o; + struct rb_node **p, *parent = NULL; + + orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_NOFS); + if (!orphan) + return -ENOMEM; + orphan->inum = inum; + orphan->new = 1; + + spin_lock(&c->orphan_lock); + if (c->tot_orphans >= c->max_orphans) { + spin_unlock(&c->orphan_lock); + kfree(orphan); + return -ENFILE; + } + p = &c->orph_tree.rb_node; + while (*p) { + parent = *p; + o = rb_entry(parent, struct ubifs_orphan, rb); + if (inum < o->inum) + p = &(*p)->rb_left; + else if (inum > o->inum) + p = &(*p)->rb_right; + else { + dbg_err("orphaned twice"); + spin_unlock(&c->orphan_lock); + kfree(orphan); + return 0; + } + } + c->tot_orphans += 1; + c->new_orphans += 1; + rb_link_node(&orphan->rb, parent, p); + rb_insert_color(&orphan->rb, &c->orph_tree); + list_add_tail(&orphan->list, &c->orph_list); + list_add_tail(&orphan->new_list, &c->orph_new); + spin_unlock(&c->orphan_lock); + dbg_gen("ino %lu", inum); + return 0; +} + +/** + * ubifs_delete_orphan - delete an orphan. + * @c: UBIFS file-system description object + * @inum: orphan inode number + * + * Delete an orphan. This function is called when an inode is deleted. + */ +void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) +{ + struct ubifs_orphan *o; + struct rb_node *p; + + spin_lock(&c->orphan_lock); + p = c->orph_tree.rb_node; + while (p) { + o = rb_entry(p, struct ubifs_orphan, rb); + if (inum < o->inum) + p = p->rb_left; + else if (inum > o->inum) + p = p->rb_right; + else { + if (o->dnext) { + spin_unlock(&c->orphan_lock); + dbg_gen("deleted twice ino %lu", inum); + return; + } + if (o->cnext) { + o->dnext = c->orph_dnext; + c->orph_dnext = o; + spin_unlock(&c->orphan_lock); + dbg_gen("delete later ino %lu", inum); + return; + } + rb_erase(p, &c->orph_tree); + list_del(&o->list); + c->tot_orphans -= 1; + if (o->new) { + list_del(&o->new_list); + c->new_orphans -= 1; + } + spin_unlock(&c->orphan_lock); + kfree(o); + dbg_gen("inum %lu", inum); + return; + } + } + spin_unlock(&c->orphan_lock); + dbg_err("missing orphan ino %lu", inum); + dbg_dump_stack(); +} + +/** + * ubifs_orphan_start_commit - start commit of orphans. + * @c: UBIFS file-system description object + * + * Start commit of orphans. + */ +int ubifs_orphan_start_commit(struct ubifs_info *c) +{ + struct ubifs_orphan *orphan, **last; + + spin_lock(&c->orphan_lock); + last = &c->orph_cnext; + list_for_each_entry(orphan, &c->orph_new, new_list) { + ubifs_assert(orphan->new); + orphan->new = 0; + *last = orphan; + last = &orphan->cnext; + } + *last = orphan->cnext; + c->cmt_orphans = c->new_orphans; + c->new_orphans = 0; + dbg_cmt("%d orphans to commit", c->cmt_orphans); + INIT_LIST_HEAD(&c->orph_new); + if (c->tot_orphans == 0) + c->no_orphs = 1; + else + c->no_orphs = 0; + spin_unlock(&c->orphan_lock); + return 0; +} + +/** + * avail_orphs - calculate available space. + * @c: UBIFS file-system description object + * + * This function returns the number of orphans that can be written in the + * available space. + */ +static int avail_orphs(struct ubifs_info *c) +{ + int avail_lebs, avail, gap; + + avail_lebs = c->orph_lebs - (c->ohead_lnum - c->orph_first) - 1; + avail = avail_lebs * + ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64)); + gap = c->leb_size - c->ohead_offs; + if (gap >= UBIFS_ORPH_NODE_SZ + sizeof(__le64)) + avail += (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64); + return avail; +} + +/** + * tot_avail_orphs - calculate total space. + * @c: UBIFS file-system description object + * + * This function returns the number of orphans that can be written in half + * the total space. That leaves half the space for adding new orphans. + */ +static int tot_avail_orphs(struct ubifs_info *c) +{ + int avail_lebs, avail; + + avail_lebs = c->orph_lebs; + avail = avail_lebs * + ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64)); + return avail / 2; +} + +/** + * do_write_orph_node - write a node + * @c: UBIFS file-system description object + * @len: length of node + * @atomic: write atomically + * + * This function writes a node to the orphan head from the orphan buffer. If + * %atomic is not zero, then the write is done atomically. On success, %0 is + * returned, otherwise a negative error code is returned. + */ +static int do_write_orph_node(struct ubifs_info *c, int len, int atomic) +{ + int err = 0; + + if (atomic) { + ubifs_assert(c->ohead_offs == 0); + ubifs_prepare_node(c, c->orph_buf, len, 1); + len = ALIGN(len, c->min_io_size); + err = ubifs_leb_change(c, c->ohead_lnum, c->orph_buf, len, + UBI_SHORTTERM); + } else { + if (c->ohead_offs == 0) { + /* Ensure LEB has been unmapped */ + err = ubifs_leb_unmap(c, c->ohead_lnum); + if (err) + return err; + } + err = ubifs_write_node(c, c->orph_buf, len, c->ohead_lnum, + c->ohead_offs, UBI_SHORTTERM); + } + return err; +} + +/** + * write_orph_node - write an orph node + * @c: UBIFS file-system description object + * @atomic: write atomically + * + * This function builds an orph node from the cnext list and writes it to the + * orphan head. On success, %0 is returned, otherwise a negative error code + * is returned. + */ +static int write_orph_node(struct ubifs_info *c, int atomic) +{ + struct ubifs_orphan *orphan, *cnext; + struct ubifs_orph_node *orph; + int gap, err, len, cnt, i; + + ubifs_assert(c->cmt_orphans > 0); + gap = c->leb_size - c->ohead_offs; + if (gap < UBIFS_ORPH_NODE_SZ + sizeof(__le64)) { + c->ohead_lnum += 1; + c->ohead_offs = 0; + gap = c->leb_size; + if (c->ohead_lnum > c->orph_last) { + /* + * We limit the number of orphans so that this should + * never happen. + */ + ubifs_err("out of space in orphan area"); + return -EINVAL; + } + } + cnt = (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64); + if (cnt > c->cmt_orphans) + cnt = c->cmt_orphans; + len = UBIFS_ORPH_NODE_SZ + cnt * sizeof(__le64); + ubifs_assert(c->orph_buf); + orph = c->orph_buf; + orph->ch.node_type = UBIFS_ORPH_NODE; + spin_lock(&c->orphan_lock); + cnext = c->orph_cnext; + for (i = 0; i < cnt; i++) { + orphan = cnext; + orph->inos[i] = cpu_to_le64(orphan->inum); + cnext = orphan->cnext; + orphan->cnext = NULL; + } + c->orph_cnext = cnext; + c->cmt_orphans -= cnt; + spin_unlock(&c->orphan_lock); + if (c->cmt_orphans) + orph->cmt_no = cpu_to_le64(c->cmt_no + 1); + else + /* Mark the last node of the commit */ + orph->cmt_no = cpu_to_le64((c->cmt_no + 1) | (1ULL << 63)); + ubifs_assert(c->ohead_offs + len <= c->leb_size); + ubifs_assert(c->ohead_lnum >= c->orph_first); + ubifs_assert(c->ohead_lnum <= c->orph_last); + err = do_write_orph_node(c, len, atomic); + c->ohead_offs += ALIGN(len, c->min_io_size); + c->ohead_offs = ALIGN(c->ohead_offs, 8); + return err; +} + +/** + * write_orph_nodes - write orph nodes until there are no more to commit + * @c: UBIFS file-system description object + * @atomic: write atomically + * + * This function writes orph nodes for all the orphans to commit. On success, + * %0 is returned, otherwise a negative error code is returned. + */ +static int write_orph_nodes(struct ubifs_info *c, int atomic) +{ + int err; + + while (c->cmt_orphans > 0) { + err = write_orph_node(c, atomic); + if (err) + return err; + } + if (atomic) { + int lnum; + + /* Unmap any unused LEBs after consolidation */ + lnum = c->ohead_lnum + 1; + for (lnum = c->ohead_lnum + 1; lnum <= c->orph_last; lnum++) { + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + } + return 0; +} + +/** + * consolidate - consolidate the orphan area. + * @c: UBIFS file-system description object + * + * This function enables consolidation by putting all the orphans into the list + * to commit. The list is in the order that the orphans were added, and the + * LEBs are written atomically in order, so at no time can orphans be lost by + * an unclean unmount. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int consolidate(struct ubifs_info *c) +{ + int tot_avail = tot_avail_orphs(c), err = 0; + + spin_lock(&c->orphan_lock); + dbg_cmt("there is space for %d orphans and there are %d", + tot_avail, c->tot_orphans); + if (c->tot_orphans - c->new_orphans <= tot_avail) { + struct ubifs_orphan *orphan, **last; + int cnt = 0; + + /* Change the cnext list to include all non-new orphans */ + last = &c->orph_cnext; + list_for_each_entry(orphan, &c->orph_list, list) { + if (orphan->new) + continue; + *last = orphan; + last = &orphan->cnext; + cnt += 1; + } + *last = orphan->cnext; + ubifs_assert(cnt == c->tot_orphans - c->new_orphans); + c->cmt_orphans = cnt; + c->ohead_lnum = c->orph_first; + c->ohead_offs = 0; + } else { + /* + * We limit the number of orphans so that this should + * never happen. + */ + ubifs_err("out of space in orphan area"); + err = -EINVAL; + } + spin_unlock(&c->orphan_lock); + return err; +} + +/** + * commit_orphans - commit orphans. + * @c: UBIFS file-system description object + * + * This function commits orphans to flash. On success, %0 is returned, + * otherwise a negative error code is returned. + */ +static int commit_orphans(struct ubifs_info *c) +{ + int avail, atomic = 0, err; + + ubifs_assert(c->cmt_orphans > 0); + avail = avail_orphs(c); + if (avail < c->cmt_orphans) { + /* Not enough space to write new orphans, so consolidate */ + err = consolidate(c); + if (err) + return err; + atomic = 1; + } + err = write_orph_nodes(c, atomic); + return err; +} + +/** + * erase_deleted - erase the orphans marked for deletion. + * @c: UBIFS file-system description object + * + * During commit, the orphans being committed cannot be deleted, so they are + * marked for deletion and deleted by this function. Also, the recovery + * adds killed orphans to the deletion list, and therefore they are deleted + * here too. + */ +static void erase_deleted(struct ubifs_info *c) +{ + struct ubifs_orphan *orphan, *dnext; + + spin_lock(&c->orphan_lock); + dnext = c->orph_dnext; + while (dnext) { + orphan = dnext; + dnext = orphan->dnext; + ubifs_assert(!orphan->new); + rb_erase(&orphan->rb, &c->orph_tree); + list_del(&orphan->list); + c->tot_orphans -= 1; + dbg_gen("deleting orphan ino %lu", orphan->inum); + kfree(orphan); + } + c->orph_dnext = NULL; + spin_unlock(&c->orphan_lock); +} + +/** + * ubifs_orphan_end_commit - end commit of orphans. + * @c: UBIFS file-system description object + * + * End commit of orphans. + */ +int ubifs_orphan_end_commit(struct ubifs_info *c) +{ + int err; + + if (c->cmt_orphans != 0) { + err = commit_orphans(c); + if (err) + return err; + } + erase_deleted(c); + err = dbg_check_orphans(c); + return err; +} + +/** + * clear_orphans - erase all LEBs used for orphans. + * @c: UBIFS file-system description object + * + * If recovery is not required, then the orphans from the previous session + * are not needed. This function locates the LEBs used to record + * orphans, and un-maps them. + */ +static int clear_orphans(struct ubifs_info *c) +{ + int lnum, err; + + for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + c->ohead_lnum = c->orph_first; + c->ohead_offs = 0; + return 0; +} + +/** + * insert_dead_orphan - insert an orphan. + * @c: UBIFS file-system description object + * @inum: orphan inode number + * + * This function is a helper to the 'do_kill_orphans()' function. The orphan + * must be kept until the next commit, so it is added to the rb-tree and the + * deletion list. + */ +static int insert_dead_orphan(struct ubifs_info *c, ino_t inum) +{ + struct ubifs_orphan *orphan, *o; + struct rb_node **p, *parent = NULL; + + orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_KERNEL); + if (!orphan) + return -ENOMEM; + orphan->inum = inum; + + p = &c->orph_tree.rb_node; + while (*p) { + parent = *p; + o = rb_entry(parent, struct ubifs_orphan, rb); + if (inum < o->inum) + p = &(*p)->rb_left; + else if (inum > o->inum) + p = &(*p)->rb_right; + else { + /* Already added - no problem */ + kfree(orphan); + return 0; + } + } + c->tot_orphans += 1; + rb_link_node(&orphan->rb, parent, p); + rb_insert_color(&orphan->rb, &c->orph_tree); + list_add_tail(&orphan->list, &c->orph_list); + orphan->dnext = c->orph_dnext; + c->orph_dnext = orphan; + dbg_mnt("ino %lu, new %d, tot %d", + inum, c->new_orphans, c->tot_orphans); + return 0; +} + +/** + * do_kill_orphans - remove orphan inodes from the index. + * @c: UBIFS file-system description object + * @sleb: scanned LEB + * @last_cmt_no: cmt_no of last orph node read is passed and returned here + * @outofdate: whether the LEB is out of date is returned here + * @last_flagged: whether the end orph node is encountered + * + * This function is a helper to the 'kill_orphans()' function. It goes through + * every orphan node in a LEB and for every inode number recorded, removes + * all keys for that inode from the TNC. + */ +static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, + unsigned long long *last_cmt_no, int *outofdate, + int *last_flagged) +{ + struct ubifs_scan_node *snod; + struct ubifs_orph_node *orph; + unsigned long long cmt_no; + ino_t inum; + int i, n, err, first = 1; + + list_for_each_entry(snod, &sleb->nodes, list) { + if (snod->type != UBIFS_ORPH_NODE) { + ubifs_err("invalid node type %d in orphan area at " + "%d:%d", snod->type, sleb->lnum, snod->offs); + dbg_dump_node(c, snod->node); + return -EINVAL; + } + + orph = snod->node; + + /* Check commit number */ + cmt_no = le64_to_cpu(orph->cmt_no) & LLONG_MAX; + /* + * The commit number on the master node may be less, because + * of a failed commit. If there are several failed commits in a + * row, the commit number written on orph nodes will continue to + * increase (because the commit number is adjusted here) even + * though the commit number on the master node stays the same + * because the master node has not been re-written. + */ + if (cmt_no > c->cmt_no) + c->cmt_no = cmt_no; + if (cmt_no < *last_cmt_no && *last_flagged) { + /* + * The last orph node had a higher commit number and was + * flagged as the last written for that commit number. + * That makes this orph node, out of date. + */ + if (!first) { + ubifs_err("out of order commit number %llu in " + "orphan node at %d:%d", + cmt_no, sleb->lnum, snod->offs); + dbg_dump_node(c, snod->node); + return -EINVAL; + } + dbg_rcvry("out of date LEB %d", sleb->lnum); + *outofdate = 1; + return 0; + } + + if (first) + first = 0; + + n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; + for (i = 0; i < n; i++) { + inum = le64_to_cpu(orph->inos[i]); + dbg_rcvry("deleting orphaned inode %lu", inum); + err = ubifs_tnc_remove_ino(c, inum); + if (err) + return err; + err = insert_dead_orphan(c, inum); + if (err) + return err; + } + + *last_cmt_no = cmt_no; + if (le64_to_cpu(orph->cmt_no) & (1ULL << 63)) { + dbg_rcvry("last orph node for commit %llu at %d:%d", + cmt_no, sleb->lnum, snod->offs); + *last_flagged = 1; + } else + *last_flagged = 0; + } + + return 0; +} + +/** + * kill_orphans - remove all orphan inodes from the index. + * @c: UBIFS file-system description object + * + * If recovery is required, then orphan inodes recorded during the previous + * session (which ended with an unclean unmount) must be deleted from the index. + * This is done by updating the TNC, but since the index is not updated until + * the next commit, the LEBs where the orphan information is recorded are not + * erased until the next commit. + */ +static int kill_orphans(struct ubifs_info *c) +{ + unsigned long long last_cmt_no = 0; + int lnum, err = 0, outofdate = 0, last_flagged = 0; + + c->ohead_lnum = c->orph_first; + c->ohead_offs = 0; + /* Check no-orphans flag and skip this if no orphans */ + if (c->no_orphs) { + dbg_rcvry("no orphans"); + return 0; + } + /* + * Orph nodes always start at c->orph_first and are written to each + * successive LEB in turn. Generally unused LEBs will have been unmapped + * but may contain out of date orph nodes if the unmap didn't go + * through. In addition, the last orph node written for each commit is + * marked (top bit of orph->cmt_no is set to 1). It is possible that + * there are orph nodes from the next commit (i.e. the commit did not + * complete successfully). In that case, no orphans will have been lost + * due to the way that orphans are written, and any orphans added will + * be valid orphans anyway and so can be deleted. + */ + for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { + struct ubifs_scan_leb *sleb; + + dbg_rcvry("LEB %d", lnum); + sleb = ubifs_scan(c, lnum, 0, c->sbuf); + if (IS_ERR(sleb)) { + sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); + if (IS_ERR(sleb)) { + err = PTR_ERR(sleb); + break; + } + } + err = do_kill_orphans(c, sleb, &last_cmt_no, &outofdate, + &last_flagged); + if (err || outofdate) { + ubifs_scan_destroy(sleb); + break; + } + if (sleb->endpt) { + c->ohead_lnum = lnum; + c->ohead_offs = sleb->endpt; + } + ubifs_scan_destroy(sleb); + } + return err; +} + +/** + * ubifs_mount_orphans - delete orphan inodes and erase LEBs that recorded them. + * @c: UBIFS file-system description object + * @unclean: indicates recovery from unclean unmount + * @read_only: indicates read only mount + * + * This function is called when mounting to erase orphans from the previous + * session. If UBIFS was not unmounted cleanly, then the inodes recorded as + * orphans are deleted. + */ +int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only) +{ + int err = 0; + + c->max_orphans = tot_avail_orphs(c); + + if (!read_only) { + c->orph_buf = vmalloc(c->leb_size); + if (!c->orph_buf) + return -ENOMEM; + } + + if (unclean) + err = kill_orphans(c); + else if (!read_only) + err = clear_orphans(c); + + return err; +} + +#ifdef CONFIG_UBIFS_FS_DEBUG + +struct check_orphan { + struct rb_node rb; + ino_t inum; +}; + +struct check_info { + unsigned long last_ino; + unsigned long tot_inos; + unsigned long missing; + unsigned long long leaf_cnt; + struct ubifs_ino_node *node; + struct rb_root root; +}; + +static int dbg_find_orphan(struct ubifs_info *c, ino_t inum) +{ + struct ubifs_orphan *o; + struct rb_node *p; + + spin_lock(&c->orphan_lock); + p = c->orph_tree.rb_node; + while (p) { + o = rb_entry(p, struct ubifs_orphan, rb); + if (inum < o->inum) + p = p->rb_left; + else if (inum > o->inum) + p = p->rb_right; + else { + spin_unlock(&c->orphan_lock); + return 1; + } + } + spin_unlock(&c->orphan_lock); + return 0; +} + +static int dbg_ins_check_orphan(struct rb_root *root, ino_t inum) +{ + struct check_orphan *orphan, *o; + struct rb_node **p, *parent = NULL; + + orphan = kzalloc(sizeof(struct check_orphan), GFP_NOFS); + if (!orphan) + return -ENOMEM; + orphan->inum = inum; + + p = &root->rb_node; + while (*p) { + parent = *p; + o = rb_entry(parent, struct check_orphan, rb); + if (inum < o->inum) + p = &(*p)->rb_left; + else if (inum > o->inum) + p = &(*p)->rb_right; + else { + kfree(orphan); + return 0; + } + } + rb_link_node(&orphan->rb, parent, p); + rb_insert_color(&orphan->rb, root); + return 0; +} + +static int dbg_find_check_orphan(struct rb_root *root, ino_t inum) +{ + struct check_orphan *o; + struct rb_node *p; + + p = root->rb_node; + while (p) { + o = rb_entry(p, struct check_orphan, rb); + if (inum < o->inum) + p = p->rb_left; + else if (inum > o->inum) + p = p->rb_right; + else + return 1; + } + return 0; +} + +static void dbg_free_check_tree(struct rb_root *root) +{ + struct rb_node *this = root->rb_node; + struct check_orphan *o; + + while (this) { + if (this->rb_left) { + this = this->rb_left; + continue; + } else if (this->rb_right) { + this = this->rb_right; + continue; + } + o = rb_entry(this, struct check_orphan, rb); + this = rb_parent(this); + if (this) { + if (this->rb_left == &o->rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + kfree(o); + } +} + +static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr, + void *priv) +{ + struct check_info *ci = priv; + ino_t inum; + int err; + + inum = key_inum(c, &zbr->key); + if (inum != ci->last_ino) { + /* Lowest node type is the inode node, so it comes first */ + if (key_type(c, &zbr->key) != UBIFS_INO_KEY) + ubifs_err("found orphan node ino %lu, type %d", inum, + key_type(c, &zbr->key)); + ci->last_ino = inum; + ci->tot_inos += 1; + err = ubifs_tnc_read_node(c, zbr, ci->node); + if (err) { + ubifs_err("node read failed, error %d", err); + return err; + } + if (ci->node->nlink == 0) + /* Must be recorded as an orphan */ + if (!dbg_find_check_orphan(&ci->root, inum) && + !dbg_find_orphan(c, inum)) { + ubifs_err("missing orphan, ino %lu", inum); + ci->missing += 1; + } + } + ci->leaf_cnt += 1; + return 0; +} + +static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb) +{ + struct ubifs_scan_node *snod; + struct ubifs_orph_node *orph; + ino_t inum; + int i, n, err; + + list_for_each_entry(snod, &sleb->nodes, list) { + cond_resched(); + if (snod->type != UBIFS_ORPH_NODE) + continue; + orph = snod->node; + n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; + for (i = 0; i < n; i++) { + inum = le64_to_cpu(orph->inos[i]); + err = dbg_ins_check_orphan(&ci->root, inum); + if (err) + return err; + } + } + return 0; +} + +static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) +{ + int lnum, err = 0; + + /* Check no-orphans flag and skip this if no orphans */ + if (c->no_orphs) + return 0; + + for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { + struct ubifs_scan_leb *sleb; + + sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); + if (IS_ERR(sleb)) { + err = PTR_ERR(sleb); + break; + } + + err = dbg_read_orphans(ci, sleb); + ubifs_scan_destroy(sleb); + if (err) + break; + } + + return err; +} + +static int dbg_check_orphans(struct ubifs_info *c) +{ + struct check_info ci; + int err; + + if (!(ubifs_chk_flags & UBIFS_CHK_ORPH)) + return 0; + + ci.last_ino = 0; + ci.tot_inos = 0; + ci.missing = 0; + ci.leaf_cnt = 0; + ci.root = RB_ROOT; + ci.node = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS); + if (!ci.node) { + ubifs_err("out of memory"); + return -ENOMEM; + } + + err = dbg_scan_orphans(c, &ci); + if (err) + goto out; + + err = dbg_walk_index(c, &dbg_orphan_check, NULL, &ci); + if (err) { + ubifs_err("cannot scan TNC, error %d", err); + goto out; + } + + if (ci.missing) { + ubifs_err("%lu missing orphan(s)", ci.missing); + err = -EINVAL; + goto out; + } + + dbg_cmt("last inode number is %lu", ci.last_ino); + dbg_cmt("total number of inodes is %lu", ci.tot_inos); + dbg_cmt("total number of leaf nodes is %llu", ci.leaf_cnt); + +out: + dbg_free_check_tree(&ci.root); + kfree(ci.node); + return err; +} + +#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c new file mode 100644 index 0000000..77d26c1 --- /dev/null +++ b/fs/ubifs/recovery.c @@ -0,0 +1,1519 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements functions needed to recover from unclean un-mounts. + * When UBIFS is mounted, it checks a flag on the master node to determine if + * an un-mount was completed sucessfully. If not, the process of mounting + * incorparates additional checking and fixing of on-flash data structures. + * UBIFS always cleans away all remnants of an unclean un-mount, so that + * errors do not accumulate. However UBIFS defers recovery if it is mounted + * read-only, and the flash is not modified in that case. + */ + +#include +#include "ubifs.h" + +/** + * is_empty - determine whether a buffer is empty (contains all 0xff). + * @buf: buffer to clean + * @len: length of buffer + * + * This function returns %1 if the buffer is empty (contains all 0xff) otherwise + * %0 is returned. + */ +static int is_empty(void *buf, int len) +{ + uint8_t *p = buf; + int i; + + for (i = 0; i < len; i++) + if (*p++ != 0xff) + return 0; + return 1; +} + +/** + * get_master_node - get the last valid master node allowing for corruption. + * @c: UBIFS file-system description object + * @lnum: LEB number + * @pbuf: buffer containing the LEB read, is returned here + * @mst: master node, if found, is returned here + * @cor: corruption, if found, is returned here + * + * This function allocates a buffer, reads the LEB into it, and finds and + * returns the last valid master node allowing for one area of corruption. + * The corrupt area, if there is one, must be consistent with the assumption + * that it is the result of an unclean unmount while the master node was being + * written. Under those circumstances, it is valid to use the previously written + * master node. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf, + struct ubifs_mst_node **mst, void **cor) +{ + const int sz = c->mst_node_alsz; + int err, offs, len; + void *sbuf, *buf; + + sbuf = vmalloc(c->leb_size); + if (!sbuf) + return -ENOMEM; + + err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size); + if (err && err != -EBADMSG) + goto out_free; + + /* Find the first position that is definitely not a node */ + offs = 0; + buf = sbuf; + len = c->leb_size; + while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) { + struct ubifs_ch *ch = buf; + + if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) + break; + offs += sz; + buf += sz; + len -= sz; + } + /* See if there was a valid master node before that */ + if (offs) { + int ret; + + offs -= sz; + buf -= sz; + len += sz; + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); + if (ret != SCANNED_A_NODE && offs) { + /* Could have been corruption so check one place back */ + offs -= sz; + buf -= sz; + len += sz; + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); + if (ret != SCANNED_A_NODE) + /* + * We accept only one area of corruption because + * we are assuming that it was caused while + * trying to write a master node. + */ + goto out_err; + } + if (ret == SCANNED_A_NODE) { + struct ubifs_ch *ch = buf; + + if (ch->node_type != UBIFS_MST_NODE) + goto out_err; + dbg_rcvry("found a master node at %d:%d", lnum, offs); + *mst = buf; + offs += sz; + buf += sz; + len -= sz; + } + } + /* Check for corruption */ + if (offs < c->leb_size) { + if (!is_empty(buf, min_t(int, len, sz))) { + *cor = buf; + dbg_rcvry("found corruption at %d:%d", lnum, offs); + } + offs += sz; + buf += sz; + len -= sz; + } + /* Check remaining empty space */ + if (offs < c->leb_size) + if (!is_empty(buf, len)) + goto out_err; + *pbuf = sbuf; + return 0; + +out_err: + err = -EINVAL; +out_free: + vfree(sbuf); + *mst = NULL; + *cor = NULL; + return err; +} + +/** + * write_rcvrd_mst_node - write recovered master node. + * @c: UBIFS file-system description object + * @mst: master node + * + * This function returns %0 on success and a negative error code on failure. + */ +static int write_rcvrd_mst_node(struct ubifs_info *c, + struct ubifs_mst_node *mst) +{ + int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz; + uint32_t save_flags; + + dbg_rcvry("recovery"); + + save_flags = mst->flags; + mst->flags = cpu_to_le32(le32_to_cpu(mst->flags) | UBIFS_MST_RCVRY); + + ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1); + err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM); + if (err) + goto out; + err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM); + if (err) + goto out; +out: + mst->flags = save_flags; + return err; +} + +/** + * ubifs_recover_master_node - recover the master node. + * @c: UBIFS file-system description object + * + * This function recovers the master node from corruption that may occur due to + * an unclean unmount. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_recover_master_node(struct ubifs_info *c) +{ + void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL; + struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst; + const int sz = c->mst_node_alsz; + int err, offs1, offs2; + + dbg_rcvry("recovery"); + + err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1); + if (err) + goto out_free; + + err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2); + if (err) + goto out_free; + + if (mst1) { + offs1 = (void *)mst1 - buf1; + if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) && + (offs1 == 0 && !cor1)) { + /* + * mst1 was written by recovery at offset 0 with no + * corruption. + */ + dbg_rcvry("recovery recovery"); + mst = mst1; + } else if (mst2) { + offs2 = (void *)mst2 - buf2; + if (offs1 == offs2) { + /* Same offset, so must be the same */ + if (memcmp((void *)mst1 + UBIFS_CH_SZ, + (void *)mst2 + UBIFS_CH_SZ, + UBIFS_MST_NODE_SZ - UBIFS_CH_SZ)) + goto out_err; + mst = mst1; + } else if (offs2 + sz == offs1) { + /* 1st LEB was written, 2nd was not */ + if (cor1) + goto out_err; + mst = mst1; + } else if (offs1 == 0 && offs2 + sz >= c->leb_size) { + /* 1st LEB was unmapped and written, 2nd not */ + if (cor1) + goto out_err; + mst = mst1; + } else + goto out_err; + } else { + /* + * 2nd LEB was unmapped and about to be written, so + * there must be only one master node in the first LEB + * and no corruption. + */ + if (offs1 != 0 || cor1) + goto out_err; + mst = mst1; + } + } else { + if (!mst2) + goto out_err; + /* + * 1st LEB was unmapped and about to be written, so there must + * be no room left in 2nd LEB. + */ + offs2 = (void *)mst2 - buf2; + if (offs2 + sz + sz <= c->leb_size) + goto out_err; + mst = mst2; + } + + dbg_rcvry("recovered master node from LEB %d", + (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1)); + + memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ); + + if ((c->vfs_sb->s_flags & MS_RDONLY)) { + /* Read-only mode. Keep a copy for switching to rw mode */ + c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL); + if (!c->rcvrd_mst_node) { + err = -ENOMEM; + goto out_free; + } + memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ); + } else { + /* Write the recovered master node */ + c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1; + err = write_rcvrd_mst_node(c, c->mst_node); + if (err) + goto out_free; + } + + vfree(buf2); + vfree(buf1); + + return 0; + +out_err: + err = -EINVAL; +out_free: + ubifs_err("failed to recover master node"); + if (mst1) { + dbg_err("dumping first master node"); + dbg_dump_node(c, mst1); + } + if (mst2) { + dbg_err("dumping second master node"); + dbg_dump_node(c, mst2); + } + vfree(buf2); + vfree(buf1); + return err; +} + +/** + * ubifs_write_rcvrd_mst_node - write the recovered master node. + * @c: UBIFS file-system description object + * + * This function writes the master node that was recovered during mounting in + * read-only mode and must now be written because we are remounting rw. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_write_rcvrd_mst_node(struct ubifs_info *c) +{ + int err; + + if (!c->rcvrd_mst_node) + return 0; + c->rcvrd_mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); + err = write_rcvrd_mst_node(c, c->rcvrd_mst_node); + if (err) + return err; + kfree(c->rcvrd_mst_node); + c->rcvrd_mst_node = NULL; + return 0; +} + +/** + * is_last_write - determine if an offset was in the last write to a LEB. + * @c: UBIFS file-system description object + * @buf: buffer to check + * @offs: offset to check + * + * This function returns %1 if @offs was in the last write to the LEB whose data + * is in @buf, otherwise %0 is returned. The determination is made by checking + * for subsequent empty space starting from the next min_io_size boundary (or a + * bit less than the common header size if min_io_size is one). + */ +static int is_last_write(const struct ubifs_info *c, void *buf, int offs) +{ + int empty_offs; + int check_len; + uint8_t *p; + + if (c->min_io_size == 1) { + check_len = c->leb_size - offs; + p = buf + check_len; + for (; check_len > 0; check_len--) + if (*--p != 0xff) + break; + /* + * 'check_len' is the size of the corruption which cannot be + * more than the size of 1 node if it was caused by an unclean + * unmount. + */ + if (check_len > UBIFS_MAX_NODE_SZ) + return 0; + return 1; + } + + /* + * Round up to the next c->min_io_size boundary i.e. 'offs' is in the + * last wbuf written. After that should be empty space. + */ + empty_offs = ALIGN(offs + 1, c->min_io_size); + check_len = c->leb_size - empty_offs; + p = buf + empty_offs - offs; + + for (; check_len > 0; check_len--) + if (*p++ != 0xff) + return 0; + return 1; +} + +/** + * clean_buf - clean the data from an LEB sitting in a buffer. + * @c: UBIFS file-system description object + * @buf: buffer to clean + * @lnum: LEB number to clean + * @offs: offset from which to clean + * @len: length of buffer + * + * This function pads up to the next min_io_size boundary (if there is one) and + * sets empty space to all 0xff. @buf, @offs and @len are updated to the next + * min_io_size boundary (if there is one). + */ +static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, + int *offs, int *len) +{ + int empty_offs, pad_len; + + lnum = lnum; + dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs); + + if (c->min_io_size == 1) { + memset(*buf, 0xff, c->leb_size - *offs); + return; + } + + ubifs_assert(!(*offs & 7)); + empty_offs = ALIGN(*offs, c->min_io_size); + pad_len = empty_offs - *offs; + ubifs_pad(c, *buf, pad_len); + *offs += pad_len; + *buf += pad_len; + *len -= pad_len; + memset(*buf, 0xff, c->leb_size - empty_offs); +} + +/** + * no_more_nodes - determine if there are no more nodes in a buffer. + * @c: UBIFS file-system description object + * @buf: buffer to check + * @len: length of buffer + * @lnum: LEB number of the LEB from which @buf was read + * @offs: offset from which @buf was read + * + * This function scans @buf for more nodes and returns %0 is a node is found and + * %1 if no more nodes are found. + */ +static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, + int lnum, int offs) +{ + int skip, next_offs = 0; + + if (len > UBIFS_DATA_NODE_SZ) { + struct ubifs_ch *ch = buf; + int dlen = le32_to_cpu(ch->len); + + if (ch->node_type == UBIFS_DATA_NODE && dlen >= UBIFS_CH_SZ && + dlen <= UBIFS_MAX_DATA_NODE_SZ) + /* The corrupt node looks like a data node */ + next_offs = ALIGN(offs + dlen, 8); + } + + if (c->min_io_size == 1) + skip = 8; + else + skip = ALIGN(offs + 1, c->min_io_size) - offs; + + offs += skip; + buf += skip; + len -= skip; + while (len > 8) { + struct ubifs_ch *ch = buf; + uint32_t magic = le32_to_cpu(ch->magic); + int ret; + + if (magic == UBIFS_NODE_MAGIC) { + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); + if (ret == SCANNED_A_NODE || ret > 0) { + /* + * There is a small chance this is just data in + * a data node, so check that possibility. e.g. + * this is part of a file that itself contains + * a UBIFS image. + */ + if (next_offs && offs + le32_to_cpu(ch->len) <= + next_offs) + continue; + dbg_rcvry("unexpected node at %d:%d", lnum, + offs); + return 0; + } + } + offs += 8; + buf += 8; + len -= 8; + } + return 1; +} + +/** + * fix_unclean_leb - fix an unclean LEB. + * @c: UBIFS file-system description object + * @sleb: scanned LEB information + * @start: offset where scan started + */ +static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, + int start) +{ + int lnum = sleb->lnum, endpt = start; + + /* Get the end offset of the last node we are keeping */ + if (!list_empty(&sleb->nodes)) { + struct ubifs_scan_node *snod; + + snod = list_entry(sleb->nodes.prev, + struct ubifs_scan_node, list); + endpt = snod->offs + snod->len; + } + + if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) { + /* Add to recovery list */ + struct ubifs_unclean_leb *ucleb; + + dbg_rcvry("need to fix LEB %d start %d endpt %d", + lnum, start, sleb->endpt); + ucleb = kzalloc(sizeof(struct ubifs_unclean_leb), GFP_NOFS); + if (!ucleb) + return -ENOMEM; + ucleb->lnum = lnum; + ucleb->endpt = endpt; + list_add_tail(&ucleb->list, &c->unclean_leb_list); + } else { + /* Write the fixed LEB back to flash */ + int err; + + dbg_rcvry("fixing LEB %d start %d endpt %d", + lnum, start, sleb->endpt); + if (endpt == 0) { + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } else { + int len = ALIGN(endpt, c->min_io_size); + + if (start) { + err = ubi_read(c->ubi, lnum, sleb->buf, 0, + start); + if (err) + return err; + } + /* Pad to min_io_size */ + if (len > endpt) { + int pad_len = len - ALIGN(endpt, 8); + + if (pad_len > 0) { + void *buf = sleb->buf + len - pad_len; + + ubifs_pad(c, buf, pad_len); + } + } + err = ubi_leb_change(c->ubi, lnum, sleb->buf, len, + UBI_UNKNOWN); + if (err) + return err; + } + } + return 0; +} + +/** + * drop_incomplete_group - drop nodes from an incomplete group. + * @sleb: scanned LEB information + * @offs: offset of dropped nodes is returned here + * + * This function returns %1 if nodes are dropped and %0 otherwise. + */ +static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) +{ + int dropped = 0; + + while (!list_empty(&sleb->nodes)) { + struct ubifs_scan_node *snod; + struct ubifs_ch *ch; + + snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, + list); + ch = snod->node; + if (ch->group_type != UBIFS_IN_NODE_GROUP) + return dropped; + dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); + *offs = snod->offs; + list_del(&snod->list); + kfree(snod); + sleb->nodes_cnt -= 1; + dropped = 1; + } + return dropped; +} + +/** + * ubifs_recover_leb - scan and recover a LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number + * @offs: offset + * @sbuf: LEB-sized buffer to use + * @grouped: nodes may be grouped for recovery + * + * This function does a scan of a LEB, but caters for errors that might have + * been caused by the unclean unmount from which we are attempting to recover. + * + * This function returns %0 on success and a negative error code on failure. + */ +struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, + int offs, void *sbuf, int grouped) +{ + int err, len = c->leb_size - offs, need_clean = 0, quiet = 1; + int empty_chkd = 0, start = offs; + struct ubifs_scan_leb *sleb; + void *buf = sbuf + offs; + + dbg_rcvry("%d:%d", lnum, offs); + + sleb = ubifs_start_scan(c, lnum, offs, sbuf); + if (IS_ERR(sleb)) + return sleb; + + if (sleb->ecc) + need_clean = 1; + + while (len >= 8) { + int ret; + + dbg_scan("look at LEB %d:%d (%d bytes left)", + lnum, offs, len); + + cond_resched(); + + /* + * Scan quietly until there is an error from which we cannot + * recover + */ + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); + + if (ret == SCANNED_A_NODE) { + /* A valid node, and not a padding node */ + struct ubifs_ch *ch = buf; + int node_len; + + err = ubifs_add_snod(c, sleb, buf, offs); + if (err) + goto error; + node_len = ALIGN(le32_to_cpu(ch->len), 8); + offs += node_len; + buf += node_len; + len -= node_len; + continue; + } + + if (ret > 0) { + /* Padding bytes or a valid padding node */ + offs += ret; + buf += ret; + len -= ret; + continue; + } + + if (ret == SCANNED_EMPTY_SPACE) { + if (!is_empty(buf, len)) { + if (!is_last_write(c, buf, offs)) + break; + clean_buf(c, &buf, lnum, &offs, &len); + need_clean = 1; + } + empty_chkd = 1; + break; + } + + if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) + if (is_last_write(c, buf, offs)) { + clean_buf(c, &buf, lnum, &offs, &len); + need_clean = 1; + empty_chkd = 1; + break; + } + + if (ret == SCANNED_A_CORRUPT_NODE) + if (no_more_nodes(c, buf, len, lnum, offs)) { + clean_buf(c, &buf, lnum, &offs, &len); + need_clean = 1; + empty_chkd = 1; + break; + } + + if (quiet) { + /* Redo the last scan but noisily */ + quiet = 0; + continue; + } + + switch (ret) { + case SCANNED_GARBAGE: + dbg_err("garbage"); + goto corrupted; + case SCANNED_A_CORRUPT_NODE: + case SCANNED_A_BAD_PAD_NODE: + dbg_err("bad node"); + goto corrupted; + default: + dbg_err("unknown"); + goto corrupted; + } + } + + if (!empty_chkd && !is_empty(buf, len)) { + if (is_last_write(c, buf, offs)) { + clean_buf(c, &buf, lnum, &offs, &len); + need_clean = 1; + } else { + ubifs_err("corrupt empty space at LEB %d:%d", + lnum, offs); + goto corrupted; + } + } + + /* Drop nodes from incomplete group */ + if (grouped && drop_incomplete_group(sleb, &offs)) { + buf = sbuf + offs; + len = c->leb_size - offs; + clean_buf(c, &buf, lnum, &offs, &len); + need_clean = 1; + } + + if (offs % c->min_io_size) { + clean_buf(c, &buf, lnum, &offs, &len); + need_clean = 1; + } + + ubifs_end_scan(c, sleb, lnum, offs); + + if (need_clean) { + err = fix_unclean_leb(c, sleb, start); + if (err) + goto error; + } + + return sleb; + +corrupted: + ubifs_scanned_corruption(c, lnum, offs, buf); + err = -EUCLEAN; +error: + ubifs_err("LEB %d scanning failed", lnum); + ubifs_scan_destroy(sleb); + return ERR_PTR(err); +} + +/** + * get_cs_sqnum - get commit start sequence number. + * @c: UBIFS file-system description object + * @lnum: LEB number of commit start node + * @offs: offset of commit start node + * @cs_sqnum: commit start sequence number is returned here + * + * This function returns %0 on success and a negative error code on failure. + */ +static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs, + unsigned long long *cs_sqnum) +{ + struct ubifs_cs_node *cs_node = NULL; + int err, ret; + + dbg_rcvry("at %d:%d", lnum, offs); + cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL); + if (!cs_node) + return -ENOMEM; + if (c->leb_size - offs < UBIFS_CS_NODE_SZ) + goto out_err; + err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ); + if (err && err != -EBADMSG) + goto out_free; + ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0); + if (ret != SCANNED_A_NODE) { + dbg_err("Not a valid node"); + goto out_err; + } + if (cs_node->ch.node_type != UBIFS_CS_NODE) { + dbg_err("Node a CS node, type is %d", cs_node->ch.node_type); + goto out_err; + } + if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) { + dbg_err("CS node cmt_no %llu != current cmt_no %llu", + (unsigned long long)le64_to_cpu(cs_node->cmt_no), + c->cmt_no); + goto out_err; + } + *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum); + dbg_rcvry("commit start sqnum %llu", *cs_sqnum); + kfree(cs_node); + return 0; + +out_err: + err = -EINVAL; +out_free: + ubifs_err("failed to get CS sqnum"); + kfree(cs_node); + return err; +} + +/** + * ubifs_recover_log_leb - scan and recover a log LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number + * @offs: offset + * @sbuf: LEB-sized buffer to use + * + * This function does a scan of a LEB, but caters for errors that might have + * been caused by the unclean unmount from which we are attempting to recover. + * + * This function returns %0 on success and a negative error code on failure. + */ +struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, + int offs, void *sbuf) +{ + struct ubifs_scan_leb *sleb; + int next_lnum; + + dbg_rcvry("LEB %d", lnum); + next_lnum = lnum + 1; + if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs) + next_lnum = UBIFS_LOG_LNUM; + if (next_lnum != c->ltail_lnum) { + /* + * We can only recover at the end of the log, so check that the + * next log LEB is empty or out of date. + */ + sleb = ubifs_scan(c, next_lnum, 0, sbuf); + if (IS_ERR(sleb)) + return sleb; + if (sleb->nodes_cnt) { + struct ubifs_scan_node *snod; + unsigned long long cs_sqnum = c->cs_sqnum; + + snod = list_entry(sleb->nodes.next, + struct ubifs_scan_node, list); + if (cs_sqnum == 0) { + int err; + + err = get_cs_sqnum(c, lnum, offs, &cs_sqnum); + if (err) { + ubifs_scan_destroy(sleb); + return ERR_PTR(err); + } + } + if (snod->sqnum > cs_sqnum) { + ubifs_err("unrecoverable log corruption " + "in LEB %d", lnum); + ubifs_scan_destroy(sleb); + return ERR_PTR(-EUCLEAN); + } + } + ubifs_scan_destroy(sleb); + } + return ubifs_recover_leb(c, lnum, offs, sbuf, 0); +} + +/** + * recover_head - recover a head. + * @c: UBIFS file-system description object + * @lnum: LEB number of head to recover + * @offs: offset of head to recover + * @sbuf: LEB-sized buffer to use + * + * This function ensures that there is no data on the flash at a head location. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int recover_head(const struct ubifs_info *c, int lnum, int offs, + void *sbuf) +{ + int len, err, need_clean = 0; + + if (c->min_io_size > 1) + len = c->min_io_size; + else + len = 512; + if (offs + len > c->leb_size) + len = c->leb_size - offs; + + if (!len) + return 0; + + /* Read at the head location and check it is empty flash */ + err = ubi_read(c->ubi, lnum, sbuf, offs, len); + if (err) + need_clean = 1; + else { + uint8_t *p = sbuf; + + while (len--) + if (*p++ != 0xff) { + need_clean = 1; + break; + } + } + + if (need_clean) { + dbg_rcvry("cleaning head at %d:%d", lnum, offs); + if (offs == 0) + return ubifs_leb_unmap(c, lnum); + err = ubi_read(c->ubi, lnum, sbuf, 0, offs); + if (err) + return err; + return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN); + } + + return 0; +} + +/** + * ubifs_recover_inl_heads - recover index and LPT heads. + * @c: UBIFS file-system description object + * @sbuf: LEB-sized buffer to use + * + * This function ensures that there is no data on the flash at the index and + * LPT head locations. + * + * This deals with the recovery of a half-completed journal commit. UBIFS is + * careful never to overwrite the last version of the index or the LPT. Because + * the index and LPT are wandering trees, data from a half-completed commit will + * not be referenced anywhere in UBIFS. The data will be either in LEBs that are + * assumed to be empty and will be unmapped anyway before use, or in the index + * and LPT heads. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) +{ + int err; + + ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw); + + dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs); + err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf); + if (err) + return err; + + dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs); + err = recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf); + if (err) + return err; + + return 0; +} + +/** + * clean_an_unclean_leb - read and write a LEB to remove corruption. + * @c: UBIFS file-system description object + * @ucleb: unclean LEB information + * @sbuf: LEB-sized buffer to use + * + * This function reads a LEB up to a point pre-determined by the mount recovery, + * checks the nodes, and writes the result back to the flash, thereby cleaning + * off any following corruption, or non-fatal ECC errors. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int clean_an_unclean_leb(const struct ubifs_info *c, + struct ubifs_unclean_leb *ucleb, void *sbuf) +{ + int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1; + void *buf = sbuf; + + dbg_rcvry("LEB %d len %d", lnum, len); + + if (len == 0) { + /* Nothing to read, just unmap it */ + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + return 0; + } + + err = ubi_read(c->ubi, lnum, buf, offs, len); + if (err && err != -EBADMSG) + return err; + + while (len >= 8) { + int ret; + + cond_resched(); + + /* Scan quietly until there is an error */ + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); + + if (ret == SCANNED_A_NODE) { + /* A valid node, and not a padding node */ + struct ubifs_ch *ch = buf; + int node_len; + + node_len = ALIGN(le32_to_cpu(ch->len), 8); + offs += node_len; + buf += node_len; + len -= node_len; + continue; + } + + if (ret > 0) { + /* Padding bytes or a valid padding node */ + offs += ret; + buf += ret; + len -= ret; + continue; + } + + if (ret == SCANNED_EMPTY_SPACE) { + ubifs_err("unexpected empty space at %d:%d", + lnum, offs); + return -EUCLEAN; + } + + if (quiet) { + /* Redo the last scan but noisily */ + quiet = 0; + continue; + } + + ubifs_scanned_corruption(c, lnum, offs, buf); + return -EUCLEAN; + } + + /* Pad to min_io_size */ + len = ALIGN(ucleb->endpt, c->min_io_size); + if (len > ucleb->endpt) { + int pad_len = len - ALIGN(ucleb->endpt, 8); + + if (pad_len > 0) { + buf = c->sbuf + len - pad_len; + ubifs_pad(c, buf, pad_len); + } + } + + /* Write back the LEB atomically */ + err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN); + if (err) + return err; + + dbg_rcvry("cleaned LEB %d", lnum); + + return 0; +} + +/** + * ubifs_clean_lebs - clean LEBs recovered during read-only mount. + * @c: UBIFS file-system description object + * @sbuf: LEB-sized buffer to use + * + * This function cleans a LEB identified during recovery that needs to be + * written but was not because UBIFS was mounted read-only. This happens when + * remounting to read-write mode. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) +{ + dbg_rcvry("recovery"); + while (!list_empty(&c->unclean_leb_list)) { + struct ubifs_unclean_leb *ucleb; + int err; + + ucleb = list_entry(c->unclean_leb_list.next, + struct ubifs_unclean_leb, list); + err = clean_an_unclean_leb(c, ucleb, sbuf); + if (err) + return err; + list_del(&ucleb->list); + kfree(ucleb); + } + return 0; +} + +/** + * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. + * @c: UBIFS file-system description object + * + * Out-of-place garbage collection requires always one empty LEB with which to + * start garbage collection. The LEB number is recorded in c->gc_lnum and is + * written to the master node on unmounting. In the case of an unclean unmount + * the value of gc_lnum recorded in the master node is out of date and cannot + * be used. Instead, recovery must allocate an empty LEB for this purpose. + * However, there may not be enough empty space, in which case it must be + * possible to GC the dirtiest LEB into the GC head LEB. + * + * This function also runs the commit which causes the TNC updates from + * size-recovery and orphans to be written to the flash. That is important to + * ensure correct replay order for subsequent mounts. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_rcvry_gc_commit(struct ubifs_info *c) +{ + struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; + struct ubifs_lprops lp; + int lnum, err; + + c->gc_lnum = -1; + if (wbuf->lnum == -1) { + dbg_rcvry("no GC head LEB"); + goto find_free; + } + /* + * See whether the used space in the dirtiest LEB fits in the GC head + * LEB. + */ + if (wbuf->offs == c->leb_size) { + dbg_rcvry("no room in GC head LEB"); + goto find_free; + } + err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); + if (err) { + if (err == -ENOSPC) + dbg_err("could not find a dirty LEB"); + return err; + } + ubifs_assert(!(lp.flags & LPROPS_INDEX)); + lnum = lp.lnum; + if (lp.free + lp.dirty == c->leb_size) { + /* An empty LEB was returned */ + if (lp.free != c->leb_size) { + err = ubifs_change_one_lp(c, lnum, c->leb_size, + 0, 0, 0, 0); + if (err) + return err; + } + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + c->gc_lnum = lnum; + dbg_rcvry("allocated LEB %d for GC", lnum); + /* Run the commit */ + dbg_rcvry("committing"); + return ubifs_run_commit(c); + } + /* + * There was no empty LEB so the used space in the dirtiest LEB must fit + * in the GC head LEB. + */ + if (lp.free + lp.dirty < wbuf->offs) { + dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d", + lnum, wbuf->lnum, wbuf->offs); + err = ubifs_return_leb(c, lnum); + if (err) + return err; + goto find_free; + } + /* + * We run the commit before garbage collection otherwise subsequent + * mounts will see the GC and orphan deletion in a different order. + */ + dbg_rcvry("committing"); + err = ubifs_run_commit(c); + if (err) + return err; + /* + * The data in the dirtiest LEB fits in the GC head LEB, so do the GC + * - use locking to keep 'ubifs_assert()' happy. + */ + dbg_rcvry("GC'ing LEB %d", lnum); + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + err = ubifs_garbage_collect_leb(c, &lp); + if (err >= 0) { + int err2 = ubifs_wbuf_sync_nolock(wbuf); + + if (err2) + err = err2; + } + mutex_unlock(&wbuf->io_mutex); + if (err < 0) { + dbg_err("GC failed, error %d", err); + if (err == -EAGAIN) + err = -EINVAL; + return err; + } + if (err != LEB_RETAINED) { + dbg_err("GC returned %d", err); + return -EINVAL; + } + err = ubifs_leb_unmap(c, c->gc_lnum); + if (err) + return err; + dbg_rcvry("allocated LEB %d for GC", lnum); + return 0; + +find_free: + /* + * There is no GC head LEB or the free space in the GC head LEB is too + * small. Allocate gc_lnum by calling 'ubifs_find_free_leb_for_idx()' so + * GC is not run. + */ + lnum = ubifs_find_free_leb_for_idx(c); + if (lnum < 0) { + dbg_err("could not find an empty LEB"); + return lnum; + } + /* And reset the index flag */ + err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, + LPROPS_INDEX, 0); + if (err) + return err; + c->gc_lnum = lnum; + dbg_rcvry("allocated LEB %d for GC", lnum); + /* Run the commit */ + dbg_rcvry("committing"); + return ubifs_run_commit(c); +} + +/** + * struct size_entry - inode size information for recovery. + * @rb: link in the RB-tree of sizes + * @inum: inode number + * @i_size: size on inode + * @d_size: maximum size based on data nodes + * @exists: indicates whether the inode exists + * @inode: inode if pinned in memory awaiting rw mode to fix it + */ +struct size_entry { + struct rb_node rb; + ino_t inum; + loff_t i_size; + loff_t d_size; + int exists; + struct inode *inode; +}; + +/** + * add_ino - add an entry to the size tree. + * @c: UBIFS file-system description object + * @inum: inode number + * @i_size: size on inode + * @d_size: maximum size based on data nodes + * @exists: indicates whether the inode exists + */ +static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size, + loff_t d_size, int exists) +{ + struct rb_node **p = &c->size_tree.rb_node, *parent = NULL; + struct size_entry *e; + + while (*p) { + parent = *p; + e = rb_entry(parent, struct size_entry, rb); + if (inum < e->inum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + e = kzalloc(sizeof(struct size_entry), GFP_KERNEL); + if (!e) + return -ENOMEM; + + e->inum = inum; + e->i_size = i_size; + e->d_size = d_size; + e->exists = exists; + + rb_link_node(&e->rb, parent, p); + rb_insert_color(&e->rb, &c->size_tree); + + return 0; +} + +/** + * find_ino - find an entry on the size tree. + * @c: UBIFS file-system description object + * @inum: inode number + */ +static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum) +{ + struct rb_node *p = c->size_tree.rb_node; + struct size_entry *e; + + while (p) { + e = rb_entry(p, struct size_entry, rb); + if (inum < e->inum) + p = p->rb_left; + else if (inum > e->inum) + p = p->rb_right; + else + return e; + } + return NULL; +} + +/** + * remove_ino - remove an entry from the size tree. + * @c: UBIFS file-system description object + * @inum: inode number + */ +static void remove_ino(struct ubifs_info *c, ino_t inum) +{ + struct size_entry *e = find_ino(c, inum); + + if (!e) + return; + rb_erase(&e->rb, &c->size_tree); + kfree(e); +} + +/** + * ubifs_destroy_size_tree - free resources related to the size tree. + * @c: UBIFS file-system description object + */ +void ubifs_destroy_size_tree(struct ubifs_info *c) +{ + struct rb_node *this = c->size_tree.rb_node; + struct size_entry *e; + + while (this) { + if (this->rb_left) { + this = this->rb_left; + continue; + } else if (this->rb_right) { + this = this->rb_right; + continue; + } + e = rb_entry(this, struct size_entry, rb); + if (e->inode) + iput(e->inode); + this = rb_parent(this); + if (this) { + if (this->rb_left == &e->rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + kfree(e); + } + c->size_tree = RB_ROOT; +} + +/** + * ubifs_recover_size_accum - accumulate inode sizes for recovery. + * @c: UBIFS file-system description object + * @key: node key + * @deletion: node is for a deletion + * @new_size: inode size + * + * This function has two purposes: + * 1) to ensure there are no data nodes that fall outside the inode size + * 2) to ensure there are no data nodes for inodes that do not exist + * To accomplish those purposes, a rb-tree is constructed containing an entry + * for each inode number in the journal that has not been deleted, and recording + * the size from the inode node, the maximum size of any data node (also altered + * by truncations) and a flag indicating a inode number for which no inode node + * was present in the journal. + * + * Note that there is still the possibility that there are data nodes that have + * been committed that are beyond the inode size, however the only way to find + * them would be to scan the entire index. Alternatively, some provision could + * be made to record the size of inodes at the start of commit, which would seem + * very cumbersome for a scenario that is quite unlikely and the only negative + * consequence of which is wasted space. + * + * This functions returns %0 on success and a negative error code on failure. + */ +int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, + int deletion, loff_t new_size) +{ + ino_t inum = key_inum(c, key); + struct size_entry *e; + int err; + + switch (key_type(c, key)) { + case UBIFS_INO_KEY: + if (deletion) + remove_ino(c, inum); + else { + e = find_ino(c, inum); + if (e) { + e->i_size = new_size; + e->exists = 1; + } else { + err = add_ino(c, inum, new_size, 0, 1); + if (err) + return err; + } + } + break; + case UBIFS_DATA_KEY: + e = find_ino(c, inum); + if (e) { + if (new_size > e->d_size) + e->d_size = new_size; + } else { + err = add_ino(c, inum, 0, new_size, 0); + if (err) + return err; + } + break; + case UBIFS_TRUN_KEY: + e = find_ino(c, inum); + if (e) + e->d_size = new_size; + break; + } + return 0; +} + +/** + * fix_size_in_place - fix inode size in place on flash. + * @c: UBIFS file-system description object + * @e: inode size information for recovery + */ +static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) +{ + struct ubifs_ino_node *ino = c->sbuf; + unsigned char *p; + union ubifs_key key; + int err, lnum, offs, len; + loff_t i_size; + uint32_t crc; + + /* Locate the inode node LEB number and offset */ + ino_key_init(c, &key, e->inum); + err = ubifs_tnc_locate(c, &key, ino, &lnum, &offs); + if (err) + goto out; + /* + * If the size recorded on the inode node is greater than the size that + * was calculated from nodes in the journal then don't change the inode. + */ + i_size = le64_to_cpu(ino->size); + if (i_size >= e->d_size) + return 0; + /* Read the LEB */ + err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size); + if (err) + goto out; + /* Change the size field and recalculate the CRC */ + ino = c->sbuf + offs; + ino->size = cpu_to_le64(e->d_size); + len = le32_to_cpu(ino->ch.len); + crc = crc32(UBIFS_CRC32_INIT, (void *)ino + 8, len - 8); + ino->ch.crc = cpu_to_le32(crc); + /* Work out where data in the LEB ends and free space begins */ + p = c->sbuf; + len = c->leb_size - 1; + while (p[len] == 0xff) + len -= 1; + len = ALIGN(len + 1, c->min_io_size); + /* Atomically write the fixed LEB back again */ + err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); + if (err) + goto out; + dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", e->inum, lnum, offs, + i_size, e->d_size); + return 0; + +out: + ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d", + e->inum, e->i_size, e->d_size, err); + return err; +} + +/** + * ubifs_recover_size - recover inode size. + * @c: UBIFS file-system description object + * + * This function attempts to fix inode size discrepancies identified by the + * 'ubifs_recover_size_accum()' function. + * + * This functions returns %0 on success and a negative error code on failure. + */ +int ubifs_recover_size(struct ubifs_info *c) +{ + struct rb_node *this = rb_first(&c->size_tree); + + while (this) { + struct size_entry *e; + int err; + + e = rb_entry(this, struct size_entry, rb); + if (!e->exists) { + union ubifs_key key; + + ino_key_init(c, &key, e->inum); + err = ubifs_tnc_lookup(c, &key, c->sbuf); + if (err && err != -ENOENT) + return err; + if (err == -ENOENT) { + /* Remove data nodes that have no inode */ + dbg_rcvry("removing ino %lu", e->inum); + err = ubifs_tnc_remove_ino(c, e->inum); + if (err) + return err; + } else { + struct ubifs_ino_node *ino = c->sbuf; + + e->exists = 1; + e->i_size = le64_to_cpu(ino->size); + } + } + if (e->exists && e->i_size < e->d_size) { + if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) { + /* Fix the inode size and pin it in memory */ + struct inode *inode; + + inode = ubifs_iget(c->vfs_sb, e->inum); + if (IS_ERR(inode)) + return PTR_ERR(inode); + if (inode->i_size < e->d_size) { + dbg_rcvry("ino %lu size %lld -> %lld", + e->inum, e->d_size, + inode->i_size); + inode->i_size = e->d_size; + ubifs_inode(inode)->ui_size = e->d_size; + e->inode = inode; + this = rb_next(this); + continue; + } + iput(inode); + } else { + /* Fix the size in place */ + err = fix_size_in_place(c, e); + if (err) + return err; + if (e->inode) + iput(e->inode); + } + } + this = rb_next(this); + rb_erase(&e->rb, &c->size_tree); + kfree(e); + } + return 0; +} diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c new file mode 100644 index 0000000..7399692 --- /dev/null +++ b/fs/ubifs/replay.c @@ -0,0 +1,1075 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file contains journal replay code. It runs when the file-system is being + * mounted and requires no locking. + * + * The larger is the journal, the longer it takes to scan it, so the longer it + * takes to mount UBIFS. This is why the journal has limited size which may be + * changed depending on the system requirements. But a larger journal gives + * faster I/O speed because it writes the index less frequently. So this is a + * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the + * larger is the journal, the more memory its index may consume. + */ + +#include "ubifs.h" + +/* + * Replay flags. + * + * REPLAY_DELETION: node was deleted + * REPLAY_REF: node is a reference node + */ +enum { + REPLAY_DELETION = 1, + REPLAY_REF = 2, +}; + +/** + * struct replay_entry - replay tree entry. + * @lnum: logical eraseblock number of the node + * @offs: node offset + * @len: node length + * @sqnum: node sequence number + * @flags: replay flags + * @rb: links the replay tree + * @key: node key + * @nm: directory entry name + * @old_size: truncation old size + * @new_size: truncation new size + * @free: amount of free space in a bud + * @dirty: amount of dirty space in a bud from padding and deletion nodes + * + * UBIFS journal replay must compare node sequence numbers, which means it must + * build a tree of node information to insert into the TNC. + */ +struct replay_entry { + int lnum; + int offs; + int len; + unsigned long long sqnum; + int flags; + struct rb_node rb; + union ubifs_key key; + union { + struct qstr nm; + struct { + loff_t old_size; + loff_t new_size; + }; + struct { + int free; + int dirty; + }; + }; +}; + +/** + * struct bud_entry - entry in the list of buds to replay. + * @list: next bud in the list + * @bud: bud description object + * @free: free bytes in the bud + * @sqnum: reference node sequence number + */ +struct bud_entry { + struct list_head list; + struct ubifs_bud *bud; + int free; + unsigned long long sqnum; +}; + +/** + * set_bud_lprops - set free and dirty space used by a bud. + * @c: UBIFS file-system description object + * @r: replay entry of bud + */ +static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) +{ + const struct ubifs_lprops *lp; + int err = 0, dirty; + + ubifs_get_lprops(c); + + lp = ubifs_lpt_lookup_dirty(c, r->lnum); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } + + dirty = lp->dirty; + if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { + /* + * The LEB was added to the journal with a starting offset of + * zero which means the LEB must have been empty. The LEB + * property values should be lp->free == c->leb_size and + * lp->dirty == 0, but that is not the case. The reason is that + * the LEB was garbage collected. The garbage collector resets + * the free and dirty space without recording it anywhere except + * lprops, so if there is not a commit then lprops does not have + * that information next time the file system is mounted. + * + * We do not need to adjust free space because the scan has told + * us the exact value which is recorded in the replay entry as + * r->free. + * + * However we do need to subtract from the dirty space the + * amount of space that the garbage collector reclaimed, which + * is the whole LEB minus the amount of space that was free. + */ + dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, + lp->free, lp->dirty); + dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, + lp->free, lp->dirty); + dirty -= c->leb_size - lp->free; + /* + * If the replay order was perfect the dirty space would now be + * zero. The order is not perfect because the the journal heads + * race with eachother. This is not a problem but is does mean + * that the dirty space may temporarily exceed c->leb_size + * during the replay. + */ + if (dirty != 0) + dbg_msg("LEB %d lp: %d free %d dirty " + "replay: %d free %d dirty", r->lnum, lp->free, + lp->dirty, r->free, r->dirty); + } + lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, + lp->flags | LPROPS_TAKEN, 0); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } +out: + ubifs_release_lprops(c); + return err; +} + +/** + * trun_remove_range - apply a replay entry for a truncation to the TNC. + * @c: UBIFS file-system description object + * @r: replay entry of truncation + */ +static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) +{ + unsigned min_blk, max_blk; + union ubifs_key min_key, max_key; + ino_t ino; + + min_blk = r->new_size / UBIFS_BLOCK_SIZE; + if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) + min_blk += 1; + + max_blk = r->old_size / UBIFS_BLOCK_SIZE; + if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) + max_blk -= 1; + + ino = key_inum(c, &r->key); + + data_key_init(c, &min_key, ino, min_blk); + data_key_init(c, &max_key, ino, max_blk); + + return ubifs_tnc_remove_range(c, &min_key, &max_key); +} + +/** + * apply_replay_entry - apply a replay entry to the TNC. + * @c: UBIFS file-system description object + * @r: replay entry to apply + * + * Apply a replay entry to the TNC. + */ +static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) +{ + int err, deletion = ((r->flags & REPLAY_DELETION) != 0); + + dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, + r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); + + /* Set c->replay_sqnum to help deal with dangling branches. */ + c->replay_sqnum = r->sqnum; + + if (r->flags & REPLAY_REF) + err = set_bud_lprops(c, r); + else if (is_hash_key(c, &r->key)) { + if (deletion) + err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); + else + err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, + r->len, &r->nm); + } else { + if (deletion) + switch (key_type(c, &r->key)) { + case UBIFS_INO_KEY: + { + ino_t inum = key_inum(c, &r->key); + + err = ubifs_tnc_remove_ino(c, inum); + break; + } + case UBIFS_TRUN_KEY: + err = trun_remove_range(c, r); + break; + default: + err = ubifs_tnc_remove(c, &r->key); + break; + } + else + err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, + r->len); + if (err) + return err; + + if (c->need_recovery) + err = ubifs_recover_size_accum(c, &r->key, deletion, + r->new_size); + } + + return err; +} + +/** + * destroy_replay_tree - destroy the replay. + * @c: UBIFS file-system description object + * + * Destroy the replay tree. + */ +static void destroy_replay_tree(struct ubifs_info *c) +{ + struct rb_node *this = c->replay_tree.rb_node; + struct replay_entry *r; + + while (this) { + if (this->rb_left) { + this = this->rb_left; + continue; + } else if (this->rb_right) { + this = this->rb_right; + continue; + } + r = rb_entry(this, struct replay_entry, rb); + this = rb_parent(this); + if (this) { + if (this->rb_left == &r->rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + if (is_hash_key(c, &r->key)) + kfree(r->nm.name); + kfree(r); + } + c->replay_tree = RB_ROOT; +} + +/** + * apply_replay_tree - apply the replay tree to the TNC. + * @c: UBIFS file-system description object + * + * Apply the replay tree. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +static int apply_replay_tree(struct ubifs_info *c) +{ + struct rb_node *this = rb_first(&c->replay_tree); + + while (this) { + struct replay_entry *r; + int err; + + cond_resched(); + + r = rb_entry(this, struct replay_entry, rb); + err = apply_replay_entry(c, r); + if (err) + return err; + this = rb_next(this); + } + return 0; +} + +/** + * insert_node - insert a node to the replay tree. + * @c: UBIFS file-system description object + * @lnum: node logical eraseblock number + * @offs: node offset + * @len: node length + * @key: node key + * @sqnum: sequence number + * @deletion: non-zero if this is a deletion + * @used: number of bytes in use in a LEB + * @old_size: truncation old size + * @new_size: truncation new size + * + * This function inserts a scanned non-direntry node to the replay tree. The + * replay tree is an RB-tree containing @struct replay_entry elements which are + * indexed by the sequence number. The replay tree is applied at the very end + * of the replay process. Since the tree is sorted in sequence number order, + * the older modifications are applied first. This function returns zero in + * case of success and a negative error code in case of failure. + */ +static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, + union ubifs_key *key, unsigned long long sqnum, + int deletion, int *used, loff_t old_size, + loff_t new_size) +{ + struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; + struct replay_entry *r; + + if (key_inum(c, key) >= c->highest_inum) + c->highest_inum = key_inum(c, key); + + dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); + while (*p) { + parent = *p; + r = rb_entry(parent, struct replay_entry, rb); + if (sqnum < r->sqnum) { + p = &(*p)->rb_left; + continue; + } else if (sqnum > r->sqnum) { + p = &(*p)->rb_right; + continue; + } + ubifs_err("duplicate sqnum in replay"); + return -EINVAL; + } + + r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); + if (!r) + return -ENOMEM; + + if (!deletion) + *used += ALIGN(len, 8); + r->lnum = lnum; + r->offs = offs; + r->len = len; + r->sqnum = sqnum; + r->flags = (deletion ? REPLAY_DELETION : 0); + r->old_size = old_size; + r->new_size = new_size; + key_copy(c, key, &r->key); + + rb_link_node(&r->rb, parent, p); + rb_insert_color(&r->rb, &c->replay_tree); + return 0; +} + +/** + * insert_dent - insert a directory entry node into the replay tree. + * @c: UBIFS file-system description object + * @lnum: node logical eraseblock number + * @offs: node offset + * @len: node length + * @key: node key + * @name: directory entry name + * @nlen: directory entry name length + * @sqnum: sequence number + * @deletion: non-zero if this is a deletion + * @used: number of bytes in use in a LEB + * + * This function inserts a scanned directory entry node to the replay tree. + * Returns zero in case of success and a negative error code in case of + * failure. + * + * This function is also used for extended attribute entries because they are + * implemented as directory entry nodes. + */ +static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, + union ubifs_key *key, const char *name, int nlen, + unsigned long long sqnum, int deletion, int *used) +{ + struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; + struct replay_entry *r; + char *nbuf; + + if (key_inum(c, key) >= c->highest_inum) + c->highest_inum = key_inum(c, key); + + dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); + while (*p) { + parent = *p; + r = rb_entry(parent, struct replay_entry, rb); + if (sqnum < r->sqnum) { + p = &(*p)->rb_left; + continue; + } + if (sqnum > r->sqnum) { + p = &(*p)->rb_right; + continue; + } + ubifs_err("duplicate sqnum in replay"); + return -EINVAL; + } + + r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); + if (!r) + return -ENOMEM; + nbuf = kmalloc(nlen + 1, GFP_KERNEL); + if (!nbuf) { + kfree(r); + return -ENOMEM; + } + + if (!deletion) + *used += ALIGN(len, 8); + r->lnum = lnum; + r->offs = offs; + r->len = len; + r->sqnum = sqnum; + r->nm.len = nlen; + memcpy(nbuf, name, nlen); + nbuf[nlen] = '\0'; + r->nm.name = nbuf; + r->flags = (deletion ? REPLAY_DELETION : 0); + key_copy(c, key, &r->key); + + ubifs_assert(!*p); + rb_link_node(&r->rb, parent, p); + rb_insert_color(&r->rb, &c->replay_tree); + return 0; +} + +/** + * ubifs_validate_entry - validate directory or extended attribute entry node. + * @c: UBIFS file-system description object + * @dent: the node to validate + * + * This function validates directory or extended attribute entry node @dent. + * Returns zero if the node is all right and a %-EINVAL if not. + */ +int ubifs_validate_entry(struct ubifs_info *c, + const struct ubifs_dent_node *dent) +{ + int key_type = key_type_flash(c, dent->key); + int nlen = le16_to_cpu(dent->nlen); + + if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || + dent->type >= UBIFS_ITYPES_CNT || + nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || + strnlen(dent->name, nlen) != nlen || + le64_to_cpu(dent->inum) > MAX_INUM) { + ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ? + "directory entry" : "extended attribute entry"); + return -EINVAL; + } + + if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { + ubifs_err("bad key type %d", key_type); + return -EINVAL; + } + + return 0; +} + +/** + * replay_bud - replay a bud logical eraseblock. + * @c: UBIFS file-system description object + * @lnum: bud logical eraseblock number to replay + * @offs: bud start offset + * @jhead: journal head to which this bud belongs + * @free: amount of free space in the bud is returned here + * @dirty: amount of dirty space from padding and deletion nodes is returned + * here + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, + int *free, int *dirty) +{ + int err = 0, used = 0; + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + struct ubifs_bud *bud; + + dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); + if (c->need_recovery) + sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); + else + sleb = ubifs_scan(c, lnum, offs, c->sbuf); + if (IS_ERR(sleb)) + return PTR_ERR(sleb); + + /* + * The bud does not have to start from offset zero - the beginning of + * the 'lnum' LEB may contain previously committed data. One of the + * things we have to do in replay is to correctly update lprops with + * newer information about this LEB. + * + * At this point lprops thinks that this LEB has 'c->leb_size - offs' + * bytes of free space because it only contain information about + * committed data. + * + * But we know that real amount of free space is 'c->leb_size - + * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and + * 'sleb->endpt' is used by bud data. We have to correctly calculate + * how much of these data are dirty and update lprops with this + * information. + * + * The dirt in that LEB region is comprised of padding nodes, deletion + * nodes, truncation nodes and nodes which are obsoleted by subsequent + * nodes in this LEB. So instead of calculating clean space, we + * calculate used space ('used' variable). + */ + + list_for_each_entry(snod, &sleb->nodes, list) { + int deletion = 0; + + cond_resched(); + + if (snod->sqnum >= SQNUM_WATERMARK) { + ubifs_err("file system's life ended"); + goto out_dump; + } + + if (snod->sqnum > c->max_sqnum) + c->max_sqnum = snod->sqnum; + + switch (snod->type) { + case UBIFS_INO_NODE: + { + struct ubifs_ino_node *ino = snod->node; + loff_t new_size = le64_to_cpu(ino->size); + + if (le32_to_cpu(ino->nlink) == 0) + deletion = 1; + err = insert_node(c, lnum, snod->offs, snod->len, + &snod->key, snod->sqnum, deletion, + &used, 0, new_size); + break; + } + case UBIFS_DATA_NODE: + { + struct ubifs_data_node *dn = snod->node; + loff_t new_size = le32_to_cpu(dn->size) + + key_block(c, &snod->key) * + UBIFS_BLOCK_SIZE; + + err = insert_node(c, lnum, snod->offs, snod->len, + &snod->key, snod->sqnum, deletion, + &used, 0, new_size); + break; + } + case UBIFS_DENT_NODE: + case UBIFS_XENT_NODE: + { + struct ubifs_dent_node *dent = snod->node; + + err = ubifs_validate_entry(c, dent); + if (err) + goto out_dump; + + err = insert_dent(c, lnum, snod->offs, snod->len, + &snod->key, dent->name, + le16_to_cpu(dent->nlen), snod->sqnum, + !le64_to_cpu(dent->inum), &used); + break; + } + case UBIFS_TRUN_NODE: + { + struct ubifs_trun_node *trun = snod->node; + loff_t old_size = le64_to_cpu(trun->old_size); + loff_t new_size = le64_to_cpu(trun->new_size); + union ubifs_key key; + + /* Validate truncation node */ + if (old_size < 0 || old_size > c->max_inode_sz || + new_size < 0 || new_size > c->max_inode_sz || + old_size <= new_size) { + ubifs_err("bad truncation node"); + goto out_dump; + } + + /* + * Create a fake truncation key just to use the same + * functions which expect nodes to have keys. + */ + trun_key_init(c, &key, le32_to_cpu(trun->inum)); + err = insert_node(c, lnum, snod->offs, snod->len, + &key, snod->sqnum, 1, &used, + old_size, new_size); + break; + } + default: + ubifs_err("unexpected node type %d in bud LEB %d:%d", + snod->type, lnum, snod->offs); + err = -EINVAL; + goto out_dump; + } + if (err) + goto out; + } + + bud = ubifs_search_bud(c, lnum); + if (!bud) + BUG(); + + ubifs_assert(sleb->endpt - offs >= used); + ubifs_assert(sleb->endpt % c->min_io_size == 0); + + if (sleb->endpt + c->min_io_size <= c->leb_size && + !(c->vfs_sb->s_flags & MS_RDONLY)) + err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum, + sleb->endpt, UBI_SHORTTERM); + + *dirty = sleb->endpt - offs - used; + *free = c->leb_size - sleb->endpt; + +out: + ubifs_scan_destroy(sleb); + return err; + +out_dump: + ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs); + dbg_dump_node(c, snod->node); + ubifs_scan_destroy(sleb); + return -EINVAL; +} + +/** + * insert_ref_node - insert a reference node to the replay tree. + * @c: UBIFS file-system description object + * @lnum: node logical eraseblock number + * @offs: node offset + * @sqnum: sequence number + * @free: amount of free space in bud + * @dirty: amount of dirty space from padding and deletion nodes + * + * This function inserts a reference node to the replay tree and returns zero + * in case of success ort a negative error code in case of failure. + */ +static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, + unsigned long long sqnum, int free, int dirty) +{ + struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; + struct replay_entry *r; + + dbg_mnt("add ref LEB %d:%d", lnum, offs); + while (*p) { + parent = *p; + r = rb_entry(parent, struct replay_entry, rb); + if (sqnum < r->sqnum) { + p = &(*p)->rb_left; + continue; + } else if (sqnum > r->sqnum) { + p = &(*p)->rb_right; + continue; + } + ubifs_err("duplicate sqnum in replay tree"); + return -EINVAL; + } + + r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); + if (!r) + return -ENOMEM; + + r->lnum = lnum; + r->offs = offs; + r->sqnum = sqnum; + r->flags = REPLAY_REF; + r->free = free; + r->dirty = dirty; + + rb_link_node(&r->rb, parent, p); + rb_insert_color(&r->rb, &c->replay_tree); + return 0; +} + +/** + * replay_buds - replay all buds. + * @c: UBIFS file-system description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int replay_buds(struct ubifs_info *c) +{ + struct bud_entry *b; + int err, uninitialized_var(free), uninitialized_var(dirty); + + list_for_each_entry(b, &c->replay_buds, list) { + err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, + &free, &dirty); + if (err) + return err; + err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, + free, dirty); + if (err) + return err; + } + + return 0; +} + +/** + * destroy_bud_list - destroy the list of buds to replay. + * @c: UBIFS file-system description object + */ +static void destroy_bud_list(struct ubifs_info *c) +{ + struct bud_entry *b; + + while (!list_empty(&c->replay_buds)) { + b = list_entry(c->replay_buds.next, struct bud_entry, list); + list_del(&b->list); + kfree(b); + } +} + +/** + * add_replay_bud - add a bud to the list of buds to replay. + * @c: UBIFS file-system description object + * @lnum: bud logical eraseblock number to replay + * @offs: bud start offset + * @jhead: journal head to which this bud belongs + * @sqnum: reference node sequence number + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, + unsigned long long sqnum) +{ + struct ubifs_bud *bud; + struct bud_entry *b; + + dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); + + bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); + if (!bud) + return -ENOMEM; + + b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); + if (!b) { + kfree(bud); + return -ENOMEM; + } + + bud->lnum = lnum; + bud->start = offs; + bud->jhead = jhead; + ubifs_add_bud(c, bud); + + b->bud = bud; + b->sqnum = sqnum; + list_add_tail(&b->list, &c->replay_buds); + + return 0; +} + +/** + * validate_ref - validate a reference node. + * @c: UBIFS file-system description object + * @ref: the reference node to validate + * @ref_lnum: LEB number of the reference node + * @ref_offs: reference node offset + * + * This function returns %1 if a bud reference already exists for the LEB. %0 is + * returned if the reference node is new, otherwise %-EINVAL is returned if + * validation failed. + */ +static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) +{ + struct ubifs_bud *bud; + int lnum = le32_to_cpu(ref->lnum); + unsigned int offs = le32_to_cpu(ref->offs); + unsigned int jhead = le32_to_cpu(ref->jhead); + + /* + * ref->offs may point to the end of LEB when the journal head points + * to the end of LEB and we write reference node for it during commit. + * So this is why we require 'offs > c->leb_size'. + */ + if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt || + lnum < c->main_first || offs > c->leb_size || + offs & (c->min_io_size - 1)) + return -EINVAL; + + /* Make sure we have not already looked at this bud */ + bud = ubifs_search_bud(c, lnum); + if (bud) { + if (bud->jhead == jhead && bud->start <= offs) + return 1; + ubifs_err("bud at LEB %d:%d was already referred", lnum, offs); + return -EINVAL; + } + + return 0; +} + +/** + * replay_log_leb - replay a log logical eraseblock. + * @c: UBIFS file-system description object + * @lnum: log logical eraseblock to replay + * @offs: offset to start replaying from + * @sbuf: scan buffer + * + * This function replays a log LEB and returns zero in case of success, %1 if + * this is the last LEB in the log, and a negative error code in case of + * failure. + */ +static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) +{ + int err; + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + const struct ubifs_cs_node *node; + + dbg_mnt("replay log LEB %d:%d", lnum, offs); + sleb = ubifs_scan(c, lnum, offs, sbuf); + if (IS_ERR(sleb)) { + if (c->need_recovery) + sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); + if (IS_ERR(sleb)) + return PTR_ERR(sleb); + } + + if (sleb->nodes_cnt == 0) { + err = 1; + goto out; + } + + node = sleb->buf; + + snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); + if (c->cs_sqnum == 0) { + /* + * This is the first log LEB we are looking at, make sure that + * the first node is a commit start node. Also record its + * sequence number so that UBIFS can determine where the log + * ends, because all nodes which were have higher sequence + * numbers. + */ + if (snod->type != UBIFS_CS_NODE) { + dbg_err("first log node at LEB %d:%d is not CS node", + lnum, offs); + goto out_dump; + } + if (le64_to_cpu(node->cmt_no) != c->cmt_no) { + dbg_err("first CS node at LEB %d:%d has wrong " + "commit number %llu expected %llu", + lnum, offs, + (unsigned long long)le64_to_cpu(node->cmt_no), + c->cmt_no); + goto out_dump; + } + + c->cs_sqnum = le64_to_cpu(node->ch.sqnum); + dbg_mnt("commit start sqnum %llu", c->cs_sqnum); + } + + if (snod->sqnum < c->cs_sqnum) { + /* + * This means that we reached end of log and now + * look to the older log data, which was already + * committed but the eraseblock was not erased (UBIFS + * only unmaps it). So this basically means we have to + * exit with "end of log" code. + */ + err = 1; + goto out; + } + + /* Make sure the first node sits at offset zero of the LEB */ + if (snod->offs != 0) { + dbg_err("first node is not at zero offset"); + goto out_dump; + } + + list_for_each_entry(snod, &sleb->nodes, list) { + + cond_resched(); + + if (snod->sqnum >= SQNUM_WATERMARK) { + ubifs_err("file system's life ended"); + goto out_dump; + } + + if (snod->sqnum < c->cs_sqnum) { + dbg_err("bad sqnum %llu, commit sqnum %llu", + snod->sqnum, c->cs_sqnum); + goto out_dump; + } + + if (snod->sqnum > c->max_sqnum) + c->max_sqnum = snod->sqnum; + + switch (snod->type) { + case UBIFS_REF_NODE: { + const struct ubifs_ref_node *ref = snod->node; + + err = validate_ref(c, ref); + if (err == 1) + break; /* Already have this bud */ + if (err) + goto out_dump; + + err = add_replay_bud(c, le32_to_cpu(ref->lnum), + le32_to_cpu(ref->offs), + le32_to_cpu(ref->jhead), + snod->sqnum); + if (err) + goto out; + + break; + } + case UBIFS_CS_NODE: + /* Make sure it sits at the beginning of LEB */ + if (snod->offs != 0) { + ubifs_err("unexpected node in log"); + goto out_dump; + } + break; + default: + ubifs_err("unexpected node in log"); + goto out_dump; + } + } + + if (sleb->endpt || c->lhead_offs >= c->leb_size) { + c->lhead_lnum = lnum; + c->lhead_offs = sleb->endpt; + } + + err = !sleb->endpt; +out: + ubifs_scan_destroy(sleb); + return err; + +out_dump: + ubifs_err("log error detected while replying the log at LEB %d:%d", + lnum, offs + snod->offs); + dbg_dump_node(c, snod->node); + ubifs_scan_destroy(sleb); + return -EINVAL; +} + +/** + * take_ihead - update the status of the index head in lprops to 'taken'. + * @c: UBIFS file-system description object + * + * This function returns the amount of free space in the index head LEB or a + * negative error code. + */ +static int take_ihead(struct ubifs_info *c) +{ + const struct ubifs_lprops *lp; + int err, free; + + ubifs_get_lprops(c); + + lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } + + free = lp->free; + + lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, + lp->flags | LPROPS_TAKEN, 0); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } + + err = free; +out: + ubifs_release_lprops(c); + return err; +} + +/** + * ubifs_replay_journal - replay journal. + * @c: UBIFS file-system description object + * + * This function scans the journal, replays and cleans it up. It makes sure all + * memory data structures related to uncommitted journal are built (dirty TNC + * tree, tree of buds, modified lprops, etc). + */ +int ubifs_replay_journal(struct ubifs_info *c) +{ + int err, i, lnum, offs, free; + void *sbuf = NULL; + + BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); + + /* Update the status of the index head in lprops to 'taken' */ + free = take_ihead(c); + if (free < 0) + return free; /* Error code */ + + if (c->ihead_offs != c->leb_size - free) { + ubifs_err("bad index head LEB %d:%d", c->ihead_lnum, + c->ihead_offs); + return -EINVAL; + } + + sbuf = vmalloc(c->leb_size); + if (!sbuf) + return -ENOMEM; + + dbg_mnt("start replaying the journal"); + + c->replaying = 1; + + lnum = c->ltail_lnum = c->lhead_lnum; + offs = c->lhead_offs; + + for (i = 0; i < c->log_lebs; i++, lnum++) { + if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) { + /* + * The log is logically circular, we reached the last + * LEB, switch to the first one. + */ + lnum = UBIFS_LOG_LNUM; + offs = 0; + } + err = replay_log_leb(c, lnum, offs, sbuf); + if (err == 1) + /* We hit the end of the log */ + break; + if (err) + goto out; + offs = 0; + } + + err = replay_buds(c); + if (err) + goto out; + + err = apply_replay_tree(c); + if (err) + goto out; + + ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); + dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " + "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, + c->highest_inum); +out: + destroy_replay_tree(c); + destroy_bud_list(c); + vfree(sbuf); + c->replaying = 0; + return err; +} diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c new file mode 100644 index 0000000..2bf753b --- /dev/null +++ b/fs/ubifs/sb.c @@ -0,0 +1,629 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file implements UBIFS superblock. The superblock is stored at the first + * LEB of the volume and is never changed by UBIFS. Only user-space tools may + * change it. The superblock node mostly contains geometry information. + */ + +#include "ubifs.h" +#include + +/* + * Default journal size in logical eraseblocks as a percent of total + * flash size. + */ +#define DEFAULT_JNL_PERCENT 5 + +/* Default maximum journal size in bytes */ +#define DEFAULT_MAX_JNL (32*1024*1024) + +/* Default indexing tree fanout */ +#define DEFAULT_FANOUT 8 + +/* Default number of data journal heads */ +#define DEFAULT_JHEADS_CNT 1 + +/* Default positions of different LEBs in the main area */ +#define DEFAULT_IDX_LEB 0 +#define DEFAULT_DATA_LEB 1 +#define DEFAULT_GC_LEB 2 + +/* Default number of LEB numbers in LPT's save table */ +#define DEFAULT_LSAVE_CNT 256 + +/* Default reserved pool size as a percent of maximum free space */ +#define DEFAULT_RP_PERCENT 5 + +/* The default maximum size of reserved pool in bytes */ +#define DEFAULT_MAX_RP_SIZE (5*1024*1024) + +/* Default time granularity in nanoseconds */ +#define DEFAULT_TIME_GRAN 1000000000 + +/** + * create_default_filesystem - format empty UBI volume. + * @c: UBIFS file-system description object + * + * This function creates default empty file-system. Returns zero in case of + * success and a negative error code in case of failure. + */ +static int create_default_filesystem(struct ubifs_info *c) +{ + struct ubifs_sb_node *sup; + struct ubifs_mst_node *mst; + struct ubifs_idx_node *idx; + struct ubifs_branch *br; + struct ubifs_ino_node *ino; + struct ubifs_cs_node *cs; + union ubifs_key key; + int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first; + int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0; + int min_leb_cnt = UBIFS_MIN_LEB_CNT; + uint64_t tmp64, main_bytes; + + /* Some functions called from here depend on the @c->key_len filed */ + c->key_len = UBIFS_SK_LEN; + + /* + * First of all, we have to calculate default file-system geometry - + * log size, journal size, etc. + */ + if (c->leb_cnt < 0x7FFFFFFF / DEFAULT_JNL_PERCENT) + /* We can first multiply then divide and have no overflow */ + jnl_lebs = c->leb_cnt * DEFAULT_JNL_PERCENT / 100; + else + jnl_lebs = (c->leb_cnt / 100) * DEFAULT_JNL_PERCENT; + + if (jnl_lebs < UBIFS_MIN_JNL_LEBS) + jnl_lebs = UBIFS_MIN_JNL_LEBS; + if (jnl_lebs * c->leb_size > DEFAULT_MAX_JNL) + jnl_lebs = DEFAULT_MAX_JNL / c->leb_size; + + /* + * The log should be large enough to fit reference nodes for all bud + * LEBs. Because buds do not have to start from the beginning of LEBs + * (half of the LEB may contain committed data), the log should + * generally be larger, make it twice as large. + */ + tmp = 2 * (c->ref_node_alsz * jnl_lebs) + c->leb_size - 1; + log_lebs = tmp / c->leb_size; + /* Plus one LEB reserved for commit */ + log_lebs += 1; + if (c->leb_cnt - min_leb_cnt > 8) { + /* And some extra space to allow writes while committing */ + log_lebs += 1; + min_leb_cnt += 1; + } + + max_buds = jnl_lebs - log_lebs; + if (max_buds < UBIFS_MIN_BUD_LEBS) + max_buds = UBIFS_MIN_BUD_LEBS; + + /* + * Orphan nodes are stored in a separate area. One node can store a lot + * of orphan inode numbers, but when new orphan comes we just add a new + * orphan node. At some point the nodes are consolidated into one + * orphan node. + */ + orph_lebs = UBIFS_MIN_ORPH_LEBS; +#ifdef CONFIG_UBIFS_FS_DEBUG + if (c->leb_cnt - min_leb_cnt > 1) + /* + * For debugging purposes it is better to have at least 2 + * orphan LEBs, because the orphan subsystem would need to do + * consolidations and would be stressed more. + */ + orph_lebs += 1; +#endif + + main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS - log_lebs; + main_lebs -= orph_lebs; + + lpt_first = UBIFS_LOG_LNUM + log_lebs; + c->lsave_cnt = DEFAULT_LSAVE_CNT; + c->max_leb_cnt = c->leb_cnt; + err = ubifs_create_dflt_lpt(c, &main_lebs, lpt_first, &lpt_lebs, + &big_lpt); + if (err) + return err; + + dbg_gen("LEB Properties Tree created (LEBs %d-%d)", lpt_first, + lpt_first + lpt_lebs - 1); + + main_first = c->leb_cnt - main_lebs; + + /* Create default superblock */ + tmp = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size); + sup = kzalloc(tmp, GFP_KERNEL); + if (!sup) + return -ENOMEM; + + tmp64 = (uint64_t)max_buds * c->leb_size; + if (big_lpt) + sup_flags |= UBIFS_FLG_BIGLPT; + + sup->ch.node_type = UBIFS_SB_NODE; + sup->key_hash = UBIFS_KEY_HASH_R5; + sup->flags = cpu_to_le32(sup_flags); + sup->min_io_size = cpu_to_le32(c->min_io_size); + sup->leb_size = cpu_to_le32(c->leb_size); + sup->leb_cnt = cpu_to_le32(c->leb_cnt); + sup->max_leb_cnt = cpu_to_le32(c->max_leb_cnt); + sup->max_bud_bytes = cpu_to_le64(tmp64); + sup->log_lebs = cpu_to_le32(log_lebs); + sup->lpt_lebs = cpu_to_le32(lpt_lebs); + sup->orph_lebs = cpu_to_le32(orph_lebs); + sup->jhead_cnt = cpu_to_le32(DEFAULT_JHEADS_CNT); + sup->fanout = cpu_to_le32(DEFAULT_FANOUT); + sup->lsave_cnt = cpu_to_le32(c->lsave_cnt); + sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION); + sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO); + sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN); + + generate_random_uuid(sup->uuid); + + main_bytes = (uint64_t)main_lebs * c->leb_size; + tmp64 = main_bytes * DEFAULT_RP_PERCENT; + do_div(tmp64, 100); + if (tmp64 > DEFAULT_MAX_RP_SIZE) + tmp64 = DEFAULT_MAX_RP_SIZE; + sup->rp_size = cpu_to_le64(tmp64); + + err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM); + kfree(sup); + if (err) + return err; + + dbg_gen("default superblock created at LEB 0:0"); + + /* Create default master node */ + mst = kzalloc(c->mst_node_alsz, GFP_KERNEL); + if (!mst) + return -ENOMEM; + + mst->ch.node_type = UBIFS_MST_NODE; + mst->log_lnum = cpu_to_le32(UBIFS_LOG_LNUM); + mst->highest_inum = cpu_to_le64(UBIFS_FIRST_INO); + mst->cmt_no = 0; + mst->root_lnum = cpu_to_le32(main_first + DEFAULT_IDX_LEB); + mst->root_offs = 0; + tmp = ubifs_idx_node_sz(c, 1); + mst->root_len = cpu_to_le32(tmp); + mst->gc_lnum = cpu_to_le32(main_first + DEFAULT_GC_LEB); + mst->ihead_lnum = cpu_to_le32(main_first + DEFAULT_IDX_LEB); + mst->ihead_offs = cpu_to_le32(ALIGN(tmp, c->min_io_size)); + mst->index_size = cpu_to_le64(ALIGN(tmp, 8)); + mst->lpt_lnum = cpu_to_le32(c->lpt_lnum); + mst->lpt_offs = cpu_to_le32(c->lpt_offs); + mst->nhead_lnum = cpu_to_le32(c->nhead_lnum); + mst->nhead_offs = cpu_to_le32(c->nhead_offs); + mst->ltab_lnum = cpu_to_le32(c->ltab_lnum); + mst->ltab_offs = cpu_to_le32(c->ltab_offs); + mst->lsave_lnum = cpu_to_le32(c->lsave_lnum); + mst->lsave_offs = cpu_to_le32(c->lsave_offs); + mst->lscan_lnum = cpu_to_le32(main_first); + mst->empty_lebs = cpu_to_le32(main_lebs - 2); + mst->idx_lebs = cpu_to_le32(1); + mst->leb_cnt = cpu_to_le32(c->leb_cnt); + + /* Calculate lprops statistics */ + tmp64 = main_bytes; + tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size); + tmp64 -= ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size); + mst->total_free = cpu_to_le64(tmp64); + + tmp64 = ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size); + ino_waste = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size) - + UBIFS_INO_NODE_SZ; + tmp64 += ino_waste; + tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), 8); + mst->total_dirty = cpu_to_le64(tmp64); + + /* The indexing LEB does not contribute to dark space */ + tmp64 = (c->main_lebs - 1) * c->dark_wm; + mst->total_dark = cpu_to_le64(tmp64); + + mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ); + + err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0, + UBI_UNKNOWN); + if (err) { + kfree(mst); + return err; + } + err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1, 0, + UBI_UNKNOWN); + kfree(mst); + if (err) + return err; + + dbg_gen("default master node created at LEB %d:0", UBIFS_MST_LNUM); + + /* Create the root indexing node */ + tmp = ubifs_idx_node_sz(c, 1); + idx = kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL); + if (!idx) + return -ENOMEM; + + c->key_fmt = UBIFS_SIMPLE_KEY_FMT; + c->key_hash = key_r5_hash; + + idx->ch.node_type = UBIFS_IDX_NODE; + idx->child_cnt = cpu_to_le16(1); + ino_key_init(c, &key, UBIFS_ROOT_INO); + br = ubifs_idx_branch(c, idx, 0); + key_write_idx(c, &key, &br->key); + br->lnum = cpu_to_le32(main_first + DEFAULT_DATA_LEB); + br->len = cpu_to_le32(UBIFS_INO_NODE_SZ); + err = ubifs_write_node(c, idx, tmp, main_first + DEFAULT_IDX_LEB, 0, + UBI_UNKNOWN); + kfree(idx); + if (err) + return err; + + dbg_gen("default root indexing node created LEB %d:0", + main_first + DEFAULT_IDX_LEB); + + /* Create default root inode */ + tmp = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size); + ino = kzalloc(tmp, GFP_KERNEL); + if (!ino) + return -ENOMEM; + + ino_key_init_flash(c, &ino->key, UBIFS_ROOT_INO); + ino->ch.node_type = UBIFS_INO_NODE; + ino->creat_sqnum = cpu_to_le64(++c->max_sqnum); + ino->nlink = cpu_to_le32(2); + tmp = cpu_to_le64(CURRENT_TIME_SEC.tv_sec); + ino->atime_sec = tmp; + ino->ctime_sec = tmp; + ino->mtime_sec = tmp; + ino->atime_nsec = 0; + ino->ctime_nsec = 0; + ino->mtime_nsec = 0; + ino->mode = cpu_to_le32(S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO); + ino->size = cpu_to_le64(UBIFS_INO_NODE_SZ); + + /* Set compression enabled by default */ + ino->flags = cpu_to_le32(UBIFS_COMPR_FL); + + err = ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ, + main_first + DEFAULT_DATA_LEB, 0, + UBI_UNKNOWN); + kfree(ino); + if (err) + return err; + + dbg_gen("root inode created at LEB %d:0", + main_first + DEFAULT_DATA_LEB); + + /* + * The first node in the log has to be the commit start node. This is + * always the case during normal file-system operation. Write a fake + * commit start node to the log. + */ + tmp = ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size); + cs = kzalloc(tmp, GFP_KERNEL); + if (!cs) + return -ENOMEM; + + cs->ch.node_type = UBIFS_CS_NODE; + err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM, + 0, UBI_UNKNOWN); + kfree(cs); + + ubifs_msg("default file-system created"); + return 0; +} + +/** + * validate_sb - validate superblock node. + * @c: UBIFS file-system description object + * @sup: superblock node + * + * This function validates superblock node @sup. Since most of data was read + * from the superblock and stored in @c, the function validates fields in @c + * instead. Returns zero in case of success and %-EINVAL in case of validation + * failure. + */ +static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) +{ + long long max_bytes; + int err = 1, min_leb_cnt; + + if (!c->key_hash) { + err = 2; + goto failed; + } + + if (sup->key_fmt != UBIFS_SIMPLE_KEY_FMT) { + err = 3; + goto failed; + } + + if (le32_to_cpu(sup->min_io_size) != c->min_io_size) { + ubifs_err("min. I/O unit mismatch: %d in superblock, %d real", + le32_to_cpu(sup->min_io_size), c->min_io_size); + goto failed; + } + + if (le32_to_cpu(sup->leb_size) != c->leb_size) { + ubifs_err("LEB size mismatch: %d in superblock, %d real", + le32_to_cpu(sup->leb_size), c->leb_size); + goto failed; + } + + if (c->log_lebs < UBIFS_MIN_LOG_LEBS || + c->lpt_lebs < UBIFS_MIN_LPT_LEBS || + c->orph_lebs < UBIFS_MIN_ORPH_LEBS || + c->main_lebs < UBIFS_MIN_MAIN_LEBS) { + err = 4; + goto failed; + } + + /* + * Calculate minimum allowed amount of main area LEBs. This is very + * similar to %UBIFS_MIN_LEB_CNT, but we take into account real what we + * have just read from the superblock. + */ + min_leb_cnt = UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs; + min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6; + + if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) { + ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, " + "%d minimum required", c->leb_cnt, c->vi.size, + min_leb_cnt); + goto failed; + } + + if (c->max_leb_cnt < c->leb_cnt) { + ubifs_err("max. LEB count %d less than LEB count %d", + c->max_leb_cnt, c->leb_cnt); + goto failed; + } + + if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) { + err = 7; + goto failed; + } + + if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS || + c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) { + err = 8; + goto failed; + } + + if (c->jhead_cnt < NONDATA_JHEADS_CNT + 1 || + c->jhead_cnt > NONDATA_JHEADS_CNT + UBIFS_MAX_JHEADS) { + err = 9; + goto failed; + } + + if (c->fanout < UBIFS_MIN_FANOUT || + ubifs_idx_node_sz(c, c->fanout) > c->leb_size) { + err = 10; + goto failed; + } + + if (c->lsave_cnt < 0 || (c->lsave_cnt > DEFAULT_LSAVE_CNT && + c->lsave_cnt > c->max_leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS - + c->log_lebs - c->lpt_lebs - c->orph_lebs)) { + err = 11; + goto failed; + } + + if (UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs + c->lpt_lebs + + c->orph_lebs + c->main_lebs != c->leb_cnt) { + err = 12; + goto failed; + } + + if (c->default_compr < 0 || c->default_compr >= UBIFS_COMPR_TYPES_CNT) { + err = 13; + goto failed; + } + + max_bytes = c->main_lebs * (long long)c->leb_size; + if (c->rp_size < 0 || max_bytes < c->rp_size) { + err = 14; + goto failed; + } + + if (le32_to_cpu(sup->time_gran) > 1000000000 || + le32_to_cpu(sup->time_gran) < 1) { + err = 15; + goto failed; + } + + return 0; + +failed: + ubifs_err("bad superblock, error %d", err); + dbg_dump_node(c, sup); + return -EINVAL; +} + +/** + * ubifs_read_sb_node - read superblock node. + * @c: UBIFS file-system description object + * + * This function returns a pointer to the superblock node or a negative error + * code. + */ +struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) +{ + struct ubifs_sb_node *sup; + int err; + + sup = kmalloc(ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size), GFP_NOFS); + if (!sup) + return ERR_PTR(-ENOMEM); + + err = ubifs_read_node(c, sup, UBIFS_SB_NODE, UBIFS_SB_NODE_SZ, + UBIFS_SB_LNUM, 0); + if (err) { + kfree(sup); + return ERR_PTR(err); + } + + return sup; +} + +/** + * ubifs_write_sb_node - write superblock node. + * @c: UBIFS file-system description object + * @sup: superblock node read with 'ubifs_read_sb_node()' + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup) +{ + int len = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size); + + ubifs_prepare_node(c, sup, UBIFS_SB_NODE_SZ, 1); + return ubifs_leb_change(c, UBIFS_SB_LNUM, sup, len, UBI_LONGTERM); +} + +/** + * ubifs_read_superblock - read superblock. + * @c: UBIFS file-system description object + * + * This function finds, reads and checks the superblock. If an empty UBI volume + * is being mounted, this function creates default superblock. Returns zero in + * case of success, and a negative error code in case of failure. + */ +int ubifs_read_superblock(struct ubifs_info *c) +{ + int err, sup_flags; + struct ubifs_sb_node *sup; + + if (c->empty) { + err = create_default_filesystem(c); + if (err) + return err; + } + + sup = ubifs_read_sb_node(c); + if (IS_ERR(sup)) + return PTR_ERR(sup); + + /* + * The software supports all previous versions but not future versions, + * due to the unavailability of time-travelling equipment. + */ + c->fmt_version = le32_to_cpu(sup->fmt_version); + if (c->fmt_version > UBIFS_FORMAT_VERSION) { + ubifs_err("on-flash format version is %d, but software only " + "supports up to version %d", c->fmt_version, + UBIFS_FORMAT_VERSION); + err = -EINVAL; + goto out; + } + + if (c->fmt_version < 3) { + ubifs_err("on-flash format version %d is not supported", + c->fmt_version); + err = -EINVAL; + goto out; + } + + switch (sup->key_hash) { + case UBIFS_KEY_HASH_R5: + c->key_hash = key_r5_hash; + c->key_hash_type = UBIFS_KEY_HASH_R5; + break; + + case UBIFS_KEY_HASH_TEST: + c->key_hash = key_test_hash; + c->key_hash_type = UBIFS_KEY_HASH_TEST; + break; + }; + + c->key_fmt = sup->key_fmt; + + switch (c->key_fmt) { + case UBIFS_SIMPLE_KEY_FMT: + c->key_len = UBIFS_SK_LEN; + break; + default: + ubifs_err("unsupported key format"); + err = -EINVAL; + goto out; + } + + c->leb_cnt = le32_to_cpu(sup->leb_cnt); + c->max_leb_cnt = le32_to_cpu(sup->max_leb_cnt); + c->max_bud_bytes = le64_to_cpu(sup->max_bud_bytes); + c->log_lebs = le32_to_cpu(sup->log_lebs); + c->lpt_lebs = le32_to_cpu(sup->lpt_lebs); + c->orph_lebs = le32_to_cpu(sup->orph_lebs); + c->jhead_cnt = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT; + c->fanout = le32_to_cpu(sup->fanout); + c->lsave_cnt = le32_to_cpu(sup->lsave_cnt); + c->default_compr = le16_to_cpu(sup->default_compr); + c->rp_size = le64_to_cpu(sup->rp_size); + c->rp_uid = le32_to_cpu(sup->rp_uid); + c->rp_gid = le32_to_cpu(sup->rp_gid); + sup_flags = le32_to_cpu(sup->flags); + + c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); + + memcpy(&c->uuid, &sup->uuid, 16); + + c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); + + /* Automatically increase file system size to the maximum size */ + c->old_leb_cnt = c->leb_cnt; + if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) { + c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size); + if (c->vfs_sb->s_flags & MS_RDONLY) + dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs", + c->old_leb_cnt, c->leb_cnt); + else { + dbg_mnt("Auto resizing (sb) from %d LEBs to %d LEBs", + c->old_leb_cnt, c->leb_cnt); + sup->leb_cnt = cpu_to_le32(c->leb_cnt); + err = ubifs_write_sb_node(c, sup); + if (err) + goto out; + c->old_leb_cnt = c->leb_cnt; + } + } + + c->log_bytes = (long long)c->log_lebs * c->leb_size; + c->log_last = UBIFS_LOG_LNUM + c->log_lebs - 1; + c->lpt_first = UBIFS_LOG_LNUM + c->log_lebs; + c->lpt_last = c->lpt_first + c->lpt_lebs - 1; + c->orph_first = c->lpt_last + 1; + c->orph_last = c->orph_first + c->orph_lebs - 1; + c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS; + c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs; + c->main_first = c->leb_cnt - c->main_lebs; + c->report_rp_size = ubifs_reported_space(c, c->rp_size); + + err = validate_sb(c, sup); +out: + kfree(sup); + return err; +} diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c new file mode 100644 index 0000000..acf5c5f --- /dev/null +++ b/fs/ubifs/scan.c @@ -0,0 +1,362 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements the scan which is a general-purpose function for + * determining what nodes are in an eraseblock. The scan is used to replay the + * journal, to do garbage collection. for the TNC in-the-gaps method, and by + * debugging functions. + */ + +#include "ubifs.h" + +/** + * scan_padding_bytes - scan for padding bytes. + * @buf: buffer to scan + * @len: length of buffer + * + * This function returns the number of padding bytes on success and + * %SCANNED_GARBAGE on failure. + */ +static int scan_padding_bytes(void *buf, int len) +{ + int pad_len = 0, max_pad_len = min_t(int, UBIFS_PAD_NODE_SZ, len); + uint8_t *p = buf; + + dbg_scan("not a node"); + + while (pad_len < max_pad_len && *p++ == UBIFS_PADDING_BYTE) + pad_len += 1; + + if (!pad_len || (pad_len & 7)) + return SCANNED_GARBAGE; + + dbg_scan("%d padding bytes", pad_len); + + return pad_len; +} + +/** + * ubifs_scan_a_node - scan for a node or padding. + * @c: UBIFS file-system description object + * @buf: buffer to scan + * @len: length of buffer + * @lnum: logical eraseblock number + * @offs: offset within the logical eraseblock + * @quiet: print no messages + * + * This function returns a scanning code to indicate what was scanned. + */ +int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, + int offs, int quiet) +{ + struct ubifs_ch *ch = buf; + uint32_t magic; + + magic = le32_to_cpu(ch->magic); + + if (magic == 0xFFFFFFFF) { + dbg_scan("hit empty space"); + return SCANNED_EMPTY_SPACE; + } + + if (magic != UBIFS_NODE_MAGIC) + return scan_padding_bytes(buf, len); + + if (len < UBIFS_CH_SZ) + return SCANNED_GARBAGE; + + dbg_scan("scanning %s", dbg_ntype(ch->node_type)); + + if (ubifs_check_node(c, buf, lnum, offs, quiet)) + return SCANNED_A_CORRUPT_NODE; + + if (ch->node_type == UBIFS_PAD_NODE) { + struct ubifs_pad_node *pad = buf; + int pad_len = le32_to_cpu(pad->pad_len); + int node_len = le32_to_cpu(ch->len); + + /* Validate the padding node */ + if (pad_len < 0 || + offs + node_len + pad_len > c->leb_size) { + if (!quiet) { + ubifs_err("bad pad node at LEB %d:%d", + lnum, offs); + dbg_dump_node(c, pad); + } + return SCANNED_A_BAD_PAD_NODE; + } + + /* Make the node pads to 8-byte boundary */ + if ((node_len + pad_len) & 7) { + if (!quiet) { + dbg_err("bad padding length %d - %d", + offs, offs + node_len + pad_len); + } + return SCANNED_A_BAD_PAD_NODE; + } + + dbg_scan("%d bytes padded, offset now %d", + pad_len, ALIGN(offs + node_len + pad_len, 8)); + + return node_len + pad_len; + } + + return SCANNED_A_NODE; +} + +/** + * ubifs_start_scan - create LEB scanning information at start of scan. + * @c: UBIFS file-system description object + * @lnum: logical eraseblock number + * @offs: offset to start at (usually zero) + * @sbuf: scan buffer (must be c->leb_size) + * + * This function returns %0 on success and a negative error code on failure. + */ +struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, + int offs, void *sbuf) +{ + struct ubifs_scan_leb *sleb; + int err; + + dbg_scan("scan LEB %d:%d", lnum, offs); + + sleb = kzalloc(sizeof(struct ubifs_scan_leb), GFP_NOFS); + if (!sleb) + return ERR_PTR(-ENOMEM); + + sleb->lnum = lnum; + INIT_LIST_HEAD(&sleb->nodes); + sleb->buf = sbuf; + + err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs); + if (err && err != -EBADMSG) { + ubifs_err("cannot read %d bytes from LEB %d:%d," + " error %d", c->leb_size - offs, lnum, offs, err); + kfree(sleb); + return ERR_PTR(err); + } + + if (err == -EBADMSG) + sleb->ecc = 1; + + return sleb; +} + +/** + * ubifs_end_scan - update LEB scanning information at end of scan. + * @c: UBIFS file-system description object + * @sleb: scanning information + * @lnum: logical eraseblock number + * @offs: offset to start at (usually zero) + * + * This function returns %0 on success and a negative error code on failure. + */ +void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, + int lnum, int offs) +{ + lnum = lnum; + dbg_scan("stop scanning LEB %d at offset %d", lnum, offs); + ubifs_assert(offs % c->min_io_size == 0); + + sleb->endpt = ALIGN(offs, c->min_io_size); +} + +/** + * ubifs_add_snod - add a scanned node to LEB scanning information. + * @c: UBIFS file-system description object + * @sleb: scanning information + * @buf: buffer containing node + * @offs: offset of node on flash + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, + void *buf, int offs) +{ + struct ubifs_ch *ch = buf; + struct ubifs_ino_node *ino = buf; + struct ubifs_scan_node *snod; + + snod = kzalloc(sizeof(struct ubifs_scan_node), GFP_NOFS); + if (!snod) + return -ENOMEM; + + snod->sqnum = le64_to_cpu(ch->sqnum); + snod->type = ch->node_type; + snod->offs = offs; + snod->len = le32_to_cpu(ch->len); + snod->node = buf; + + switch (ch->node_type) { + case UBIFS_INO_NODE: + case UBIFS_DENT_NODE: + case UBIFS_XENT_NODE: + case UBIFS_DATA_NODE: + case UBIFS_TRUN_NODE: + /* + * The key is in the same place in all keyed + * nodes. + */ + key_read(c, &ino->key, &snod->key); + break; + } + list_add_tail(&snod->list, &sleb->nodes); + sleb->nodes_cnt += 1; + return 0; +} + +/** + * ubifs_scanned_corruption - print information after UBIFS scanned corruption. + * @c: UBIFS file-system description object + * @lnum: LEB number of corruption + * @offs: offset of corruption + * @buf: buffer containing corruption + */ +void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, + void *buf) +{ + int len; + + ubifs_err("corrupted data at LEB %d:%d", lnum, offs); + if (dbg_failure_mode) + return; + len = c->leb_size - offs; + if (len > 4096) + len = 4096; + dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1); +} + +/** + * ubifs_scan - scan a logical eraseblock. + * @c: UBIFS file-system description object + * @lnum: logical eraseblock number + * @offs: offset to start at (usually zero) + * @sbuf: scan buffer (must be c->leb_size) + * + * This function scans LEB number @lnum and returns complete information about + * its contents. Returns an error code in case of failure. + */ +struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, + int offs, void *sbuf) +{ + void *buf = sbuf + offs; + int err, len = c->leb_size - offs; + struct ubifs_scan_leb *sleb; + + sleb = ubifs_start_scan(c, lnum, offs, sbuf); + if (IS_ERR(sleb)) + return sleb; + + while (len >= 8) { + struct ubifs_ch *ch = buf; + int node_len, ret; + + dbg_scan("look at LEB %d:%d (%d bytes left)", + lnum, offs, len); + + cond_resched(); + + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); + + if (ret > 0) { + /* Padding bytes or a valid padding node */ + offs += ret; + buf += ret; + len -= ret; + continue; + } + + if (ret == SCANNED_EMPTY_SPACE) + /* Empty space is checked later */ + break; + + switch (ret) { + case SCANNED_GARBAGE: + dbg_err("garbage"); + goto corrupted; + case SCANNED_A_NODE: + break; + case SCANNED_A_CORRUPT_NODE: + case SCANNED_A_BAD_PAD_NODE: + dbg_err("bad node"); + goto corrupted; + default: + dbg_err("unknown"); + goto corrupted; + } + + err = ubifs_add_snod(c, sleb, buf, offs); + if (err) + goto error; + + node_len = ALIGN(le32_to_cpu(ch->len), 8); + offs += node_len; + buf += node_len; + len -= node_len; + } + + if (offs % c->min_io_size) + goto corrupted; + + ubifs_end_scan(c, sleb, lnum, offs); + + for (; len > 4; offs += 4, buf = buf + 4, len -= 4) + if (*(uint32_t *)buf != 0xffffffff) + break; + for (; len; offs++, buf++, len--) + if (*(uint8_t *)buf != 0xff) { + ubifs_err("corrupt empty space at LEB %d:%d", + lnum, offs); + goto corrupted; + } + + return sleb; + +corrupted: + ubifs_scanned_corruption(c, lnum, offs, buf); + err = -EUCLEAN; +error: + ubifs_err("LEB %d scanning failed", lnum); + ubifs_scan_destroy(sleb); + return ERR_PTR(err); +} + +/** + * ubifs_scan_destroy - destroy LEB scanning information. + * @sleb: scanning information to free + */ +void ubifs_scan_destroy(struct ubifs_scan_leb *sleb) +{ + struct ubifs_scan_node *node; + struct list_head *head; + + head = &sleb->nodes; + while (!list_empty(head)) { + node = list_entry(head->next, struct ubifs_scan_node, list); + list_del(&node->list); + kfree(node); + } + kfree(sleb); +} diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c new file mode 100644 index 0000000..f248533 --- /dev/null +++ b/fs/ubifs/shrinker.c @@ -0,0 +1,322 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file implements UBIFS shrinker which evicts clean znodes from the TNC + * tree when Linux VM needs more RAM. + * + * We do not implement any LRU lists to find oldest znodes to free because it + * would add additional overhead to the file system fast paths. So the shrinker + * just walks the TNC tree when searching for znodes to free. + * + * If the root of a TNC sub-tree is clean and old enough, then the children are + * also clean and old enough. So the shrinker walks the TNC in level order and + * dumps entire sub-trees. + * + * The age of znodes is just the time-stamp when they were last looked at. + * The current shrinker first tries to evict old znodes, then young ones. + * + * Since the shrinker is global, it has to protect against races with FS + * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'. + */ + +#include "ubifs.h" + +/* List of all UBIFS file-system instances */ +LIST_HEAD(ubifs_infos); + +/* + * We number each shrinker run and record the number on the ubifs_info structure + * so that we can easily work out which ubifs_info structures have already been + * done by the current run. + */ +static unsigned int shrinker_run_no; + +/* Protects 'ubifs_infos' list */ +DEFINE_SPINLOCK(ubifs_infos_lock); + +/* Global clean znode counter (for all mounted UBIFS instances) */ +atomic_long_t ubifs_clean_zn_cnt; + +/** + * shrink_tnc - shrink TNC tree. + * @c: UBIFS file-system description object + * @nr: number of znodes to free + * @age: the age of znodes to free + * @contention: if any contention, this is set to %1 + * + * This function traverses TNC tree and frees clean znodes. It does not free + * clean znodes which younger then @age. Returns number of freed znodes. + */ +static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) +{ + int total_freed = 0; + struct ubifs_znode *znode, *zprev; + int time = get_seconds(); + + ubifs_assert(mutex_is_locked(&c->umount_mutex)); + ubifs_assert(mutex_is_locked(&c->tnc_mutex)); + + if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0) + return 0; + + /* + * Traverse the TNC tree in levelorder manner, so that it is possible + * to destroy large sub-trees. Indeed, if a znode is old, then all its + * children are older or of the same age. + * + * Note, we are holding 'c->tnc_mutex', so we do not have to lock the + * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is + * changed only when the 'c->tnc_mutex' is held. + */ + zprev = NULL; + znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); + while (znode && total_freed < nr && + atomic_long_read(&c->clean_zn_cnt) > 0) { + int freed; + + /* + * If the znode is clean, but it is in the 'c->cnext' list, this + * means that this znode has just been written to flash as a + * part of commit and was marked clean. They will be removed + * from the list at end commit. We cannot change the list, + * because it is not protected by any mutex (design decision to + * make commit really independent and parallel to main I/O). So + * we just skip these znodes. + * + * Note, the 'clean_zn_cnt' counters are not updated until + * after the commit, so the UBIFS shrinker does not report + * the znodes which are in the 'c->cnext' list as freeable. + * + * Also note, if the root of a sub-tree is not in 'c->cnext', + * then the whole sub-tree is not in 'c->cnext' as well, so it + * is safe to dump whole sub-tree. + */ + + if (znode->cnext) { + /* + * Very soon these znodes will be removed from the list + * and become freeable. + */ + *contention = 1; + } else if (!ubifs_zn_dirty(znode) && + abs(time - znode->time) >= age) { + if (znode->parent) + znode->parent->zbranch[znode->iip].znode = NULL; + else + c->zroot.znode = NULL; + + freed = ubifs_destroy_tnc_subtree(znode); + atomic_long_sub(freed, &ubifs_clean_zn_cnt); + atomic_long_sub(freed, &c->clean_zn_cnt); + ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0); + total_freed += freed; + znode = zprev; + } + + if (unlikely(!c->zroot.znode)) + break; + + zprev = znode; + znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); + cond_resched(); + } + + return total_freed; +} + +/** + * shrink_tnc_trees - shrink UBIFS TNC trees. + * @nr: number of znodes to free + * @age: the age of znodes to free + * @contention: if any contention, this is set to %1 + * + * This function walks the list of mounted UBIFS file-systems and frees clean + * znodes which are older then @age, until at least @nr znodes are freed. + * Returns the number of freed znodes. + */ +static int shrink_tnc_trees(int nr, int age, int *contention) +{ + struct ubifs_info *c; + struct list_head *p; + unsigned int run_no; + int freed = 0; + + spin_lock(&ubifs_infos_lock); + do { + run_no = ++shrinker_run_no; + } while (run_no == 0); + /* Iterate over all mounted UBIFS file-systems and try to shrink them */ + p = ubifs_infos.next; + while (p != &ubifs_infos) { + c = list_entry(p, struct ubifs_info, infos_list); + /* + * We move the ones we do to the end of the list, so we stop + * when we see one we have already done. + */ + if (c->shrinker_run_no == run_no) + break; + if (!mutex_trylock(&c->umount_mutex)) { + /* Some un-mount is in progress, try next FS */ + *contention = 1; + p = p->next; + continue; + } + /* + * We're holding 'c->umount_mutex', so the file-system won't go + * away. + */ + if (!mutex_trylock(&c->tnc_mutex)) { + mutex_unlock(&c->umount_mutex); + *contention = 1; + p = p->next; + continue; + } + spin_unlock(&ubifs_infos_lock); + /* + * OK, now we have TNC locked, the file-system cannot go away - + * it is safe to reap the cache. + */ + c->shrinker_run_no = run_no; + freed += shrink_tnc(c, nr, age, contention); + mutex_unlock(&c->tnc_mutex); + spin_lock(&ubifs_infos_lock); + /* Get the next list element before we move this one */ + p = p->next; + /* + * Move this one to the end of the list to provide some + * fairness. + */ + list_del(&c->infos_list); + list_add_tail(&c->infos_list, &ubifs_infos); + mutex_unlock(&c->umount_mutex); + if (freed >= nr) + break; + } + spin_unlock(&ubifs_infos_lock); + return freed; +} + +/** + * kick_a_thread - kick a background thread to start commit. + * + * This function kicks a background thread to start background commit. Returns + * %-1 if a thread was kicked or there is another reason to assume the memory + * will soon be freed or become freeable. If there are no dirty znodes, returns + * %0. + */ +static int kick_a_thread(void) +{ + int i; + struct ubifs_info *c; + + /* + * Iterate over all mounted UBIFS file-systems and find out if there is + * already an ongoing commit operation there. If no, then iterate for + * the second time and initiate background commit. + */ + spin_lock(&ubifs_infos_lock); + for (i = 0; i < 2; i++) { + list_for_each_entry(c, &ubifs_infos, infos_list) { + long dirty_zn_cnt; + + if (!mutex_trylock(&c->umount_mutex)) { + /* + * Some un-mount is in progress, it will + * certainly free memory, so just return. + */ + spin_unlock(&ubifs_infos_lock); + return -1; + } + + dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt); + + if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN || + c->ro_media) { + mutex_unlock(&c->umount_mutex); + continue; + } + + if (c->cmt_state != COMMIT_RESTING) { + spin_unlock(&ubifs_infos_lock); + mutex_unlock(&c->umount_mutex); + return -1; + } + + if (i == 1) { + list_del(&c->infos_list); + list_add_tail(&c->infos_list, &ubifs_infos); + spin_unlock(&ubifs_infos_lock); + + ubifs_request_bg_commit(c); + mutex_unlock(&c->umount_mutex); + return -1; + } + mutex_unlock(&c->umount_mutex); + } + } + spin_unlock(&ubifs_infos_lock); + + return 0; +} + +int ubifs_shrinker(int nr, gfp_t gfp_mask) +{ + int freed, contention = 0; + long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); + + if (nr == 0) + return clean_zn_cnt; + + if (!clean_zn_cnt) { + /* + * No clean znodes, nothing to reap. All we can do in this case + * is to kick background threads to start commit, which will + * probably make clean znodes which, in turn, will be freeable. + * And we return -1 which means will make VM call us again + * later. + */ + dbg_tnc("no clean znodes, kick a thread"); + return kick_a_thread(); + } + + freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention); + if (freed >= nr) + goto out; + + dbg_tnc("not enough old znodes, try to free young ones"); + freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention); + if (freed >= nr) + goto out; + + dbg_tnc("not enough young znodes, free all"); + freed += shrink_tnc_trees(nr - freed, 0, &contention); + + if (!freed && contention) { + dbg_tnc("freed nothing, but contention"); + return -1; + } + +out: + dbg_tnc("%d znodes were freed, requested %d", freed, nr); + return freed; +} diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c new file mode 100644 index 0000000..00eb9c6 --- /dev/null +++ b/fs/ubifs/super.c @@ -0,0 +1,1951 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file implements UBIFS initialization and VFS superblock operations. Some + * initialization stuff which is rather large and complex is placed at + * corresponding subsystems, but most of it is here. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ubifs.h" + +/* Slab cache for UBIFS inodes */ +struct kmem_cache *ubifs_inode_slab; + +/* UBIFS TNC shrinker description */ +static struct shrinker ubifs_shrinker_info = { + .shrink = ubifs_shrinker, + .seeks = DEFAULT_SEEKS, +}; + +/** + * validate_inode - validate inode. + * @c: UBIFS file-system description object + * @inode: the inode to validate + * + * This is a helper function for 'ubifs_iget()' which validates various fields + * of a newly built inode to make sure they contain sane values and prevent + * possible vulnerabilities. Returns zero if the inode is all right and + * a non-zero error code if not. + */ +static int validate_inode(struct ubifs_info *c, const struct inode *inode) +{ + int err; + const struct ubifs_inode *ui = ubifs_inode(inode); + + if (inode->i_size > c->max_inode_sz) { + ubifs_err("inode is too large (%lld)", + (long long)inode->i_size); + return 1; + } + + if (ui->compr_type < 0 || ui->compr_type >= UBIFS_COMPR_TYPES_CNT) { + ubifs_err("unknown compression type %d", ui->compr_type); + return 2; + } + + if (ui->xattr_names + ui->xattr_cnt > XATTR_LIST_MAX) + return 3; + + if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA) + return 4; + + if (ui->xattr && (inode->i_mode & S_IFMT) != S_IFREG) + return 5; + + if (!ubifs_compr_present(ui->compr_type)) { + ubifs_warn("inode %lu uses '%s' compression, but it was not " + "compiled in", inode->i_ino, + ubifs_compr_name(ui->compr_type)); + } + + err = dbg_check_dir_size(c, inode); + return err; +} + +struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) +{ + int err; + union ubifs_key key; + struct ubifs_ino_node *ino; + struct ubifs_info *c = sb->s_fs_info; + struct inode *inode; + struct ubifs_inode *ui; + + dbg_gen("inode %lu", inum); + + inode = iget_locked(sb, inum); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + ui = ubifs_inode(inode); + + ino = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS); + if (!ino) { + err = -ENOMEM; + goto out; + } + + ino_key_init(c, &key, inode->i_ino); + + err = ubifs_tnc_lookup(c, &key, ino); + if (err) + goto out_ino; + + inode->i_flags |= (S_NOCMTIME | S_NOATIME); + inode->i_nlink = le32_to_cpu(ino->nlink); + inode->i_uid = le32_to_cpu(ino->uid); + inode->i_gid = le32_to_cpu(ino->gid); + inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec); + inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec); + inode->i_mtime.tv_sec = (int64_t)le64_to_cpu(ino->mtime_sec); + inode->i_mtime.tv_nsec = le32_to_cpu(ino->mtime_nsec); + inode->i_ctime.tv_sec = (int64_t)le64_to_cpu(ino->ctime_sec); + inode->i_ctime.tv_nsec = le32_to_cpu(ino->ctime_nsec); + inode->i_mode = le32_to_cpu(ino->mode); + inode->i_size = le64_to_cpu(ino->size); + + ui->data_len = le32_to_cpu(ino->data_len); + ui->flags = le32_to_cpu(ino->flags); + ui->compr_type = le16_to_cpu(ino->compr_type); + ui->creat_sqnum = le64_to_cpu(ino->creat_sqnum); + ui->xattr_cnt = le32_to_cpu(ino->xattr_cnt); + ui->xattr_size = le32_to_cpu(ino->xattr_size); + ui->xattr_names = le32_to_cpu(ino->xattr_names); + ui->synced_i_size = ui->ui_size = inode->i_size; + + ui->xattr = (ui->flags & UBIFS_XATTR_FL) ? 1 : 0; + + err = validate_inode(c, inode); + if (err) + goto out_invalid; + + /* Disable readahead */ + inode->i_mapping->backing_dev_info = &c->bdi; + + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + inode->i_mapping->a_ops = &ubifs_file_address_operations; + inode->i_op = &ubifs_file_inode_operations; + inode->i_fop = &ubifs_file_operations; + if (ui->xattr) { + ui->data = kmalloc(ui->data_len + 1, GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_ino; + } + memcpy(ui->data, ino->data, ui->data_len); + ((char *)ui->data)[ui->data_len] = '\0'; + } else if (ui->data_len != 0) { + err = 10; + goto out_invalid; + } + break; + case S_IFDIR: + inode->i_op = &ubifs_dir_inode_operations; + inode->i_fop = &ubifs_dir_operations; + if (ui->data_len != 0) { + err = 11; + goto out_invalid; + } + break; + case S_IFLNK: + inode->i_op = &ubifs_symlink_inode_operations; + if (ui->data_len <= 0 || ui->data_len > UBIFS_MAX_INO_DATA) { + err = 12; + goto out_invalid; + } + ui->data = kmalloc(ui->data_len + 1, GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_ino; + } + memcpy(ui->data, ino->data, ui->data_len); + ((char *)ui->data)[ui->data_len] = '\0'; + break; + case S_IFBLK: + case S_IFCHR: + { + dev_t rdev; + union ubifs_dev_desc *dev; + + ui->data = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_ino; + } + + dev = (union ubifs_dev_desc *)ino->data; + if (ui->data_len == sizeof(dev->new)) + rdev = new_decode_dev(le32_to_cpu(dev->new)); + else if (ui->data_len == sizeof(dev->huge)) + rdev = huge_decode_dev(le64_to_cpu(dev->huge)); + else { + err = 13; + goto out_invalid; + } + memcpy(ui->data, ino->data, ui->data_len); + inode->i_op = &ubifs_file_inode_operations; + init_special_inode(inode, inode->i_mode, rdev); + break; + } + case S_IFSOCK: + case S_IFIFO: + inode->i_op = &ubifs_file_inode_operations; + init_special_inode(inode, inode->i_mode, 0); + if (ui->data_len != 0) { + err = 14; + goto out_invalid; + } + break; + default: + err = 15; + goto out_invalid; + } + + kfree(ino); + ubifs_set_inode_flags(inode); + unlock_new_inode(inode); + return inode; + +out_invalid: + ubifs_err("inode %lu validation failed, error %d", inode->i_ino, err); + dbg_dump_node(c, ino); + dbg_dump_inode(c, inode); + err = -EINVAL; +out_ino: + kfree(ino); +out: + ubifs_err("failed to read inode %lu, error %d", inode->i_ino, err); + iget_failed(inode); + return ERR_PTR(err); +} + +static struct inode *ubifs_alloc_inode(struct super_block *sb) +{ + struct ubifs_inode *ui; + + ui = kmem_cache_alloc(ubifs_inode_slab, GFP_NOFS); + if (!ui) + return NULL; + + memset((void *)ui + sizeof(struct inode), 0, + sizeof(struct ubifs_inode) - sizeof(struct inode)); + mutex_init(&ui->ui_mutex); + spin_lock_init(&ui->ui_lock); + return &ui->vfs_inode; +}; + +static void ubifs_destroy_inode(struct inode *inode) +{ + struct ubifs_inode *ui = ubifs_inode(inode); + + kfree(ui->data); + kmem_cache_free(ubifs_inode_slab, inode); +} + +/* + * Note, Linux write-back code calls this without 'i_mutex'. + */ +static int ubifs_write_inode(struct inode *inode, int wait) +{ + int err; + struct ubifs_info *c = inode->i_sb->s_fs_info; + struct ubifs_inode *ui = ubifs_inode(inode); + + ubifs_assert(!ui->xattr); + if (is_bad_inode(inode)) + return 0; + + mutex_lock(&ui->ui_mutex); + /* + * Due to races between write-back forced by budgeting + * (see 'sync_some_inodes()') and pdflush write-back, the inode may + * have already been synchronized, do not do this again. This might + * also happen if it was synchronized in an VFS operation, e.g. + * 'ubifs_link()'. + */ + if (!ui->dirty) { + mutex_unlock(&ui->ui_mutex); + return 0; + } + + dbg_gen("inode %lu", inode->i_ino); + err = ubifs_jnl_write_inode(c, inode, 0); + if (err) + ubifs_err("can't write inode %lu, error %d", inode->i_ino, err); + + ui->dirty = 0; + mutex_unlock(&ui->ui_mutex); + ubifs_release_dirty_inode_budget(c, ui); + return err; +} + +static void ubifs_delete_inode(struct inode *inode) +{ + int err; + struct ubifs_info *c = inode->i_sb->s_fs_info; + + if (ubifs_inode(inode)->xattr) + /* + * Extended attribute inode deletions are fully handled in + * 'ubifs_removexattr()'. These inodes are special and have + * limited usage, so there is nothing to do here. + */ + goto out; + + dbg_gen("inode %lu", inode->i_ino); + ubifs_assert(!atomic_read(&inode->i_count)); + ubifs_assert(inode->i_nlink == 0); + + truncate_inode_pages(&inode->i_data, 0); + if (is_bad_inode(inode)) + goto out; + + ubifs_inode(inode)->ui_size = inode->i_size = 0; + err = ubifs_jnl_write_inode(c, inode, 1); + if (err) + /* + * Worst case we have a lost orphan inode wasting space, so a + * simple error message is ok here. + */ + ubifs_err("can't write inode %lu, error %d", inode->i_ino, err); +out: + clear_inode(inode); +} + +static void ubifs_dirty_inode(struct inode *inode) +{ + struct ubifs_inode *ui = ubifs_inode(inode); + + ubifs_assert(mutex_is_locked(&ui->ui_mutex)); + if (!ui->dirty) { + ui->dirty = 1; + dbg_gen("inode %lu", inode->i_ino); + } +} + +static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct ubifs_info *c = dentry->d_sb->s_fs_info; + unsigned long long free; + + free = ubifs_budg_get_free_space(c); + dbg_gen("free space %lld bytes (%lld blocks)", + free, free >> UBIFS_BLOCK_SHIFT); + + buf->f_type = UBIFS_SUPER_MAGIC; + buf->f_bsize = UBIFS_BLOCK_SIZE; + buf->f_blocks = c->block_cnt; + buf->f_bfree = free >> UBIFS_BLOCK_SHIFT; + if (free > c->report_rp_size) + buf->f_bavail = (free - c->report_rp_size) >> UBIFS_BLOCK_SHIFT; + else + buf->f_bavail = 0; + buf->f_files = 0; + buf->f_ffree = 0; + buf->f_namelen = UBIFS_MAX_NLEN; + + return 0; +} + +static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt) +{ + struct ubifs_info *c = mnt->mnt_sb->s_fs_info; + + if (c->mount_opts.unmount_mode == 2) + seq_printf(s, ",fast_unmount"); + else if (c->mount_opts.unmount_mode == 1) + seq_printf(s, ",norm_unmount"); + + return 0; +} + +static int ubifs_sync_fs(struct super_block *sb, int wait) +{ + struct ubifs_info *c = sb->s_fs_info; + int i, ret = 0, err; + + if (c->jheads) + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err && !ret) + ret = err; + } + /* + * We ought to call sync for c->ubi but it does not have one. If it had + * it would in turn call mtd->sync, however mtd operations are + * synchronous anyway, so we don't lose any sleep here. + */ + return ret; +} + +/** + * init_constants_early - initialize UBIFS constants. + * @c: UBIFS file-system description object + * + * This function initialize UBIFS constants which do not need the superblock to + * be read. It also checks that the UBI volume satisfies basic UBIFS + * requirements. Returns zero in case of success and a negative error code in + * case of failure. + */ +static int init_constants_early(struct ubifs_info *c) +{ + if (c->vi.corrupted) { + ubifs_warn("UBI volume is corrupted - read-only mode"); + c->ro_media = 1; + } + + if (c->di.ro_mode) { + ubifs_msg("read-only UBI device"); + c->ro_media = 1; + } + + if (c->vi.vol_type == UBI_STATIC_VOLUME) { + ubifs_msg("static UBI volume - read-only mode"); + c->ro_media = 1; + } + + c->leb_cnt = c->vi.size; + c->leb_size = c->vi.usable_leb_size; + c->half_leb_size = c->leb_size / 2; + c->min_io_size = c->di.min_io_size; + c->min_io_shift = fls(c->min_io_size) - 1; + + if (c->leb_size < UBIFS_MIN_LEB_SZ) { + ubifs_err("too small LEBs (%d bytes), min. is %d bytes", + c->leb_size, UBIFS_MIN_LEB_SZ); + return -EINVAL; + } + + if (c->leb_cnt < UBIFS_MIN_LEB_CNT) { + ubifs_err("too few LEBs (%d), min. is %d", + c->leb_cnt, UBIFS_MIN_LEB_CNT); + return -EINVAL; + } + + if (!is_power_of_2(c->min_io_size)) { + ubifs_err("bad min. I/O size %d", c->min_io_size); + return -EINVAL; + } + + /* + * UBIFS aligns all node to 8-byte boundary, so to make function in + * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is + * less than 8. + */ + if (c->min_io_size < 8) { + c->min_io_size = 8; + c->min_io_shift = 3; + } + + c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size); + c->mst_node_alsz = ALIGN(UBIFS_MST_NODE_SZ, c->min_io_size); + + /* + * Initialize node length ranges which are mostly needed for node + * length validation. + */ + c->ranges[UBIFS_PAD_NODE].len = UBIFS_PAD_NODE_SZ; + c->ranges[UBIFS_SB_NODE].len = UBIFS_SB_NODE_SZ; + c->ranges[UBIFS_MST_NODE].len = UBIFS_MST_NODE_SZ; + c->ranges[UBIFS_REF_NODE].len = UBIFS_REF_NODE_SZ; + c->ranges[UBIFS_TRUN_NODE].len = UBIFS_TRUN_NODE_SZ; + c->ranges[UBIFS_CS_NODE].len = UBIFS_CS_NODE_SZ; + + c->ranges[UBIFS_INO_NODE].min_len = UBIFS_INO_NODE_SZ; + c->ranges[UBIFS_INO_NODE].max_len = UBIFS_MAX_INO_NODE_SZ; + c->ranges[UBIFS_ORPH_NODE].min_len = + UBIFS_ORPH_NODE_SZ + sizeof(__le64); + c->ranges[UBIFS_ORPH_NODE].max_len = c->leb_size; + c->ranges[UBIFS_DENT_NODE].min_len = UBIFS_DENT_NODE_SZ; + c->ranges[UBIFS_DENT_NODE].max_len = UBIFS_MAX_DENT_NODE_SZ; + c->ranges[UBIFS_XENT_NODE].min_len = UBIFS_XENT_NODE_SZ; + c->ranges[UBIFS_XENT_NODE].max_len = UBIFS_MAX_XENT_NODE_SZ; + c->ranges[UBIFS_DATA_NODE].min_len = UBIFS_DATA_NODE_SZ; + c->ranges[UBIFS_DATA_NODE].max_len = UBIFS_MAX_DATA_NODE_SZ; + /* + * Minimum indexing node size is amended later when superblock is + * read and the key length is known. + */ + c->ranges[UBIFS_IDX_NODE].min_len = UBIFS_IDX_NODE_SZ + UBIFS_BRANCH_SZ; + /* + * Maximum indexing node size is amended later when superblock is + * read and the fanout is known. + */ + c->ranges[UBIFS_IDX_NODE].max_len = INT_MAX; + + /* + * Initialize dead and dark LEB space watermarks. + * + * Dead space is the space which cannot be used. Its watermark is + * equivalent to min. I/O unit or minimum node size if it is greater + * then min. I/O unit. + * + * Dark space is the space which might be used, or might not, depending + * on which node should be written to the LEB. Its watermark is + * equivalent to maximum UBIFS node size. + */ + c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); + c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); + + return 0; +} + +/** + * bud_wbuf_callback - bud LEB write-buffer synchronization call-back. + * @c: UBIFS file-system description object + * @lnum: LEB the write-buffer was synchronized to + * @free: how many free bytes left in this LEB + * @pad: how many bytes were padded + * + * This is a callback function which is called by the I/O unit when the + * write-buffer is synchronized. We need this to correctly maintain space + * accounting in bud logical eraseblocks. This function returns zero in case of + * success and a negative error code in case of failure. + * + * This function actually belongs to the journal, but we keep it here because + * we want to keep it static. + */ +static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad) +{ + return ubifs_update_one_lp(c, lnum, free, pad, 0, 0); +} + +/* + * init_constants_late - initialize UBIFS constants. + * @c: UBIFS file-system description object + * + * This is a helper function which initializes various UBIFS constants after + * the superblock has been read. It also checks various UBIFS parameters and + * makes sure they are all right. Returns zero in case of success and a + * negative error code in case of failure. + */ +static int init_constants_late(struct ubifs_info *c) +{ + int tmp, err; + uint64_t tmp64; + + c->main_bytes = (long long)c->main_lebs * c->leb_size; + c->max_znode_sz = sizeof(struct ubifs_znode) + + c->fanout * sizeof(struct ubifs_zbranch); + + tmp = ubifs_idx_node_sz(c, 1); + c->ranges[UBIFS_IDX_NODE].min_len = tmp; + c->min_idx_node_sz = ALIGN(tmp, 8); + + tmp = ubifs_idx_node_sz(c, c->fanout); + c->ranges[UBIFS_IDX_NODE].max_len = tmp; + c->max_idx_node_sz = ALIGN(tmp, 8); + + /* Make sure LEB size is large enough to fit full commit */ + tmp = UBIFS_CS_NODE_SZ + UBIFS_REF_NODE_SZ * c->jhead_cnt; + tmp = ALIGN(tmp, c->min_io_size); + if (tmp > c->leb_size) { + dbg_err("too small LEB size %d, at least %d needed", + c->leb_size, tmp); + return -EINVAL; + } + + /* + * Make sure that the log is large enough to fit reference nodes for + * all buds plus one reserved LEB. + */ + tmp64 = c->max_bud_bytes; + tmp = do_div(tmp64, c->leb_size); + c->max_bud_cnt = tmp64 + !!tmp; + tmp = (c->ref_node_alsz * c->max_bud_cnt + c->leb_size - 1); + tmp /= c->leb_size; + tmp += 1; + if (c->log_lebs < tmp) { + dbg_err("too small log %d LEBs, required min. %d LEBs", + c->log_lebs, tmp); + return -EINVAL; + } + + /* + * When budgeting we assume worst-case scenarios when the pages are not + * be compressed and direntries are of the maximum size. + * + * Note, data, which may be stored in inodes is budgeted separately, so + * it is not included into 'c->inode_budget'. + */ + c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; + c->inode_budget = UBIFS_INO_NODE_SZ; + c->dent_budget = UBIFS_MAX_DENT_NODE_SZ; + + /* + * When the amount of flash space used by buds becomes + * 'c->max_bud_bytes', UBIFS just blocks all writers and starts commit. + * The writers are unblocked when the commit is finished. To avoid + * writers to be blocked UBIFS initiates background commit in advance, + * when number of bud bytes becomes above the limit defined below. + */ + c->bg_bud_bytes = (c->max_bud_bytes * 13) >> 4; + + /* + * Ensure minimum journal size. All the bytes in the journal heads are + * considered to be used, when calculating the current journal usage. + * Consequently, if the journal is too small, UBIFS will treat it as + * always full. + */ + tmp64 = (uint64_t)(c->jhead_cnt + 1) * c->leb_size + 1; + if (c->bg_bud_bytes < tmp64) + c->bg_bud_bytes = tmp64; + if (c->max_bud_bytes < tmp64 + c->leb_size) + c->max_bud_bytes = tmp64 + c->leb_size; + + err = ubifs_calc_lpt_geom(c); + if (err) + return err; + + c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + + /* + * Calculate total amount of FS blocks. This number is not used + * internally because it does not make much sense for UBIFS, but it is + * necessary to report something for the 'statfs()' call. + * + * Subtract the LEB reserved for GC and the LEB which is reserved for + * deletions. + * + * Review 'ubifs_calc_available()' if changing this calculation. + */ + tmp64 = c->main_lebs - 2; + tmp64 *= (uint64_t)c->leb_size - c->dark_wm; + tmp64 = ubifs_reported_space(c, tmp64); + c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; + + return 0; +} + +/** + * take_gc_lnum - reserve GC LEB. + * @c: UBIFS file-system description object + * + * This function ensures that the LEB reserved for garbage collection is + * unmapped and is marked as "taken" in lprops. We also have to set free space + * to LEB size and dirty space to zero, because lprops may contain out-of-date + * information if the file-system was un-mounted before it has been committed. + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int take_gc_lnum(struct ubifs_info *c) +{ + int err; + + if (c->gc_lnum == -1) { + ubifs_err("no LEB for GC"); + return -EINVAL; + } + + err = ubifs_leb_unmap(c, c->gc_lnum); + if (err) + return err; + + /* And we have to tell lprops that this LEB is taken */ + err = ubifs_change_one_lp(c, c->gc_lnum, c->leb_size, 0, + LPROPS_TAKEN, 0, 0); + return err; +} + +/** + * alloc_wbufs - allocate write-buffers. + * @c: UBIFS file-system description object + * + * This helper function allocates and initializes UBIFS write-buffers. Returns + * zero in case of success and %-ENOMEM in case of failure. + */ +static int alloc_wbufs(struct ubifs_info *c) +{ + int i, err; + + c->jheads = kzalloc(c->jhead_cnt * sizeof(struct ubifs_jhead), + GFP_KERNEL); + if (!c->jheads) + return -ENOMEM; + + /* Initialize journal heads */ + for (i = 0; i < c->jhead_cnt; i++) { + INIT_LIST_HEAD(&c->jheads[i].buds_list); + err = ubifs_wbuf_init(c, &c->jheads[i].wbuf); + if (err) + return err; + + c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; + c->jheads[i].wbuf.jhead = i; + } + + c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; + /* + * Garbage Collector head likely contains long-term data and + * does not need to be synchronized by timer. + */ + c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; + c->jheads[GCHD].wbuf.timeout = 0; + + return 0; +} + +/** + * free_wbufs - free write-buffers. + * @c: UBIFS file-system description object + */ +static void free_wbufs(struct ubifs_info *c) +{ + int i; + + if (c->jheads) { + for (i = 0; i < c->jhead_cnt; i++) { + kfree(c->jheads[i].wbuf.buf); + kfree(c->jheads[i].wbuf.inodes); + } + kfree(c->jheads); + c->jheads = NULL; + } +} + +/** + * free_orphans - free orphans. + * @c: UBIFS file-system description object + */ +static void free_orphans(struct ubifs_info *c) +{ + struct ubifs_orphan *orph; + + while (c->orph_dnext) { + orph = c->orph_dnext; + c->orph_dnext = orph->dnext; + list_del(&orph->list); + kfree(orph); + } + + while (!list_empty(&c->orph_list)) { + orph = list_entry(c->orph_list.next, struct ubifs_orphan, list); + list_del(&orph->list); + kfree(orph); + dbg_err("orphan list not empty at unmount"); + } + + vfree(c->orph_buf); + c->orph_buf = NULL; +} + +/** + * free_buds - free per-bud objects. + * @c: UBIFS file-system description object + */ +static void free_buds(struct ubifs_info *c) +{ + struct rb_node *this = c->buds.rb_node; + struct ubifs_bud *bud; + + while (this) { + if (this->rb_left) + this = this->rb_left; + else if (this->rb_right) + this = this->rb_right; + else { + bud = rb_entry(this, struct ubifs_bud, rb); + this = rb_parent(this); + if (this) { + if (this->rb_left == &bud->rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + kfree(bud); + } + } +} + +/** + * check_volume_empty - check if the UBI volume is empty. + * @c: UBIFS file-system description object + * + * This function checks if the UBIFS volume is empty by looking if its LEBs are + * mapped or not. The result of checking is stored in the @c->empty variable. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +static int check_volume_empty(struct ubifs_info *c) +{ + int lnum, err; + + c->empty = 1; + for (lnum = 0; lnum < c->leb_cnt; lnum++) { + err = ubi_is_mapped(c->ubi, lnum); + if (unlikely(err < 0)) + return err; + if (err == 1) { + c->empty = 0; + break; + } + + cond_resched(); + } + + return 0; +} + +/* + * UBIFS mount options. + * + * Opt_fast_unmount: do not run a journal commit before un-mounting + * Opt_norm_unmount: run a journal commit before un-mounting + * Opt_err: just end of array marker + */ +enum { + Opt_fast_unmount, + Opt_norm_unmount, + Opt_err, +}; + +static match_table_t tokens = { + {Opt_fast_unmount, "fast_unmount"}, + {Opt_norm_unmount, "norm_unmount"}, + {Opt_err, NULL}, +}; + +/** + * ubifs_parse_options - parse mount parameters. + * @c: UBIFS file-system description object + * @options: parameters to parse + * @is_remount: non-zero if this is FS re-mount + * + * This function parses UBIFS mount options and returns zero in case success + * and a negative error code in case of failure. + */ +static int ubifs_parse_options(struct ubifs_info *c, char *options, + int is_remount) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + + if (!options) + return 0; + + while ((p = strsep(&options, ","))) { + int token; + + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_fast_unmount: + c->mount_opts.unmount_mode = 2; + c->fast_unmount = 1; + break; + case Opt_norm_unmount: + c->mount_opts.unmount_mode = 1; + c->fast_unmount = 0; + break; + default: + ubifs_err("unrecognized mount option \"%s\" " + "or missing value", p); + return -EINVAL; + } + } + + return 0; +} + +/** + * destroy_journal - destroy journal data structures. + * @c: UBIFS file-system description object + * + * This function destroys journal data structures including those that may have + * been created by recovery functions. + */ +static void destroy_journal(struct ubifs_info *c) +{ + while (!list_empty(&c->unclean_leb_list)) { + struct ubifs_unclean_leb *ucleb; + + ucleb = list_entry(c->unclean_leb_list.next, + struct ubifs_unclean_leb, list); + list_del(&ucleb->list); + kfree(ucleb); + } + while (!list_empty(&c->old_buds)) { + struct ubifs_bud *bud; + + bud = list_entry(c->old_buds.next, struct ubifs_bud, list); + list_del(&bud->list); + kfree(bud); + } + ubifs_destroy_idx_gc(c); + ubifs_destroy_size_tree(c); + ubifs_tnc_close(c); + free_buds(c); +} + +/** + * mount_ubifs - mount UBIFS file-system. + * @c: UBIFS file-system description object + * + * This function mounts UBIFS file system. Returns zero in case of success and + * a negative error code in case of failure. + * + * Note, the function does not de-allocate resources it it fails half way + * through, and the caller has to do this instead. + */ +static int mount_ubifs(struct ubifs_info *c) +{ + struct super_block *sb = c->vfs_sb; + int err, mounted_read_only = (sb->s_flags & MS_RDONLY); + long long x; + size_t sz; + + err = init_constants_early(c); + if (err) + return err; + +#ifdef CONFIG_UBIFS_FS_DEBUG + c->dbg_buf = vmalloc(c->leb_size); + if (!c->dbg_buf) + return -ENOMEM; +#endif + + err = check_volume_empty(c); + if (err) + goto out_free; + + if (c->empty && (mounted_read_only || c->ro_media)) { + /* + * This UBI volume is empty, and read-only, or the file system + * is mounted read-only - we cannot format it. + */ + ubifs_err("can't format empty UBI volume: read-only %s", + c->ro_media ? "UBI volume" : "mount"); + err = -EROFS; + goto out_free; + } + + if (c->ro_media && !mounted_read_only) { + ubifs_err("cannot mount read-write - read-only media"); + err = -EROFS; + goto out_free; + } + + /* + * The requirement for the buffer is that it should fit indexing B-tree + * height amount of integers. We assume the height if the TNC tree will + * never exceed 64. + */ + err = -ENOMEM; + c->bottom_up_buf = kmalloc(BOTTOM_UP_HEIGHT * sizeof(int), GFP_KERNEL); + if (!c->bottom_up_buf) + goto out_free; + + c->sbuf = vmalloc(c->leb_size); + if (!c->sbuf) + goto out_free; + + if (!mounted_read_only) { + c->ileb_buf = vmalloc(c->leb_size); + if (!c->ileb_buf) + goto out_free; + } + + err = ubifs_read_superblock(c); + if (err) + goto out_free; + + /* + * Make sure the compressor which is set as the default on in the + * superblock was actually compiled in. + */ + if (!ubifs_compr_present(c->default_compr)) { + ubifs_warn("'%s' compressor is set by superblock, but not " + "compiled in", ubifs_compr_name(c->default_compr)); + c->default_compr = UBIFS_COMPR_NONE; + } + + dbg_failure_mode_registration(c); + + err = init_constants_late(c); + if (err) + goto out_dereg; + + sz = ALIGN(c->max_idx_node_sz, c->min_io_size); + sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size); + c->cbuf = kmalloc(sz, GFP_NOFS); + if (!c->cbuf) { + err = -ENOMEM; + goto out_dereg; + } + + if (!mounted_read_only) { + err = alloc_wbufs(c); + if (err) + goto out_cbuf; + + /* Create background thread */ + sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, + c->vi.vol_id); + c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); + if (!c->bgt) + c->bgt = ERR_PTR(-EINVAL); + if (IS_ERR(c->bgt)) { + err = PTR_ERR(c->bgt); + c->bgt = NULL; + ubifs_err("cannot spawn \"%s\", error %d", + c->bgt_name, err); + goto out_wbufs; + } + wake_up_process(c->bgt); + } + + err = ubifs_read_master(c); + if (err) + goto out_master; + + if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { + ubifs_msg("recovery needed"); + c->need_recovery = 1; + if (!mounted_read_only) { + err = ubifs_recover_inl_heads(c, c->sbuf); + if (err) + goto out_master; + } + } else if (!mounted_read_only) { + /* + * Set the "dirty" flag so that if we reboot uncleanly we + * will notice this immediately on the next mount. + */ + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); + err = ubifs_write_master(c); + if (err) + goto out_master; + } + + err = ubifs_lpt_init(c, 1, !mounted_read_only); + if (err) + goto out_lpt; + + err = dbg_check_idx_size(c, c->old_idx_sz); + if (err) + goto out_lpt; + + err = ubifs_replay_journal(c); + if (err) + goto out_journal; + + err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only); + if (err) + goto out_orphans; + + if (!mounted_read_only) { + int lnum; + + /* Check for enough free space */ + if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { + ubifs_err("insufficient available space"); + err = -EINVAL; + goto out_orphans; + } + + /* Check for enough log space */ + lnum = c->lhead_lnum + 1; + if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) + lnum = UBIFS_LOG_LNUM; + if (lnum == c->ltail_lnum) { + err = ubifs_consolidate_log(c); + if (err) + goto out_orphans; + } + + if (c->need_recovery) { + err = ubifs_recover_size(c); + if (err) + goto out_orphans; + err = ubifs_rcvry_gc_commit(c); + } else + err = take_gc_lnum(c); + if (err) + goto out_orphans; + + err = dbg_check_lprops(c); + if (err) + goto out_orphans; + } else if (c->need_recovery) { + err = ubifs_recover_size(c); + if (err) + goto out_orphans; + } + + spin_lock(&ubifs_infos_lock); + list_add_tail(&c->infos_list, &ubifs_infos); + spin_unlock(&ubifs_infos_lock); + + if (c->need_recovery) { + if (mounted_read_only) + ubifs_msg("recovery deferred"); + else { + c->need_recovery = 0; + ubifs_msg("recovery completed"); + } + } + + err = dbg_check_filesystem(c); + if (err) + goto out_infos; + + ubifs_msg("mounted UBI device %d, volume %d", c->vi.ubi_num, + c->vi.vol_id); + if (mounted_read_only) + ubifs_msg("mounted read-only"); + x = (long long)c->main_lebs * c->leb_size; + ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)", + x, x >> 10, x >> 20, c->main_lebs); + x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; + ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)", + x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); + ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); + ubifs_msg("media format %d, latest format %d", + c->fmt_version, UBIFS_FORMAT_VERSION); + + dbg_msg("compiled on: " __DATE__ " at " __TIME__); + dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); + dbg_msg("LEB size: %d bytes (%d KiB)", + c->leb_size, c->leb_size / 1024); + dbg_msg("data journal heads: %d", + c->jhead_cnt - NONDATA_JHEADS_CNT); + dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X" + "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X", + c->uuid[0], c->uuid[1], c->uuid[2], c->uuid[3], + c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7], + c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11], + c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]); + dbg_msg("fast unmount: %d", c->fast_unmount); + dbg_msg("big_lpt %d", c->big_lpt); + dbg_msg("log LEBs: %d (%d - %d)", + c->log_lebs, UBIFS_LOG_LNUM, c->log_last); + dbg_msg("LPT area LEBs: %d (%d - %d)", + c->lpt_lebs, c->lpt_first, c->lpt_last); + dbg_msg("orphan area LEBs: %d (%d - %d)", + c->orph_lebs, c->orph_first, c->orph_last); + dbg_msg("main area LEBs: %d (%d - %d)", + c->main_lebs, c->main_first, c->leb_cnt - 1); + dbg_msg("index LEBs: %d", c->lst.idx_lebs); + dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", + c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); + dbg_msg("key hash type: %d", c->key_hash_type); + dbg_msg("tree fanout: %d", c->fanout); + dbg_msg("reserved GC LEB: %d", c->gc_lnum); + dbg_msg("first main LEB: %d", c->main_first); + dbg_msg("dead watermark: %d", c->dead_wm); + dbg_msg("dark watermark: %d", c->dark_wm); + x = (long long)c->main_lebs * c->dark_wm; + dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)", + x, x >> 10, x >> 20); + dbg_msg("maximum bud bytes: %lld (%lld KiB, %lld MiB)", + c->max_bud_bytes, c->max_bud_bytes >> 10, + c->max_bud_bytes >> 20); + dbg_msg("BG commit bud bytes: %lld (%lld KiB, %lld MiB)", + c->bg_bud_bytes, c->bg_bud_bytes >> 10, + c->bg_bud_bytes >> 20); + dbg_msg("current bud bytes %lld (%lld KiB, %lld MiB)", + c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20); + dbg_msg("max. seq. number: %llu", c->max_sqnum); + dbg_msg("commit number: %llu", c->cmt_no); + + return 0; + +out_infos: + spin_lock(&ubifs_infos_lock); + list_del(&c->infos_list); + spin_unlock(&ubifs_infos_lock); +out_orphans: + free_orphans(c); +out_journal: + destroy_journal(c); +out_lpt: + ubifs_lpt_free(c, 0); +out_master: + kfree(c->mst_node); + kfree(c->rcvrd_mst_node); + if (c->bgt) + kthread_stop(c->bgt); +out_wbufs: + free_wbufs(c); +out_cbuf: + kfree(c->cbuf); +out_dereg: + dbg_failure_mode_deregistration(c); +out_free: + vfree(c->ileb_buf); + vfree(c->sbuf); + kfree(c->bottom_up_buf); + UBIFS_DBG(vfree(c->dbg_buf)); + return err; +} + +/** + * ubifs_umount - un-mount UBIFS file-system. + * @c: UBIFS file-system description object + * + * Note, this function is called to free allocated resourced when un-mounting, + * as well as free resources when an error occurred while we were half way + * through mounting (error path cleanup function). So it has to make sure the + * resource was actually allocated before freeing it. + */ +static void ubifs_umount(struct ubifs_info *c) +{ + dbg_gen("un-mounting UBI device %d, volume %d", c->vi.ubi_num, + c->vi.vol_id); + + spin_lock(&ubifs_infos_lock); + list_del(&c->infos_list); + spin_unlock(&ubifs_infos_lock); + + if (c->bgt) + kthread_stop(c->bgt); + + destroy_journal(c); + free_wbufs(c); + free_orphans(c); + ubifs_lpt_free(c, 0); + + kfree(c->cbuf); + kfree(c->rcvrd_mst_node); + kfree(c->mst_node); + vfree(c->sbuf); + kfree(c->bottom_up_buf); + UBIFS_DBG(vfree(c->dbg_buf)); + vfree(c->ileb_buf); + dbg_failure_mode_deregistration(c); +} + +/** + * ubifs_remount_rw - re-mount in read-write mode. + * @c: UBIFS file-system description object + * + * UBIFS avoids allocating many unnecessary resources when mounted in read-only + * mode. This function allocates the needed resources and re-mounts UBIFS in + * read-write mode. + */ +static int ubifs_remount_rw(struct ubifs_info *c) +{ + int err, lnum; + + if (c->ro_media) + return -EINVAL; + + mutex_lock(&c->umount_mutex); + c->remounting_rw = 1; + + /* Check for enough free space */ + if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { + ubifs_err("insufficient available space"); + err = -EINVAL; + goto out; + } + + if (c->old_leb_cnt != c->leb_cnt) { + struct ubifs_sb_node *sup; + + sup = ubifs_read_sb_node(c); + if (IS_ERR(sup)) { + err = PTR_ERR(sup); + goto out; + } + sup->leb_cnt = cpu_to_le32(c->leb_cnt); + err = ubifs_write_sb_node(c, sup); + if (err) + goto out; + } + + if (c->need_recovery) { + ubifs_msg("completing deferred recovery"); + err = ubifs_write_rcvrd_mst_node(c); + if (err) + goto out; + err = ubifs_recover_size(c); + if (err) + goto out; + err = ubifs_clean_lebs(c, c->sbuf); + if (err) + goto out; + err = ubifs_recover_inl_heads(c, c->sbuf); + if (err) + goto out; + } + + if (!(c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY))) { + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); + err = ubifs_write_master(c); + if (err) + goto out; + } + + c->ileb_buf = vmalloc(c->leb_size); + if (!c->ileb_buf) { + err = -ENOMEM; + goto out; + } + + err = ubifs_lpt_init(c, 0, 1); + if (err) + goto out; + + err = alloc_wbufs(c); + if (err) + goto out; + + ubifs_create_buds_lists(c); + + /* Create background thread */ + c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); + if (!c->bgt) + c->bgt = ERR_PTR(-EINVAL); + if (IS_ERR(c->bgt)) { + err = PTR_ERR(c->bgt); + c->bgt = NULL; + ubifs_err("cannot spawn \"%s\", error %d", + c->bgt_name, err); + return err; + } + wake_up_process(c->bgt); + + c->orph_buf = vmalloc(c->leb_size); + if (!c->orph_buf) + return -ENOMEM; + + /* Check for enough log space */ + lnum = c->lhead_lnum + 1; + if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) + lnum = UBIFS_LOG_LNUM; + if (lnum == c->ltail_lnum) { + err = ubifs_consolidate_log(c); + if (err) + goto out; + } + + if (c->need_recovery) + err = ubifs_rcvry_gc_commit(c); + else + err = take_gc_lnum(c); + if (err) + goto out; + + if (c->need_recovery) { + c->need_recovery = 0; + ubifs_msg("deferred recovery completed"); + } + + dbg_gen("re-mounted read-write"); + c->vfs_sb->s_flags &= ~MS_RDONLY; + c->remounting_rw = 0; + mutex_unlock(&c->umount_mutex); + return 0; + +out: + vfree(c->orph_buf); + c->orph_buf = NULL; + if (c->bgt) { + kthread_stop(c->bgt); + c->bgt = NULL; + } + free_wbufs(c); + vfree(c->ileb_buf); + c->ileb_buf = NULL; + ubifs_lpt_free(c, 1); + c->remounting_rw = 0; + mutex_unlock(&c->umount_mutex); + return err; +} + +/** + * commit_on_unmount - commit the journal when un-mounting. + * @c: UBIFS file-system description object + * + * This function is called during un-mounting and it commits the journal unless + * the "fast unmount" mode is enabled. It also avoids committing the journal if + * it contains too few data. + * + * Sometimes recovery requires the journal to be committed at least once, and + * this function takes care about this. + */ +static void commit_on_unmount(struct ubifs_info *c) +{ + if (!c->fast_unmount) { + long long bud_bytes; + + spin_lock(&c->buds_lock); + bud_bytes = c->bud_bytes; + spin_unlock(&c->buds_lock); + if (bud_bytes > c->leb_size) + ubifs_run_commit(c); + } +} + +/** + * ubifs_remount_ro - re-mount in read-only mode. + * @c: UBIFS file-system description object + * + * We rely on VFS to have stopped writing. Possibly the background thread could + * be running a commit, however kthread_stop will wait in that case. + */ +static void ubifs_remount_ro(struct ubifs_info *c) +{ + int i, err; + + ubifs_assert(!c->need_recovery); + commit_on_unmount(c); + + mutex_lock(&c->umount_mutex); + if (c->bgt) { + kthread_stop(c->bgt); + c->bgt = NULL; + } + + for (i = 0; i < c->jhead_cnt; i++) { + ubifs_wbuf_sync(&c->jheads[i].wbuf); + del_timer_sync(&c->jheads[i].wbuf.timer); + } + + if (!c->ro_media) { + c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); + c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); + err = ubifs_write_master(c); + if (err) + ubifs_ro_mode(c, err); + } + + ubifs_destroy_idx_gc(c); + free_wbufs(c); + vfree(c->orph_buf); + c->orph_buf = NULL; + vfree(c->ileb_buf); + c->ileb_buf = NULL; + ubifs_lpt_free(c, 1); + mutex_unlock(&c->umount_mutex); +} + +static void ubifs_put_super(struct super_block *sb) +{ + int i; + struct ubifs_info *c = sb->s_fs_info; + + ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num, + c->vi.vol_id); + /* + * The following asserts are only valid if there has not been a failure + * of the media. For example, there will be dirty inodes if we failed + * to write them back because of I/O errors. + */ + ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); + ubifs_assert(c->budg_idx_growth == 0); + ubifs_assert(c->budg_data_growth == 0); + + /* + * The 'c->umount_lock' prevents races between UBIFS memory shrinker + * and file system un-mount. Namely, it prevents the shrinker from + * picking this superblock for shrinking - it will be just skipped if + * the mutex is locked. + */ + mutex_lock(&c->umount_mutex); + if (!(c->vfs_sb->s_flags & MS_RDONLY)) { + /* + * First of all kill the background thread to make sure it does + * not interfere with un-mounting and freeing resources. + */ + if (c->bgt) { + kthread_stop(c->bgt); + c->bgt = NULL; + } + + /* Synchronize write-buffers */ + if (c->jheads) + for (i = 0; i < c->jhead_cnt; i++) { + ubifs_wbuf_sync(&c->jheads[i].wbuf); + del_timer_sync(&c->jheads[i].wbuf.timer); + } + + /* + * On fatal errors c->ro_media is set to 1, in which case we do + * not write the master node. + */ + if (!c->ro_media) { + /* + * We are being cleanly unmounted which means the + * orphans were killed - indicate this in the master + * node. Also save the reserved GC LEB number. + */ + int err; + + c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); + c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); + err = ubifs_write_master(c); + if (err) + /* + * Recovery will attempt to fix the master area + * next mount, so we just print a message and + * continue to unmount normally. + */ + ubifs_err("failed to write master node, " + "error %d", err); + } + } + + ubifs_umount(c); + bdi_destroy(&c->bdi); + ubi_close_volume(c->ubi); + mutex_unlock(&c->umount_mutex); + kfree(c); +} + +static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) +{ + int err; + struct ubifs_info *c = sb->s_fs_info; + + dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags); + + err = ubifs_parse_options(c, data, 1); + if (err) { + ubifs_err("invalid or unknown remount parameter"); + return err; + } + if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { + err = ubifs_remount_rw(c); + if (err) + return err; + } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) + ubifs_remount_ro(c); + + return 0; +} + +struct super_operations ubifs_super_operations = { + .alloc_inode = ubifs_alloc_inode, + .destroy_inode = ubifs_destroy_inode, + .put_super = ubifs_put_super, + .write_inode = ubifs_write_inode, + .delete_inode = ubifs_delete_inode, + .statfs = ubifs_statfs, + .dirty_inode = ubifs_dirty_inode, + .remount_fs = ubifs_remount_fs, + .show_options = ubifs_show_options, + .sync_fs = ubifs_sync_fs, +}; + +/** + * open_ubi - parse UBI device name string and open the UBI device. + * @name: UBI volume name + * @mode: UBI volume open mode + * + * There are several ways to specify UBI volumes when mounting UBIFS: + * o ubiX_Y - UBI device number X, volume Y; + * o ubiY - UBI device number 0, volume Y; + * o ubiX:NAME - mount UBI device X, volume with name NAME; + * o ubi:NAME - mount UBI device 0, volume with name NAME. + * + * Alternative '!' separator may be used instead of ':' (because some shells + * like busybox may interpret ':' as an NFS host name separator). This function + * returns ubi volume object in case of success and a negative error code in + * case of failure. + */ +static struct ubi_volume_desc *open_ubi(const char *name, int mode) +{ + int dev, vol; + char *endptr; + + if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i') + return ERR_PTR(-EINVAL); + + /* ubi:NAME method */ + if ((name[3] == ':' || name[3] == '!') && name[4] != '\0') + return ubi_open_volume_nm(0, name + 4, mode); + + if (!isdigit(name[3])) + return ERR_PTR(-EINVAL); + + dev = simple_strtoul(name + 3, &endptr, 0); + + /* ubiY method */ + if (*endptr == '\0') + return ubi_open_volume(0, dev, mode); + + /* ubiX_Y method */ + if (*endptr == '_' && isdigit(endptr[1])) { + vol = simple_strtoul(endptr + 1, &endptr, 0); + if (*endptr != '\0') + return ERR_PTR(-EINVAL); + return ubi_open_volume(dev, vol, mode); + } + + /* ubiX:NAME method */ + if ((*endptr == ':' || *endptr == '!') && endptr[1] != '\0') + return ubi_open_volume_nm(dev, ++endptr, mode); + + return ERR_PTR(-EINVAL); +} + +static int ubifs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct ubi_volume_desc *ubi = sb->s_fs_info; + struct ubifs_info *c; + struct inode *root; + int err; + + c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL); + if (!c) + return -ENOMEM; + + spin_lock_init(&c->cnt_lock); + spin_lock_init(&c->cs_lock); + spin_lock_init(&c->buds_lock); + spin_lock_init(&c->space_lock); + spin_lock_init(&c->orphan_lock); + init_rwsem(&c->commit_sem); + mutex_init(&c->lp_mutex); + mutex_init(&c->tnc_mutex); + mutex_init(&c->log_mutex); + mutex_init(&c->mst_mutex); + mutex_init(&c->umount_mutex); + init_waitqueue_head(&c->cmt_wq); + c->buds = RB_ROOT; + c->old_idx = RB_ROOT; + c->size_tree = RB_ROOT; + c->orph_tree = RB_ROOT; + INIT_LIST_HEAD(&c->infos_list); + INIT_LIST_HEAD(&c->idx_gc); + INIT_LIST_HEAD(&c->replay_list); + INIT_LIST_HEAD(&c->replay_buds); + INIT_LIST_HEAD(&c->uncat_list); + INIT_LIST_HEAD(&c->empty_list); + INIT_LIST_HEAD(&c->freeable_list); + INIT_LIST_HEAD(&c->frdi_idx_list); + INIT_LIST_HEAD(&c->unclean_leb_list); + INIT_LIST_HEAD(&c->old_buds); + INIT_LIST_HEAD(&c->orph_list); + INIT_LIST_HEAD(&c->orph_new); + + c->highest_inum = UBIFS_FIRST_INO; + get_random_bytes(&c->vfs_gen, sizeof(int)); + c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; + + ubi_get_volume_info(ubi, &c->vi); + ubi_get_device_info(c->vi.ubi_num, &c->di); + + /* Re-open the UBI device in read-write mode */ + c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE); + if (IS_ERR(c->ubi)) { + err = PTR_ERR(c->ubi); + goto out_free; + } + + /* + * UBIFS provids 'backing_dev_info' in order to disable readahead. For + * UBIFS, I/O is not deferred, it is done immediately in readpage, + * which means the user would have to wait not just for their own I/O + * but the readahead I/O as well i.e. completely pointless. + * + * Read-ahead will be disabled because @c->bdi.ra_pages is 0. + */ + c->bdi.capabilities = BDI_CAP_MAP_COPY; + c->bdi.unplug_io_fn = default_unplug_io_fn; + err = bdi_init(&c->bdi); + if (err) + goto out_close; + + err = ubifs_parse_options(c, data, 0); + if (err) + goto out_bdi; + + c->vfs_sb = sb; + + sb->s_fs_info = c; + sb->s_magic = UBIFS_SUPER_MAGIC; + sb->s_blocksize = UBIFS_BLOCK_SIZE; + sb->s_blocksize_bits = UBIFS_BLOCK_SHIFT; + sb->s_dev = c->vi.cdev; + sb->s_maxbytes = c->max_inode_sz = key_max_inode_size(c); + if (c->max_inode_sz > MAX_LFS_FILESIZE) + sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE; + sb->s_op = &ubifs_super_operations; + + mutex_lock(&c->umount_mutex); + err = mount_ubifs(c); + if (err) { + ubifs_assert(err < 0); + goto out_unlock; + } + + /* Read the root inode */ + root = ubifs_iget(sb, UBIFS_ROOT_INO); + if (IS_ERR(root)) { + err = PTR_ERR(root); + goto out_umount; + } + + sb->s_root = d_alloc_root(root); + if (!sb->s_root) + goto out_iput; + + mutex_unlock(&c->umount_mutex); + + return 0; + +out_iput: + iput(root); +out_umount: + ubifs_umount(c); +out_unlock: + mutex_unlock(&c->umount_mutex); +out_bdi: + bdi_destroy(&c->bdi); +out_close: + ubi_close_volume(c->ubi); +out_free: + kfree(c); + return err; +} + +static int sb_test(struct super_block *sb, void *data) +{ + dev_t *dev = data; + + return sb->s_dev == *dev; +} + +static int sb_set(struct super_block *sb, void *data) +{ + dev_t *dev = data; + + sb->s_dev = *dev; + return 0; +} + +static int ubifs_get_sb(struct file_system_type *fs_type, int flags, + const char *name, void *data, struct vfsmount *mnt) +{ + struct ubi_volume_desc *ubi; + struct ubi_volume_info vi; + struct super_block *sb; + int err; + + dbg_gen("name %s, flags %#x", name, flags); + + /* + * Get UBI device number and volume ID. Mount it read-only so far + * because this might be a new mount point, and UBI allows only one + * read-write user at a time. + */ + ubi = open_ubi(name, UBI_READONLY); + if (IS_ERR(ubi)) { + ubifs_err("cannot open \"%s\", error %d", + name, (int)PTR_ERR(ubi)); + return PTR_ERR(ubi); + } + ubi_get_volume_info(ubi, &vi); + + dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id); + + sb = sget(fs_type, &sb_test, &sb_set, &vi.cdev); + if (IS_ERR(sb)) { + err = PTR_ERR(sb); + goto out_close; + } + + if (sb->s_root) { + /* A new mount point for already mounted UBIFS */ + dbg_gen("this ubi volume is already mounted"); + if ((flags ^ sb->s_flags) & MS_RDONLY) { + err = -EBUSY; + goto out_deact; + } + } else { + sb->s_flags = flags; + /* + * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is + * replaced by 'c'. + */ + sb->s_fs_info = ubi; + err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); + if (err) + goto out_deact; + /* We do not support atime */ + sb->s_flags |= MS_ACTIVE | MS_NOATIME; + } + + /* 'fill_super()' opens ubi again so we must close it here */ + ubi_close_volume(ubi); + + return simple_set_mnt(mnt, sb); + +out_deact: + up_write(&sb->s_umount); + deactivate_super(sb); +out_close: + ubi_close_volume(ubi); + return err; +} + +static void ubifs_kill_sb(struct super_block *sb) +{ + struct ubifs_info *c = sb->s_fs_info; + + /* + * We do 'commit_on_unmount()' here instead of 'ubifs_put_super()' + * in order to be outside BKL. + */ + if (sb->s_root && !(sb->s_flags & MS_RDONLY)) + commit_on_unmount(c); + /* The un-mount routine is actually done in put_super() */ + generic_shutdown_super(sb); +} + +static struct file_system_type ubifs_fs_type = { + .name = "ubifs", + .owner = THIS_MODULE, + .get_sb = ubifs_get_sb, + .kill_sb = ubifs_kill_sb +}; + +/* + * Inode slab cache constructor. + */ +static void inode_slab_ctor(struct kmem_cache *cachep, void *obj) +{ + struct ubifs_inode *ui = obj; + inode_init_once(&ui->vfs_inode); +} + +static int __init ubifs_init(void) +{ + int err; + + BUILD_BUG_ON(sizeof(struct ubifs_ch) != 24); + + /* Make sure node sizes are 8-byte aligned */ + BUILD_BUG_ON(UBIFS_CH_SZ & 7); + BUILD_BUG_ON(UBIFS_INO_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_DENT_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_XENT_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_DATA_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_SB_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_MST_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_REF_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_CS_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_ORPH_NODE_SZ & 7); + + BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_MAX_NODE_SZ & 7); + BUILD_BUG_ON(MIN_WRITE_SZ & 7); + + /* Check min. node size */ + BUILD_BUG_ON(UBIFS_INO_NODE_SZ < MIN_WRITE_SZ); + BUILD_BUG_ON(UBIFS_DENT_NODE_SZ < MIN_WRITE_SZ); + BUILD_BUG_ON(UBIFS_XENT_NODE_SZ < MIN_WRITE_SZ); + BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ < MIN_WRITE_SZ); + + BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ > UBIFS_MAX_NODE_SZ); + BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ > UBIFS_MAX_NODE_SZ); + BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ > UBIFS_MAX_NODE_SZ); + BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ > UBIFS_MAX_NODE_SZ); + + /* Defined node sizes */ + BUILD_BUG_ON(UBIFS_SB_NODE_SZ != 4096); + BUILD_BUG_ON(UBIFS_MST_NODE_SZ != 512); + BUILD_BUG_ON(UBIFS_INO_NODE_SZ != 160); + BUILD_BUG_ON(UBIFS_REF_NODE_SZ != 64); + + /* + * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to + * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. + */ + if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) { + ubifs_err("VFS page cache size is %u bytes, but UBIFS requires" + " at least 4096 bytes", + (unsigned int)PAGE_CACHE_SIZE); + return -EINVAL; + } + + err = register_filesystem(&ubifs_fs_type); + if (err) { + ubifs_err("cannot register file system, error %d", err); + return err; + } + + err = -ENOMEM; + ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab", + sizeof(struct ubifs_inode), 0, + SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT, + &inode_slab_ctor); + if (!ubifs_inode_slab) + goto out_reg; + + register_shrinker(&ubifs_shrinker_info); + + err = ubifs_compressors_init(); + if (err) + goto out_compr; + + return 0; + +out_compr: + unregister_shrinker(&ubifs_shrinker_info); + kmem_cache_destroy(ubifs_inode_slab); +out_reg: + unregister_filesystem(&ubifs_fs_type); + return err; +} +/* late_initcall to let compressors initialize first */ +late_initcall(ubifs_init); + +static void __exit ubifs_exit(void) +{ + ubifs_assert(list_empty(&ubifs_infos)); + ubifs_assert(atomic_long_read(&ubifs_clean_zn_cnt) == 0); + + ubifs_compressors_exit(); + unregister_shrinker(&ubifs_shrinker_info); + kmem_cache_destroy(ubifs_inode_slab); + unregister_filesystem(&ubifs_fs_type); +} +module_exit(ubifs_exit); + +MODULE_LICENSE("GPL"); +MODULE_VERSION(__stringify(UBIFS_VERSION)); +MODULE_AUTHOR("Artem Bityutskiy, Adrian Hunter"); +MODULE_DESCRIPTION("UBIFS - UBI File System"); diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c new file mode 100644 index 0000000..e909f4a --- /dev/null +++ b/fs/ubifs/tnc.c @@ -0,0 +1,2956 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements TNC (Tree Node Cache) which caches indexing nodes of + * the UBIFS B-tree. + * + * At the moment the locking rules of the TNC tree are quite simple and + * straightforward. We just have a mutex and lock it when we traverse the + * tree. If a znode is not in memory, we read it from flash while still having + * the mutex locked. + */ + +#include +#include "ubifs.h" + +/* + * Returned codes of 'matches_name()' and 'fallible_matches_name()' functions. + * @NAME_LESS: name corresponding to the first argument is less than second + * @NAME_MATCHES: names match + * @NAME_GREATER: name corresponding to the second argument is greater than + * first + * @NOT_ON_MEDIA: node referred by zbranch does not exist on the media + * + * These constants were introduce to improve readability. + */ +enum { + NAME_LESS = 0, + NAME_MATCHES = 1, + NAME_GREATER = 2, + NOT_ON_MEDIA = 3, +}; + +/** + * insert_old_idx - record an index node obsoleted since the last commit start. + * @c: UBIFS file-system description object + * @lnum: LEB number of obsoleted index node + * @offs: offset of obsoleted index node + * + * Returns %0 on success, and a negative error code on failure. + * + * For recovery, there must always be a complete intact version of the index on + * flash at all times. That is called the "old index". It is the index as at the + * time of the last successful commit. Many of the index nodes in the old index + * may be dirty, but they must not be erased until the next successful commit + * (at which point that index becomes the old index). + * + * That means that the garbage collection and the in-the-gaps method of + * committing must be able to determine if an index node is in the old index. + * Most of the old index nodes can be found by looking up the TNC using the + * 'lookup_znode()' function. However, some of the old index nodes may have + * been deleted from the current index or may have been changed so much that + * they cannot be easily found. In those cases, an entry is added to an RB-tree. + * That is what this function does. The RB-tree is ordered by LEB number and + * offset because they uniquely identify the old index node. + */ +static int insert_old_idx(struct ubifs_info *c, int lnum, int offs) +{ + struct ubifs_old_idx *old_idx, *o; + struct rb_node **p, *parent = NULL; + + old_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS); + if (unlikely(!old_idx)) + return -ENOMEM; + old_idx->lnum = lnum; + old_idx->offs = offs; + + p = &c->old_idx.rb_node; + while (*p) { + parent = *p; + o = rb_entry(parent, struct ubifs_old_idx, rb); + if (lnum < o->lnum) + p = &(*p)->rb_left; + else if (lnum > o->lnum) + p = &(*p)->rb_right; + else if (offs < o->offs) + p = &(*p)->rb_left; + else if (offs > o->offs) + p = &(*p)->rb_right; + else { + ubifs_err("old idx added twice!"); + kfree(old_idx); + return 0; + } + } + rb_link_node(&old_idx->rb, parent, p); + rb_insert_color(&old_idx->rb, &c->old_idx); + return 0; +} + +/** + * insert_old_idx_znode - record a znode obsoleted since last commit start. + * @c: UBIFS file-system description object + * @znode: znode of obsoleted index node + * + * Returns %0 on success, and a negative error code on failure. + */ +int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode) +{ + if (znode->parent) { + struct ubifs_zbranch *zbr; + + zbr = &znode->parent->zbranch[znode->iip]; + if (zbr->len) + return insert_old_idx(c, zbr->lnum, zbr->offs); + } else + if (c->zroot.len) + return insert_old_idx(c, c->zroot.lnum, + c->zroot.offs); + return 0; +} + +/** + * ins_clr_old_idx_znode - record a znode obsoleted since last commit start. + * @c: UBIFS file-system description object + * @znode: znode of obsoleted index node + * + * Returns %0 on success, and a negative error code on failure. + */ +static int ins_clr_old_idx_znode(struct ubifs_info *c, + struct ubifs_znode *znode) +{ + int err; + + if (znode->parent) { + struct ubifs_zbranch *zbr; + + zbr = &znode->parent->zbranch[znode->iip]; + if (zbr->len) { + err = insert_old_idx(c, zbr->lnum, zbr->offs); + if (err) + return err; + zbr->lnum = 0; + zbr->offs = 0; + zbr->len = 0; + } + } else + if (c->zroot.len) { + err = insert_old_idx(c, c->zroot.lnum, c->zroot.offs); + if (err) + return err; + c->zroot.lnum = 0; + c->zroot.offs = 0; + c->zroot.len = 0; + } + return 0; +} + +/** + * destroy_old_idx - destroy the old_idx RB-tree. + * @c: UBIFS file-system description object + * + * During start commit, the old_idx RB-tree is used to avoid overwriting index + * nodes that were in the index last commit but have since been deleted. This + * is necessary for recovery i.e. the old index must be kept intact until the + * new index is successfully written. The old-idx RB-tree is used for the + * in-the-gaps method of writing index nodes and is destroyed every commit. + */ +void destroy_old_idx(struct ubifs_info *c) +{ + struct rb_node *this = c->old_idx.rb_node; + struct ubifs_old_idx *old_idx; + + while (this) { + if (this->rb_left) { + this = this->rb_left; + continue; + } else if (this->rb_right) { + this = this->rb_right; + continue; + } + old_idx = rb_entry(this, struct ubifs_old_idx, rb); + this = rb_parent(this); + if (this) { + if (this->rb_left == &old_idx->rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + kfree(old_idx); + } + c->old_idx = RB_ROOT; +} + +/** + * copy_znode - copy a dirty znode. + * @c: UBIFS file-system description object + * @znode: znode to copy + * + * A dirty znode being committed may not be changed, so it is copied. + */ +static struct ubifs_znode *copy_znode(struct ubifs_info *c, + struct ubifs_znode *znode) +{ + struct ubifs_znode *zn; + + zn = kmalloc(c->max_znode_sz, GFP_NOFS); + if (unlikely(!zn)) + return ERR_PTR(-ENOMEM); + + memcpy(zn, znode, c->max_znode_sz); + zn->cnext = NULL; + __set_bit(DIRTY_ZNODE, &zn->flags); + __clear_bit(COW_ZNODE, &zn->flags); + + ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); + __set_bit(OBSOLETE_ZNODE, &znode->flags); + + if (znode->level != 0) { + int i; + const int n = zn->child_cnt; + + /* The children now have new parent */ + for (i = 0; i < n; i++) { + struct ubifs_zbranch *zbr = &zn->zbranch[i]; + + if (zbr->znode) + zbr->znode->parent = zn; + } + } + + atomic_long_inc(&c->dirty_zn_cnt); + return zn; +} + +/** + * add_idx_dirt - add dirt due to a dirty znode. + * @c: UBIFS file-system description object + * @lnum: LEB number of index node + * @dirt: size of index node + * + * This function updates lprops dirty space and the new size of the index. + */ +static int add_idx_dirt(struct ubifs_info *c, int lnum, int dirt) +{ + c->calc_idx_sz -= ALIGN(dirt, 8); + return ubifs_add_dirt(c, lnum, dirt); +} + +/** + * dirty_cow_znode - ensure a znode is not being committed. + * @c: UBIFS file-system description object + * @zbr: branch of znode to check + * + * Returns dirtied znode on success or negative error code on failure. + */ +static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c, + struct ubifs_zbranch *zbr) +{ + struct ubifs_znode *znode = zbr->znode; + struct ubifs_znode *zn; + int err; + + if (!test_bit(COW_ZNODE, &znode->flags)) { + /* znode is not being committed */ + if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) { + atomic_long_inc(&c->dirty_zn_cnt); + atomic_long_dec(&c->clean_zn_cnt); + atomic_long_dec(&ubifs_clean_zn_cnt); + err = add_idx_dirt(c, zbr->lnum, zbr->len); + if (unlikely(err)) + return ERR_PTR(err); + } + return znode; + } + + zn = copy_znode(c, znode); + if (unlikely(IS_ERR(zn))) + return zn; + + if (zbr->len) { + err = insert_old_idx(c, zbr->lnum, zbr->offs); + if (unlikely(err)) + return ERR_PTR(err); + err = add_idx_dirt(c, zbr->lnum, zbr->len); + } else + err = 0; + + zbr->znode = zn; + zbr->lnum = 0; + zbr->offs = 0; + zbr->len = 0; + + if (unlikely(err)) + return ERR_PTR(err); + return zn; +} + +/** + * lnc_add - add a leaf node to the leaf node cache. + * @c: UBIFS file-system description object + * @zbr: zbranch of leaf node + * @node: leaf node + * + * Leaf nodes are non-index nodes directory entry nodes or data nodes. The + * purpose of the leaf node cache is to save re-reading the same leaf node over + * and over again. Most things are cached by VFS, however the file system must + * cache directory entries for readdir and for resolving hash collisions. The + * present implementation of the leaf node cache is extremely simple, and + * allows for error returns that are not used but that may be needed if a more + * complex implementation is created. + * + * Note, this function does not add the @node object to LNC directly, but + * allocates a copy of the object and adds the copy to LNC. The reason for this + * is that @node has been allocated outside of the TNC subsystem and will be + * used with @c->tnc_mutex unlock upon return from the TNC subsystem. But LNC + * may be changed at any time, e.g. freed by the shrinker. + */ +static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr, + const void *node) +{ + int err; + void *lnc_node; + const struct ubifs_dent_node *dent = node; + + ubifs_assert(!zbr->leaf); + ubifs_assert(zbr->len != 0); + ubifs_assert(is_hash_key(c, &zbr->key)); + + err = ubifs_validate_entry(c, dent); + if (err) { + dbg_dump_stack(); + dbg_dump_node(c, dent); + return err; + } + + lnc_node = kmalloc(zbr->len, GFP_NOFS); + if (!lnc_node) + /* We don't have to have the cache, so no error */ + return 0; + + memcpy(lnc_node, node, zbr->len); + zbr->leaf = lnc_node; + return 0; +} + + /** + * lnc_add_directly - add a leaf node to the leaf-node-cache. + * @c: UBIFS file-system description object + * @zbr: zbranch of leaf node + * @node: leaf node + * + * This function is similar to 'lnc_add()', but it does not create a copy of + * @node but inserts @node to TNC directly. + */ +static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr, + void *node) +{ + int err; + + ubifs_assert(!zbr->leaf); + ubifs_assert(zbr->len != 0); + + err = ubifs_validate_entry(c, node); + if (err) { + dbg_dump_stack(); + dbg_dump_node(c, node); + return err; + } + + zbr->leaf = node; + return 0; +} + +/** + * lnc_free - remove a leaf node from the leaf node cache. + * @zbr: zbranch of leaf node + * @node: leaf node + */ +static void lnc_free(struct ubifs_zbranch *zbr) +{ + if (!zbr->leaf) + return; + kfree(zbr->leaf); + zbr->leaf = NULL; +} + +/** + * tnc_read_node_nm - read a "hashed" leaf node. + * @c: UBIFS file-system description object + * @zbr: key and position of the node + * @node: node is returned here + * + * This function reads a "hashed" node defined by @zbr from the leaf node cache + * (in it is there) or from the hash media, in which case the node is also + * added to LNC. Returns zero in case of success or a negative negative error + * code in case of failure. + */ +static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr, + void *node) +{ + int err; + + ubifs_assert(is_hash_key(c, &zbr->key)); + + if (zbr->leaf) { + /* Read from the leaf node cache */ + ubifs_assert(zbr->len != 0); + memcpy(node, zbr->leaf, zbr->len); + return 0; + } + + err = ubifs_tnc_read_node(c, zbr, node); + if (err) + return err; + + /* Add the node to the leaf node cache */ + err = lnc_add(c, zbr, node); + return err; +} + +/** + * try_read_node - read a node if it is a node. + * @c: UBIFS file-system description object + * @buf: buffer to read to + * @type: node type + * @len: node length (not aligned) + * @lnum: LEB number of node to read + * @offs: offset of node to read + * + * This function tries to read a node of known type and length, checks it and + * stores it in @buf. This function returns %1 if a node is present and %0 if + * a node is not present. A negative error code is returned for I/O errors. + * This function performs that same function as ubifs_read_node except that + * it does not require that there is actually a node present and instead + * the return code indicates if a node was read. + */ +static int try_read_node(const struct ubifs_info *c, void *buf, int type, + int len, int lnum, int offs) +{ + int err, node_len; + struct ubifs_ch *ch = buf; + uint32_t crc, node_crc; + + dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); + + err = ubi_read(c->ubi, lnum, buf, offs, len); + if (err) { + ubifs_err("cannot read node type %d from LEB %d:%d, error %d", + type, lnum, offs, err); + return err; + } + + if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) + return 0; + + if (ch->node_type != type) + return 0; + + node_len = le32_to_cpu(ch->len); + if (node_len != len) + return 0; + + crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); + node_crc = le32_to_cpu(ch->crc); + if (crc != node_crc) + return 0; + + return 1; +} + +/** + * fallible_read_node - try to read a leaf node. + * @c: UBIFS file-system description object + * @key: key of node to read + * @zbr: position of node + * @node: node returned + * + * This function tries to read a node and returns %1 if the node is read, %0 + * if the node is not present, and a negative error code in the case of error. + */ +static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_zbranch *zbr, void *node) +{ + int ret; + + dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key)); + + ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum, + zbr->offs); + if (ret == 1) { + union ubifs_key node_key; + struct ubifs_dent_node *dent = node; + + /* All nodes have key in the same place */ + key_read(c, &dent->key, &node_key); + if (keys_cmp(c, key, &node_key) != 0) + ret = 0; + } + if (ret == 0) + dbg_mnt("dangling branch LEB %d:%d len %d, key %s", + zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); + return ret; +} + +/** + * matches_name - determine if a direntry or xattr entry matches a given name. + * @c: UBIFS file-system description object + * @zbr: zbranch of dent + * @nm: name to match + * + * This function checks if xentry/direntry referred by zbranch @zbr matches name + * @nm. Returns %NAME_MATCHES if it does, %NAME_LESS if the name referred by + * @zbr is less than @nm, and %NAME_GREATER if it is greater than @nm. In case + * of failure, a negative error code is returned. + */ +static int matches_name(struct ubifs_info *c, struct ubifs_zbranch *zbr, + const struct qstr *nm) +{ + struct ubifs_dent_node *dent; + int nlen, err; + + /* If possible, match against the dent in the leaf node cache */ + if (!zbr->leaf) { + dent = kmalloc(zbr->len, GFP_NOFS); + if (!dent) + return -ENOMEM; + + err = ubifs_tnc_read_node(c, zbr, dent); + if (err) + goto out_free; + + /* Add the node to the leaf node cache */ + err = lnc_add_directly(c, zbr, dent); + if (err) + goto out_free; + } else + dent = zbr->leaf; + + nlen = le16_to_cpu(dent->nlen); + err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len)); + if (err == 0) { + if (nlen == nm->len) + return NAME_MATCHES; + else if (nlen < nm->len) + return NAME_LESS; + else + return NAME_GREATER; + } else if (err < 0) + return NAME_LESS; + else + return NAME_GREATER; + +out_free: + kfree(dent); + return err; +} + +/** + * get_znode - get a TNC znode that may not be loaded yet. + * @c: UBIFS file-system description object + * @znode: parent znode + * @n: znode branch slot number + * + * This function returns the znode or a negative error code. + */ +static struct ubifs_znode *get_znode(struct ubifs_info *c, + struct ubifs_znode *znode, int n) +{ + struct ubifs_zbranch *zbr; + + zbr = &znode->zbranch[n]; + if (zbr->znode) + znode = zbr->znode; + else + znode = ubifs_load_znode(c, zbr, znode, n); + return znode; +} + +/** + * tnc_next - find next TNC entry. + * @c: UBIFS file-system description object + * @zn: znode is passed and returned here + * @n: znode branch slot number is passed and returned here + * + * This function returns %0 if the next TNC entry is found, %-ENOENT if there is + * no next entry, or a negative error code otherwise. + */ +static int tnc_next(struct ubifs_info *c, struct ubifs_znode **zn, int *n) +{ + struct ubifs_znode *znode = *zn; + int nn = *n; + + nn += 1; + if (nn < znode->child_cnt) { + *n = nn; + return 0; + } + while (1) { + struct ubifs_znode *zp; + + zp = znode->parent; + if (!zp) + return -ENOENT; + nn = znode->iip + 1; + znode = zp; + if (nn < znode->child_cnt) { + znode = get_znode(c, znode, nn); + if (IS_ERR(znode)) + return PTR_ERR(znode); + while (znode->level != 0) { + znode = get_znode(c, znode, 0); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + nn = 0; + break; + } + } + *zn = znode; + *n = nn; + return 0; +} + +/** + * tnc_prev - find previous TNC entry. + * @c: UBIFS file-system description object + * @zn: znode is returned here + * @n: znode branch slot number is passed and returned here + * + * This function returns %0 if the previous TNC entry is found, %-ENOENT if + * there is no next entry, or a negative error code otherwise. + */ +static int tnc_prev(struct ubifs_info *c, struct ubifs_znode **zn, int *n) +{ + struct ubifs_znode *znode = *zn; + int nn = *n; + + if (nn > 0) { + *n = nn - 1; + return 0; + } + while (1) { + struct ubifs_znode *zp; + + zp = znode->parent; + if (!zp) + return -ENOENT; + nn = znode->iip - 1; + znode = zp; + if (nn >= 0) { + znode = get_znode(c, znode, nn); + if (IS_ERR(znode)) + return PTR_ERR(znode); + while (znode->level != 0) { + nn = znode->child_cnt - 1; + znode = get_znode(c, znode, nn); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + nn = znode->child_cnt - 1; + break; + } + } + *zn = znode; + *n = nn; + return 0; +} + +/** + * resolve_collision - resolve a collision. + * @c: UBIFS file-system description object + * @key: key of a directory or extended attribute entry + * @zn: znode is returned here + * @n: zbranch number is passed and returned here + * @nm: name of the entry + * + * This function is called for "hashed" keys to make sure that the found key + * really corresponds to the looked up node (directory or extended attribute + * entry). It returns %1 and sets @zn and @n if the collision is resolved. + * %0 is returned if @nm is not found and @zn and @n are set to the previous + * entry, i.e. to the entry after which @nm could follow if it were in TNC. + * This means that @n may be set to %-1 if the leftmost key in @zn is the + * previous one. A negative error code is returned on failures. + */ +static int resolve_collision(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode **zn, int *n, + const struct qstr *nm) +{ + int err; + + err = matches_name(c, &(*zn)->zbranch[*n], nm); + if (unlikely(err < 0)) + return err; + if (err == NAME_MATCHES) + return 1; + + if (err == NAME_GREATER) { + /* Look left */ + while (1) { + err = tnc_prev(c, zn, n); + if (err == -ENOENT) { + ubifs_assert(*n == 0); + *n = -1; + return 0; + } + if (err < 0) + return err; + if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) { + /* + * We have found the branch after which we would + * like to insert, but inserting in this znode + * may still be wrong. Consider the following 3 + * znodes, in the case where we are resolving a + * collision with Key2. + * + * znode zp + * ---------------------- + * level 1 | Key0 | Key1 | + * ----------------------- + * | | + * znode za | | znode zb + * ------------ ------------ + * level 0 | Key0 | | Key2 | + * ------------ ------------ + * + * The lookup finds Key2 in znode zb. Lets say + * there is no match and the name is greater so + * we look left. When we find Key0, we end up + * here. If we return now, we will insert into + * znode za at slot n = 1. But that is invalid + * according to the parent's keys. Key2 must + * be inserted into znode zb. + * + * Note, this problem is not relevant for the + * case when we go right, because + * 'tnc_insert()' would correct the parent key. + */ + if (*n == (*zn)->child_cnt - 1) { + err = tnc_next(c, zn, n); + if (err) { + /* Should be impossible */ + ubifs_assert(0); + if (err == -ENOENT) + err = -EINVAL; + return err; + } + ubifs_assert(*n == 0); + *n = -1; + } + return 0; + } + err = matches_name(c, &(*zn)->zbranch[*n], nm); + if (err < 0) + return err; + if (err == NAME_LESS) + return 0; + if (err == NAME_MATCHES) + return 1; + ubifs_assert(err == NAME_GREATER); + } + } else { + int nn = *n; + struct ubifs_znode *znode = *zn; + + /* Look right */ + while (1) { + err = tnc_next(c, &znode, &nn); + if (err == -ENOENT) + return 0; + if (err < 0) + return err; + if (keys_cmp(c, &znode->zbranch[nn].key, key)) + return 0; + err = matches_name(c, &znode->zbranch[nn], nm); + if (err < 0) + return err; + if (err == NAME_GREATER) + return 0; + *zn = znode; + *n = nn; + if (err == NAME_MATCHES) + return 1; + ubifs_assert(err == NAME_LESS); + } + } +} + +/** + * fallible_matches_name - determine if a dent matches a given name. + * @c: UBIFS file-system description object + * @zbr: zbranch of dent + * @nm: name to match + * + * This is a "fallible" version of 'matches_name()' function which does not + * panic if the direntry/xentry referred by @zbr does not exist on the media. + * + * This function checks if xentry/direntry referred by zbranch @zbr matches name + * @nm. Returns %NAME_MATCHES it does, %NAME_LESS if the name referred by @zbr + * is less than @nm, %NAME_GREATER if it is greater than @nm, and @NOT_ON_MEDIA + * if xentry/direntry referred by @zbr does not exist on the media. A negative + * error code is returned in case of failure. + */ +static int fallible_matches_name(struct ubifs_info *c, + struct ubifs_zbranch *zbr, + const struct qstr *nm) +{ + struct ubifs_dent_node *dent; + int nlen, err; + + /* If possible, match against the dent in the leaf node cache */ + if (!zbr->leaf) { + dent = kmalloc(zbr->len, GFP_NOFS); + if (!dent) + return -ENOMEM; + + err = fallible_read_node(c, &zbr->key, zbr, dent); + if (err < 0) + goto out_free; + if (err == 0) { + /* The node was not present */ + err = NOT_ON_MEDIA; + goto out_free; + } + ubifs_assert(err == 1); + + err = lnc_add_directly(c, zbr, dent); + if (err) + goto out_free; + } else + dent = zbr->leaf; + + nlen = le16_to_cpu(dent->nlen); + err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len)); + if (err == 0) { + if (nlen == nm->len) + return NAME_MATCHES; + else if (nlen < nm->len) + return NAME_LESS; + else + return NAME_GREATER; + } else if (err < 0) + return NAME_LESS; + else + return NAME_GREATER; + +out_free: + kfree(dent); + return err; +} + +/** + * fallible_resolve_collision - resolve a collision even if nodes are missing. + * @c: UBIFS file-system description object + * @key: key + * @zn: znode is returned here + * @n: branch number is passed and returned here + * @nm: name of directory entry + * @adding: indicates caller is adding a key to the TNC + * + * This is a "fallible" version of the 'resolve_collision()' function which + * does not panic if one of the nodes referred to by TNC does not exist on the + * media. This may happen when replaying the journal if a deleted node was + * Garbage-collected and the commit was not done. A branch that refers to a node + * that is not present is called a dangling branch. The following are the return + * codes for this function: + * o if @nm was found, %1 is returned and @zn and @n are set to the found + * branch; + * o if we are @adding and @nm was not found, %0 is returned; + * o if we are not @adding and @nm was not found, but a dangling branch was + * found, then %1 is returned and @zn and @n are set to the dangling branch; + * o a negative error code is returned in case of failure. + */ +static int fallible_resolve_collision(struct ubifs_info *c, + const union ubifs_key *key, + struct ubifs_znode **zn, int *n, + const struct qstr *nm, int adding) +{ + struct ubifs_znode *o_znode = NULL, *znode = *zn; + int uninitialized_var(o_n), err, cmp, unsure = 0, nn = *n; + + cmp = fallible_matches_name(c, &znode->zbranch[nn], nm); + if (unlikely(cmp < 0)) + return cmp; + if (cmp == NAME_MATCHES) + return 1; + if (cmp == NOT_ON_MEDIA) { + o_znode = znode; + o_n = nn; + /* + * We are unlucky and hit a dangling branch straight away. + * Now we do not really know where to go to find the needed + * branch - to the left or to the right. Well, let's try left. + */ + unsure = 1; + } else if (!adding) + unsure = 1; /* Remove a dangling branch wherever it is */ + + if (cmp == NAME_GREATER || unsure) { + /* Look left */ + while (1) { + err = tnc_prev(c, zn, n); + if (err == -ENOENT) { + ubifs_assert(*n == 0); + *n = -1; + break; + } + if (err < 0) + return err; + if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) { + /* See comments in 'resolve_collision()' */ + if (*n == (*zn)->child_cnt - 1) { + err = tnc_next(c, zn, n); + if (err) { + /* Should be impossible */ + ubifs_assert(0); + if (err == -ENOENT) + err = -EINVAL; + return err; + } + ubifs_assert(*n == 0); + *n = -1; + } + break; + } + err = fallible_matches_name(c, &(*zn)->zbranch[*n], nm); + if (err < 0) + return err; + if (err == NAME_MATCHES) + return 1; + if (err == NOT_ON_MEDIA) { + o_znode = *zn; + o_n = *n; + continue; + } + if (!adding) + continue; + if (err == NAME_LESS) + break; + else + unsure = 0; + } + } + + if (cmp == NAME_LESS || unsure) { + /* Look right */ + *zn = znode; + *n = nn; + while (1) { + err = tnc_next(c, &znode, &nn); + if (err == -ENOENT) + break; + if (err < 0) + return err; + if (keys_cmp(c, &znode->zbranch[nn].key, key)) + break; + err = fallible_matches_name(c, &znode->zbranch[nn], nm); + if (err < 0) + return err; + if (err == NAME_GREATER) + break; + *zn = znode; + *n = nn; + if (err == NAME_MATCHES) + return 1; + if (err == NOT_ON_MEDIA) { + o_znode = znode; + o_n = nn; + } + } + } + + /* Never match a dangling branch when adding */ + if (adding || !o_znode) + return 0; + + dbg_mnt("dangling match LEB %d:%d len %d %s", + o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs, + o_znode->zbranch[o_n].len, DBGKEY(key)); + *zn = o_znode; + *n = o_n; + return 1; +} + +/** + * matches_position - determine if a zbranch matches a given position. + * @zbr: zbranch of dent + * @lnum: LEB number of dent to match + * @offs: offset of dent to match + * + * This function returns %1 if @lnum:@offs matches, and %0 otherwise. + */ +static int matches_position(struct ubifs_zbranch *zbr, int lnum, int offs) +{ + if (zbr->lnum == lnum && zbr->offs == offs) + return 1; + else + return 0; +} + +/** + * resolve_collision_directly - resolve a collision directly. + * @c: UBIFS file-system description object + * @key: key of directory entry + * @zn: znode is passed and returned here + * @n: zbranch number is passed and returned here + * @lnum: LEB number of dent node to match + * @offs: offset of dent node to match + * + * This function is used for "hashed" keys to make sure the found directory or + * extended attribute entry node is what was looked for. It is used when the + * flash address of the right node is known (@lnum:@offs) which makes it much + * easier to resolve collisions (no need to read entries and match full + * names). This function returns %1 and sets @zn and @n if the collision is + * resolved, %0 if @lnum:@offs is not found and @zn and @n are set to the + * previous directory entry. Otherwise a negative error code is returned. + */ +static int resolve_collision_directly(struct ubifs_info *c, + const union ubifs_key *key, + struct ubifs_znode **zn, int *n, + int lnum, int offs) +{ + struct ubifs_znode *znode; + int nn, err; + + znode = *zn; + nn = *n; + if (matches_position(&znode->zbranch[nn], lnum, offs)) + return 1; + + /* Look left */ + while (1) { + err = tnc_prev(c, &znode, &nn); + if (err == -ENOENT) + break; + if (err < 0) + return err; + if (keys_cmp(c, &znode->zbranch[nn].key, key)) + break; + if (matches_position(&znode->zbranch[nn], lnum, offs)) { + *zn = znode; + *n = nn; + return 1; + } + } + + /* Look right */ + znode = *zn; + nn = *n; + while (1) { + err = tnc_next(c, &znode, &nn); + if (err == -ENOENT) + return 0; + if (err < 0) + return err; + if (keys_cmp(c, &znode->zbranch[nn].key, key)) + return 0; + *zn = znode; + *n = nn; + if (matches_position(&znode->zbranch[nn], lnum, offs)) + return 1; + } +} + +/** + * dirty_cow_bottom_up - dirty a znode and its ancestors. + * @c: UBIFS file-system description object + * @znode: znode to dirty + * + * If we do not have a unique key that resides in a znode, then we cannot + * dirty that znode from the top down (i.e. by using lookup_level0_dirty) + * This function records the path back to the last dirty ancestor, and then + * dirties the znodes on that path. + */ +static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c, + struct ubifs_znode *znode) +{ + struct ubifs_znode *zp; + int *path = c->bottom_up_buf, p = 0; + + ubifs_assert(c->zroot.znode); + ubifs_assert(znode); + if (c->zroot.znode->level > BOTTOM_UP_HEIGHT) { + kfree(c->bottom_up_buf); + c->bottom_up_buf = kmalloc(c->zroot.znode->level * sizeof(int), + GFP_NOFS); + if (!c->bottom_up_buf) + return ERR_PTR(-ENOMEM); + path = c->bottom_up_buf; + } + if (c->zroot.znode->level) { + /* Go up until parent is dirty */ + while (1) { + int n; + + zp = znode->parent; + if (!zp) + break; + n = znode->iip; + ubifs_assert(p < c->zroot.znode->level); + path[p++] = n; + if (!zp->cnext && ubifs_zn_dirty(znode)) + break; + znode = zp; + } + } + + /* Come back down, dirtying as we go */ + while (1) { + struct ubifs_zbranch *zbr; + + zp = znode->parent; + if (zp) { + ubifs_assert(path[p - 1] >= 0); + ubifs_assert(path[p - 1] < zp->child_cnt); + zbr = &zp->zbranch[path[--p]]; + znode = dirty_cow_znode(c, zbr); + } else { + ubifs_assert(znode == c->zroot.znode); + znode = dirty_cow_znode(c, &c->zroot); + } + if (unlikely(IS_ERR(znode)) || !p) + break; + ubifs_assert(path[p - 1] >= 0); + ubifs_assert(path[p - 1] < znode->child_cnt); + znode = znode->zbranch[path[p - 1]].znode; + } + + return znode; +} + +/** + * ubifs_lookup_level0 - search for zero-level znode. + * @c: UBIFS file-system description object + * @key: key to lookup + * @zn: znode is returned here + * @n: znode branch slot number is returned here + * + * This function looks up the TNC tree and search for zero-level znode which + * refers key @key. The found zero-level znode is returned in @zn. There are 3 + * cases: + * o exact match, i.e. the found zero-level znode contains key @key, then %1 + * is returned and slot number of the matched branch is stored in @n; + * o not exact match, which means that zero-level znode does not contain + * @key, then %0 is returned and slot number of the closed branch is stored + * in @n; + * o @key is so small that it is even less than the lowest key of the + * leftmost zero-level node, then %0 is returned and %0 is stored in @n. + * + * Note, when the TNC tree is traversed, some znodes may be absent, then this + * function reads corresponding indexing nodes and inserts them to TNC. In + * case of failure, a negative error code is returned. + */ +int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode **zn, int *n) +{ + int err, exact; + struct ubifs_znode *znode; + unsigned long time = get_seconds(); + + dbg_tnc("search key %s", DBGKEY(key)); + + znode = c->zroot.znode; + if (unlikely(!znode)) { + znode = ubifs_load_znode(c, &c->zroot, NULL, 0); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + + znode->time = time; + + while (1) { + struct ubifs_zbranch *zbr; + + exact = ubifs_search_zbranch(c, znode, key, n); + + if (znode->level == 0) + break; + + if (*n < 0) + *n = 0; + zbr = &znode->zbranch[*n]; + + if (zbr->znode) { + znode->time = time; + znode = zbr->znode; + continue; + } + + /* znode is not in TNC cache, load it from the media */ + znode = ubifs_load_znode(c, zbr, znode, *n); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + + *zn = znode; + if (exact || !is_hash_key(c, key) || *n != -1) { + dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n); + return exact; + } + + /* + * Here is a tricky place. We have not found the key and this is a + * "hashed" key, which may collide. The rest of the code deals with + * situations like this: + * + * | 3 | 5 | + * / \ + * | 3 | 5 | | 6 | 7 | (x) + * + * Or more a complex example: + * + * | 1 | 5 | + * / \ + * | 1 | 3 | | 5 | 8 | + * \ / + * | 5 | 5 | | 6 | 7 | (x) + * + * In the examples, if we are looking for key "5", we may reach nodes + * marked with "(x)". In this case what we have do is to look at the + * left and see if there is "5" key there. If there is, we have to + * return it. + * + * Note, this whole situation is possible because we allow to have + * elements which are equivalent to the next key in the parent in the + * children of current znode. For example, this happens if we split a + * znode like this: | 3 | 5 | 5 | 6 | 7 |, which results in something + * like this: + * | 3 | 5 | + * / \ + * | 3 | 5 | | 5 | 6 | 7 | + * ^ + * And this becomes what is at the first "picture" after key "5" marked + * with "^" is removed. What could be done is we could prohibit + * splitting in the middle of the colliding sequence. Also, when + * removing the leftmost key, we would have to correct the key of the + * parent node, which would introduce additional complications. Namely, + * if we changed the the leftmost key of the parent znode, the garbage + * collector would be unable to find it (GC is doing this when GC'ing + * indexing LEBs). Although we already have an additional RB-tree where + * we save such changed znodes (see 'ins_clr_old_idx_znode()') until + * after the commit. But anyway, this does not look easy to implement + * so we did not try this. + */ + err = tnc_prev(c, &znode, n); + if (err == -ENOENT) { + dbg_tnc("found 0, lvl %d, n -1", znode->level); + *n = -1; + return 0; + } + if (unlikely(err < 0)) + return err; + if (keys_cmp(c, key, &znode->zbranch[*n].key)) { + dbg_tnc("found 0, lvl %d, n -1", znode->level); + *n = -1; + return 0; + } + + dbg_tnc("found 1, lvl %d, n %d", znode->level, *n); + *zn = znode; + return 1; +} + +/** + * lookup_level0_dirty - search for zero-level znode dirtying. + * @c: UBIFS file-system description object + * @key: key to lookup + * @zn: znode is returned here + * @n: znode branch slot number is returned here + * + * This function looks up the TNC tree and search for zero-level znode which + * refers key @key. The found zero-level znode is returned in @zn. There are 3 + * cases: + * o exact match, i.e. the found zero-level znode contains key @key, then %1 + * is returned and slot number of the matched branch is stored in @n; + * o not exact match, which means that zero-level znode does not contain @key + * then %0 is returned and slot number of the closed branch is stored in + * @n; + * o @key is so small that it is even less than the lowest key of the + * leftmost zero-level node, then %0 is returned and %-1 is stored in @n. + * + * Additionally all znodes in the path from the root to the located zero-level + * znode are marked as dirty. + * + * Note, when the TNC tree is traversed, some znodes may be absent, then this + * function reads corresponding indexing nodes and inserts them to TNC. In + * case of failure, a negative error code is returned. + */ +static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode **zn, int *n) +{ + int err, exact; + struct ubifs_znode *znode; + unsigned long time = get_seconds(); + + dbg_tnc("search and dirty key %s", DBGKEY(key)); + + znode = c->zroot.znode; + if (unlikely(!znode)) { + znode = ubifs_load_znode(c, &c->zroot, NULL, 0); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + + znode = dirty_cow_znode(c, &c->zroot); + if (IS_ERR(znode)) + return PTR_ERR(znode); + + znode->time = time; + + while (1) { + struct ubifs_zbranch *zbr; + + exact = ubifs_search_zbranch(c, znode, key, n); + + if (znode->level == 0) + break; + + if (*n < 0) + *n = 0; + zbr = &znode->zbranch[*n]; + + if (zbr->znode) { + znode->time = time; + znode = dirty_cow_znode(c, zbr); + if (IS_ERR(znode)) + return PTR_ERR(znode); + continue; + } + + /* znode is not in TNC cache, load it from the media */ + znode = ubifs_load_znode(c, zbr, znode, *n); + if (IS_ERR(znode)) + return PTR_ERR(znode); + znode = dirty_cow_znode(c, zbr); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + + *zn = znode; + if (exact || !is_hash_key(c, key) || *n != -1) { + dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n); + return exact; + } + + /* + * See huge comment at 'lookup_level0_dirty()' what is the rest of the + * code. + */ + err = tnc_prev(c, &znode, n); + if (err == -ENOENT) { + *n = -1; + dbg_tnc("found 0, lvl %d, n -1", znode->level); + return 0; + } + if (unlikely(err < 0)) + return err; + if (keys_cmp(c, key, &znode->zbranch[*n].key)) { + *n = -1; + dbg_tnc("found 0, lvl %d, n -1", znode->level); + return 0; + } + + if (znode->cnext || !ubifs_zn_dirty(znode)) { + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + + dbg_tnc("found 1, lvl %d, n %d", znode->level, *n); + *zn = znode; + return 1; +} + +/** + * ubifs_tnc_lookup - look up a file-system node. + * @c: UBIFS file-system description object + * @key: node key to lookup + * @node: the node is returned here + * + * This function look up and reads node with key @key. The caller has to make + * sure the @node buffer is large enough to fit the node. Returns zero in case + * of success, %-ENOENT if the node was not found, and a negative error code in + * case of failure. + */ +int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, + void *node) +{ + int found, n, err; + struct ubifs_znode *znode; + struct ubifs_zbranch zbr, *zt; + + mutex_lock(&c->tnc_mutex); + found = ubifs_lookup_level0(c, key, &znode, &n); + if (!found) { + err = -ENOENT; + goto out; + } else if (found < 0) { + err = found; + goto out; + } + zt = &znode->zbranch[n]; + if (is_hash_key(c, key)) { + /* + * In this case the leaf node cache gets used, so we pass the + * address of the zbranch and keep the mutex locked + */ + err = tnc_read_node_nm(c, zt, node); + goto out; + } + zbr = znode->zbranch[n]; + mutex_unlock(&c->tnc_mutex); + + err = ubifs_tnc_read_node(c, &zbr, node); + return err; + +out: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * ubifs_tnc_locate - look up a file-system node and return it and its location. + * @c: UBIFS file-system description object + * @key: node key to lookup + * @node: the node is returned here + * @lnum: LEB number is returned here + * @offs: offset is returned here + * + * This function is the same as 'ubifs_tnc_lookup()' but it returns the node + * location also. See 'ubifs_tnc_lookup()'. + */ +int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, + void *node, int *lnum, int *offs) +{ + int found, n, err; + struct ubifs_znode *znode; + struct ubifs_zbranch zbr, *zt; + + mutex_lock(&c->tnc_mutex); + found = ubifs_lookup_level0(c, key, &znode, &n); + if (!found) { + err = -ENOENT; + goto out; + } else if (found < 0) { + err = found; + goto out; + } + zt = &znode->zbranch[n]; + if (is_hash_key(c, key)) { + /* + * In this case the leaf node cache gets used, so we pass the + * address of the zbranch and keep the mutex locked + */ + *lnum = zt->lnum; + *offs = zt->offs; + err = tnc_read_node_nm(c, zt, node); + goto out; + } + zbr = znode->zbranch[n]; + mutex_unlock(&c->tnc_mutex); + + *lnum = zbr.lnum; + *offs = zbr.offs; + + err = ubifs_tnc_read_node(c, &zbr, node); + return err; + +out: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * do_lookup_nm- look up a "hashed" node. + * @c: UBIFS file-system description object + * @key: node key to lookup + * @node: the node is returned here + * @nm: node name + * + * This function look up and reads a node which contains name hash in the key. + * Since the hash may have collisions, there may be many nodes with the same + * key, so we have to sequentially look to all of them until the needed one is + * found. This function returns zero in case of success, %-ENOENT if the node + * was not found, and a negative error code in case of failure. + */ +static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, + void *node, const struct qstr *nm) +{ + int found, n, err; + struct ubifs_znode *znode; + struct ubifs_zbranch zbr; + + dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); + mutex_lock(&c->tnc_mutex); + found = ubifs_lookup_level0(c, key, &znode, &n); + if (!found) { + err = -ENOENT; + goto out_unlock; + } else if (found < 0) { + err = found; + goto out_unlock; + } + + ubifs_assert(n >= 0); + + err = resolve_collision(c, key, &znode, &n, nm); + dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n); + if (unlikely(err < 0)) + goto out_unlock; + if (err == 0) { + err = -ENOENT; + goto out_unlock; + } + + zbr = znode->zbranch[n]; + mutex_unlock(&c->tnc_mutex); + + err = tnc_read_node_nm(c, &zbr, node); + return err; + +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * ubifs_tnc_lookup_nm - look up a "hashed" node. + * @c: UBIFS file-system description object + * @key: node key to lookup + * @node: the node is returned here + * @nm: node name + * + * This function look up and reads a node which contains name hash in the key. + * Since the hash may have collisions, there may be many nodes with the same + * key, so we have to sequentially look to all of them until the needed one is + * found. This function returns zero in case of success, %-ENOENT if the node + * was not found, and a negative error code in case of failure. + */ +int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, + void *node, const struct qstr *nm) +{ + int err, len; + const struct ubifs_dent_node *dent = node; + + /* + * We assume that in most of the cases there are no name collisions and + * 'ubifs_tnc_lookup()' returns us the right direntry. + */ + err = ubifs_tnc_lookup(c, key, node); + if (err) + return err; + + len = le16_to_cpu(dent->nlen); + if (nm->len == len && !memcmp(dent->name, nm->name, len)) + return 0; + + /* + * Unluckily, there are hash collisions and we have to iterate over + * them look at each direntry with colliding name hash sequentially. + */ + return do_lookup_nm(c, key, node, nm); +} + +/** + * correct_parent_keys - correct parent znodes' keys. + * @c: UBIFS file-system description object + * @znode: znode to correct parent znodes for + * + * This is a helper function for 'tnc_insert()'. When the key of the leftmost + * zbranch changes, keys of parent znodes have to be corrected. This helper + * function is called in such situations and corrects the keys if needed. + */ +static void correct_parent_keys(const struct ubifs_info *c, + struct ubifs_znode *znode) +{ + union ubifs_key *key, *key1; + + ubifs_assert(znode->parent); + ubifs_assert(znode->iip == 0); + + key = &znode->zbranch[0].key; + key1 = &znode->parent->zbranch[0].key; + + while (keys_cmp(c, key, key1) < 0) { + key_copy(c, key, key1); + znode = znode->parent; + znode->alt = 1; + if (!znode->parent || znode->iip) + break; + key1 = &znode->parent->zbranch[0].key; + } +} + +/** + * insert_zbranch - insert a zbranch into a znode. + * @znode: znode into which to insert + * @zbr: zbranch to insert + * @n: slot number to insert to + * + * This is a helper function for 'tnc_insert()'. UBIFS does not allow "gaps" in + * znode's array of zbranches and keeps zbranches consolidated, so when a new + * zbranch has to be inserted to the @znode->zbranches[]' array at the @n-th + * slot, zbranches starting from @n have to be moved right. + */ +static void insert_zbranch(struct ubifs_znode *znode, + const struct ubifs_zbranch *zbr, int n) +{ + int i; + + ubifs_assert(ubifs_zn_dirty(znode)); + + if (znode->level) { + for (i = znode->child_cnt; i > n; i--) { + znode->zbranch[i] = znode->zbranch[i - 1]; + if (znode->zbranch[i].znode) + znode->zbranch[i].znode->iip = i; + } + if (zbr->znode) + zbr->znode->iip = n; + } else + for (i = znode->child_cnt; i > n; i--) + znode->zbranch[i] = znode->zbranch[i - 1]; + + znode->zbranch[n] = *zbr; + znode->child_cnt += 1; + + /* + * After inserting at slot zero, the lower bound of the key range of + * this znode may have changed. If this znode is subsequently split + * then the upper bound of the key range may change, and furthermore + * it could change to be lower than the original lower bound. If that + * happens, then it will no longer be possible to find this znode in the + * TNC using the key from the index node on flash. That is bad because + * if it is not found, we will assume it is obsolete and may overwrite + * it. Then if there is an unclean unmount, we will start using the + * old index which will be broken. + * + * So we first mark znodes that have insertions at slot zero, and then + * if they are split we add their lnum/offs to the old_idx tree. + */ + if (n == 0) + znode->alt = 1; +} + +/** + * tnc_insert - insert a node into TNC. + * @c: UBIFS file-system description object + * @znode: znode to insert into + * @zbr: branch to insert + * @n: slot number to insert new zbranch to + * + * This function inserts a new node described by @zbr into znode @znode. If + * znode does not have a free slot for new zbranch, it is split. Parent znodes + * are splat as well if needed. Returns zero in case of success or a negative + * error code in case of failure. + */ +static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode, + struct ubifs_zbranch *zbr, int n) +{ + struct ubifs_znode *zn, *zi, *zp; + int i, keep, move, appending = 0; + union ubifs_key *key = &zbr->key; + + ubifs_assert(n >= 0 && n <= c->fanout); + + /* Implement naive insert for now */ +again: + zp = znode->parent; + if (znode->child_cnt < c->fanout) { + ubifs_assert(n != c->fanout); + dbg_tnc("inserted at %d level %d, key %s", n, znode->level, + DBGKEY(key)); + + insert_zbranch(znode, zbr, n); + + /* Ensure parent's key is correct */ + if (n == 0 && zp && znode->iip == 0) + correct_parent_keys(c, znode); + + return 0; + } + + /* + * Unfortunately, @znode does not have more empty slots and we have to + * split it. + */ + dbg_tnc("splitting level %d, key %s", znode->level, DBGKEY(key)); + + if (znode->alt) + /* + * We can no longer be sure of finding this znode by key, so we + * record it in the old_idx tree. + */ + ins_clr_old_idx_znode(c, znode); + + zn = kzalloc(c->max_znode_sz, GFP_NOFS); + if (!zn) + return -ENOMEM; + zn->parent = zp; + zn->level = znode->level; + + /* Decide where to split */ + if (znode->level == 0 && n == c->fanout && + key_type(c, key) == UBIFS_DATA_KEY) { + union ubifs_key *key1; + + /* + * If this is an inode which is being appended - do not split + * it because no other zbranches can be inserted between + * zbranches of consecutive data nodes anyway. + */ + key1 = &znode->zbranch[n - 1].key; + if (key_inum(c, key1) == key_inum(c, key) && + key_type(c, key1) == UBIFS_DATA_KEY && + key_block(c, key1) == key_block(c, key) - 1) + appending = 1; + } + + if (appending) { + keep = c->fanout; + move = 0; + } else { + keep = (c->fanout + 1) / 2; + move = c->fanout - keep; + } + + /* + * Although we don't at present, we could look at the neighbors and see + * if we can move some zbranches there. + */ + + if (n < keep) { + /* Insert into existing znode */ + zi = znode; + move += 1; + keep -= 1; + } else { + /* Insert into new znode */ + zi = zn; + n -= keep; + /* Re-parent */ + if (zn->level != 0) + zbr->znode->parent = zn; + } + + __set_bit(DIRTY_ZNODE, &zn->flags); + atomic_long_inc(&c->dirty_zn_cnt); + + zn->child_cnt = move; + znode->child_cnt = keep; + + dbg_tnc("moving %d, keeping %d", move, keep); + + /* Move zbranch */ + for (i = 0; i < move; i++) { + zn->zbranch[i] = znode->zbranch[keep + i]; + /* Re-parent */ + if (zn->level != 0) + if (zn->zbranch[i].znode) { + zn->zbranch[i].znode->parent = zn; + zn->zbranch[i].znode->iip = i; + } + } + + /* Insert new key and branch */ + dbg_tnc("inserting at %d level %d, key %s", n, zn->level, DBGKEY(key)); + + insert_zbranch(zi, zbr, n); + + /* Insert new znode (produced by spitting) into the parent */ + if (zp) { + i = n; + /* Locate insertion point */ + n = znode->iip + 1; + if (appending && n != c->fanout) + appending = 0; + + if (i == 0 && zi == znode && znode->iip == 0) + correct_parent_keys(c, znode); + + /* Tail recursion */ + zbr->key = zn->zbranch[0].key; + zbr->znode = zn; + zbr->lnum = 0; + zbr->offs = 0; + zbr->len = 0; + znode = zp; + + goto again; + } + + /* We have to split root znode */ + dbg_tnc("creating new zroot at level %d", znode->level + 1); + + zi = kzalloc(c->max_znode_sz, GFP_NOFS); + if (!zi) + return -ENOMEM; + + zi->child_cnt = 2; + zi->level = znode->level + 1; + + __set_bit(DIRTY_ZNODE, &zi->flags); + atomic_long_inc(&c->dirty_zn_cnt); + + zi->zbranch[0].key = znode->zbranch[0].key; + zi->zbranch[0].znode = znode; + zi->zbranch[0].lnum = c->zroot.lnum; + zi->zbranch[0].offs = c->zroot.offs; + zi->zbranch[0].len = c->zroot.len; + zi->zbranch[1].key = zn->zbranch[0].key; + zi->zbranch[1].znode = zn; + + c->zroot.lnum = 0; + c->zroot.offs = 0; + c->zroot.len = 0; + c->zroot.znode = zi; + + zn->parent = zi; + zn->iip = 1; + znode->parent = zi; + znode->iip = 0; + + return 0; +} + +/** + * ubifs_tnc_add - add a node to TNC. + * @c: UBIFS file-system description object + * @key: key to add + * @lnum: LEB number of node + * @offs: node offset + * @len: node length + * + * This function adds a node with key @key to TNC. The node may be new or it may + * obsolete some existing one. Returns %0 on success or negative error code on + * failure. + */ +int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, + int offs, int len) +{ + int found, n, err = 0; + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); + dbg_tnc("%d:%d, len %d, key %s", lnum, offs, len, DBGKEY(key)); + found = lookup_level0_dirty(c, key, &znode, &n); + if (!found) { + struct ubifs_zbranch zbr; + + zbr.znode = NULL; + zbr.lnum = lnum; + zbr.offs = offs; + zbr.len = len; + key_copy(c, key, &zbr.key); + err = tnc_insert(c, znode, &zbr, n + 1); + } else if (found == 1) { + struct ubifs_zbranch *zbr = &znode->zbranch[n]; + + lnc_free(zbr); + err = ubifs_add_dirt(c, zbr->lnum, zbr->len); + zbr->lnum = lnum; + zbr->offs = offs; + zbr->len = len; + } else + err = found; + if (!err) + err = dbg_check_tnc(c, 0); + mutex_unlock(&c->tnc_mutex); + + return err; +} + +/** + * ubifs_tnc_replace - replace a node in the TNC only if the old node is found. + * @c: UBIFS file-system description object + * @key: key to add + * @old_lnum: LEB number of old node + * @old_offs: old node offset + * @lnum: LEB number of node + * @offs: node offset + * @len: node length + * + * This function replaces a node with key @key in the TNC only if the old node + * is found. This function is called by garbage collection when node are moved. + * Returns %0 on success or negative error code on failure. + */ +int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, + int old_lnum, int old_offs, int lnum, int offs, int len) +{ + int found, n, err = 0; + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); + dbg_tnc("old LEB %d:%d, new LEB %d:%d, len %d, key %s", old_lnum, + old_offs, lnum, offs, len, DBGKEY(key)); + found = lookup_level0_dirty(c, key, &znode, &n); + if (found < 0) { + err = found; + goto out_unlock; + } + + if (found == 1) { + struct ubifs_zbranch *zbr = &znode->zbranch[n]; + + found = 0; + if (zbr->lnum == old_lnum && zbr->offs == old_offs) { + lnc_free(zbr); + err = ubifs_add_dirt(c, zbr->lnum, zbr->len); + if (err) + goto out_unlock; + zbr->lnum = lnum; + zbr->offs = offs; + zbr->len = len; + found = 1; + } else if (is_hash_key(c, key)) { + found = resolve_collision_directly(c, key, &znode, &n, + old_lnum, old_offs); + dbg_tnc("rc returned %d, znode %p, n %d, LEB %d:%d", + found, znode, n, old_lnum, old_offs); + if (found < 0) { + err = found; + goto out_unlock; + } + + if (found) { + /* Ensure the znode is dirtied */ + if (znode->cnext || !ubifs_zn_dirty(znode)) { + znode = dirty_cow_bottom_up(c, + znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } + } + zbr = &znode->zbranch[n]; + lnc_free(zbr); + err = ubifs_add_dirt(c, zbr->lnum, + zbr->len); + if (err) + goto out_unlock; + zbr->lnum = lnum; + zbr->offs = offs; + zbr->len = len; + } + } + } + + if (!found) + err = ubifs_add_dirt(c, lnum, len); + + if (!err) + err = dbg_check_tnc(c, 0); + +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * ubifs_tnc_add_nm - add a "hashed" node to TNC. + * @c: UBIFS file-system description object + * @key: key to add + * @lnum: LEB number of node + * @offs: node offset + * @len: node length + * @nm: node name + * + * This is the same as 'ubifs_tnc_add()' but it should be used with keys which + * may have collisions, like directory entry keys. + */ +int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, + int lnum, int offs, int len, const struct qstr *nm) +{ + int found, n, err = 0; + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); + dbg_tnc("LEB %d:%d, name '%.*s', key %s", lnum, offs, nm->len, nm->name, + DBGKEY(key)); + found = lookup_level0_dirty(c, key, &znode, &n); + if (found < 0) { + err = found; + goto out_unlock; + } + + if (found == 1) { + if (c->replaying) + found = fallible_resolve_collision(c, key, &znode, &n, + nm, 1); + else + found = resolve_collision(c, key, &znode, &n, nm); + dbg_tnc("rc returned %d, znode %p, n %d", found, znode, n); + if (found < 0) { + err = found; + goto out_unlock; + } + + /* Ensure the znode is dirtied */ + if (znode->cnext || !ubifs_zn_dirty(znode)) { + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } + } + + if (found == 1) { + struct ubifs_zbranch *zbr = &znode->zbranch[n]; + + lnc_free(zbr); + err = ubifs_add_dirt(c, zbr->lnum, zbr->len); + zbr->lnum = lnum; + zbr->offs = offs; + zbr->len = len; + goto out_unlock; + } + } + + if (!found) { + struct ubifs_zbranch zbr; + + zbr.znode = NULL; + zbr.lnum = lnum; + zbr.offs = offs; + zbr.len = len; + key_copy(c, key, &zbr.key); + err = tnc_insert(c, znode, &zbr, n + 1); + if (err) + goto out_unlock; + if (c->replaying) { + /* + * We did not find it in the index so there may be a + * dangling branch still in the index. So we remove it + * by passing 'ubifs_tnc_remove_nm()' the same key but + * an unmatchable name. + */ + struct qstr noname = { .len = 0, .name = "" }; + + err = dbg_check_tnc(c, 0); + mutex_unlock(&c->tnc_mutex); + if (err) + return err; + return ubifs_tnc_remove_nm(c, key, &noname); + } + } + +out_unlock: + if (!err) + err = dbg_check_tnc(c, 0); + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * tnc_delete - delete a znode form TNC. + * @c: UBIFS file-system description object + * @znode: znode to delete from + * @n: zbranch slot number to delete + * + * This function deletes a leaf node from @n-th slot of @znode. Returns zero in + * case of success and a negative error code in case of failure. + */ +static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) +{ + struct ubifs_zbranch *zbr; + struct ubifs_znode *zp; + int i, err; + + /* Delete without merge for now */ + ubifs_assert(znode->level == 0); + ubifs_assert(n >= 0 && n < c->fanout); + dbg_tnc("deleting %s", DBGKEY(&znode->zbranch[n].key)); + + zbr = &znode->zbranch[n]; + lnc_free(zbr); + + err = ubifs_add_dirt(c, zbr->lnum, zbr->len); + if (err) { + dbg_dump_znode(c, znode); + return err; + } + + /* We do not "gap" zbranch slots */ + for (i = n; i < znode->child_cnt - 1; i++) + znode->zbranch[i] = znode->zbranch[i + 1]; + znode->child_cnt -= 1; + + if (znode->child_cnt > 0) + return 0; + + /* + * This was the last zbranch, we have to delete this znode from the + * parent. + */ + + do { + ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); + ubifs_assert(ubifs_zn_dirty(znode)); + + zp = znode->parent; + n = znode->iip; + + atomic_long_dec(&c->dirty_zn_cnt); + + err = insert_old_idx_znode(c, znode); + if (err) + return err; + + if (znode->cnext) { + __set_bit(OBSOLETE_ZNODE, &znode->flags); + atomic_long_inc(&c->clean_zn_cnt); + atomic_long_inc(&ubifs_clean_zn_cnt); + } else + kfree(znode); + znode = zp; + } while (znode->child_cnt == 1); /* while removing last child */ + + /* Remove from znode, entry n - 1 */ + znode->child_cnt -= 1; + ubifs_assert(znode->level != 0); + for (i = n; i < znode->child_cnt; i++) { + znode->zbranch[i] = znode->zbranch[i + 1]; + if (znode->zbranch[i].znode) + znode->zbranch[i].znode->iip = i; + } + + /* + * If this is the root and it has only 1 child then + * collapse the tree. + */ + if (!znode->parent) { + while (znode->child_cnt == 1 && znode->level != 0) { + zp = znode; + zbr = &znode->zbranch[0]; + znode = get_znode(c, znode, 0); + if (IS_ERR(znode)) + return PTR_ERR(znode); + znode = dirty_cow_znode(c, zbr); + if (IS_ERR(znode)) + return PTR_ERR(znode); + znode->parent = NULL; + znode->iip = 0; + if (c->zroot.len) { + err = insert_old_idx(c, c->zroot.lnum, + c->zroot.offs); + if (err) + return err; + } + c->zroot.lnum = zbr->lnum; + c->zroot.offs = zbr->offs; + c->zroot.len = zbr->len; + c->zroot.znode = znode; + ubifs_assert(!test_bit(OBSOLETE_ZNODE, + &zp->flags)); + ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags)); + atomic_long_dec(&c->dirty_zn_cnt); + + if (zp->cnext) { + __set_bit(OBSOLETE_ZNODE, &zp->flags); + atomic_long_inc(&c->clean_zn_cnt); + atomic_long_inc(&ubifs_clean_zn_cnt); + } else + kfree(zp); + } + } + + return 0; +} + +/** + * ubifs_tnc_remove - remove an index entry of a node. + * @c: UBIFS file-system description object + * @key: key of node + * + * Returns %0 on success or negative error code on failure. + */ +int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key) +{ + int found, n, err = 0; + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); + dbg_tnc("key %s", DBGKEY(key)); + found = lookup_level0_dirty(c, key, &znode, &n); + if (found < 0) { + err = found; + goto out_unlock; + } + if (found == 1) + err = tnc_delete(c, znode, n); + if (!err) + err = dbg_check_tnc(c, 0); + +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * ubifs_tnc_remove_nm - remove an index entry for a "hashed" node. + * @c: UBIFS file-system description object + * @key: key of node + * @nm: directory entry name + * + * Returns %0 on success or negative error code on failure. + */ +int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, + const struct qstr *nm) +{ + int n, err; + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); + dbg_tnc("%.*s, key %s", nm->len, nm->name, DBGKEY(key)); + err = lookup_level0_dirty(c, key, &znode, &n); + if (err < 0) + goto out_unlock; + + if (err) { + if (c->replaying) + err = fallible_resolve_collision(c, key, &znode, &n, + nm, 0); + else + err = resolve_collision(c, key, &znode, &n, nm); + dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n); + if (err < 0) + goto out_unlock; + if (err) { + /* Ensure the znode is dirtied */ + if (znode->cnext || !ubifs_zn_dirty(znode)) { + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } + } + err = tnc_delete(c, znode, n); + } + } + +out_unlock: + if (!err) + err = dbg_check_tnc(c, 0); + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * key_in_range - determine if a key falls within a range of keys. + * @c: UBIFS file-system description object + * @key: key to check + * @from_key: lowest key in range + * @to_key: highest key in range + * + * This function returns %1 if the key is in range and %0 otherwise. + */ +static int key_in_range(struct ubifs_info *c, union ubifs_key *key, + union ubifs_key *from_key, union ubifs_key *to_key) +{ + if (keys_cmp(c, key, from_key) < 0) + return 0; + if (keys_cmp(c, key, to_key) > 0) + return 0; + return 1; +} + +/** + * ubifs_tnc_remove_range - remove index entries in range. + * @c: UBIFS file-system description object + * @from_key: lowest key to remove + * @to_key: highest key to remove + * + * This function removes index entries starting at @from_key and ending at + * @to_key. This function returns zero in case of success and a negative error + * code in case of failure. + */ +int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, + union ubifs_key *to_key) +{ + int i, n, k, err = 0; + struct ubifs_znode *znode; + union ubifs_key *key; + + mutex_lock(&c->tnc_mutex); + while (1) { + /* Find first level 0 znode that contains keys to remove */ + err = ubifs_lookup_level0(c, from_key, &znode, &n); + if (err < 0) + goto out_unlock; + + if (err) + key = from_key; + else { + err = tnc_next(c, &znode, &n); + if (err == -ENOENT) { + err = 0; + goto out_unlock; + } + if (err < 0) + goto out_unlock; + key = &znode->zbranch[n].key; + if (!key_in_range(c, key, from_key, to_key)) { + err = 0; + goto out_unlock; + } + } + + /* Ensure the znode is dirtied */ + if (znode->cnext || !ubifs_zn_dirty(znode)) { + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } + } + + /* Remove all keys in range except the first */ + for (i = n + 1, k = 0; i < znode->child_cnt; i++, k++) { + key = &znode->zbranch[i].key; + if (!key_in_range(c, key, from_key, to_key)) + break; + lnc_free(&znode->zbranch[i]); + err = ubifs_add_dirt(c, znode->zbranch[i].lnum, + znode->zbranch[i].len); + if (err) { + dbg_dump_znode(c, znode); + goto out_unlock; + } + dbg_tnc("removing %s", DBGKEY(key)); + } + if (k) { + for (i = n + 1 + k; i < znode->child_cnt; i++) + znode->zbranch[i - k] = znode->zbranch[i]; + znode->child_cnt -= k; + } + + /* Now delete the first */ + err = tnc_delete(c, znode, n); + if (err) + goto out_unlock; + } + +out_unlock: + if (!err) + err = dbg_check_tnc(c, 0); + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * ubifs_tnc_remove_ino - remove an inode from TNC. + * @c: UBIFS file-system description object + * @inum: inode number to remove + * + * This function remove inode @inum and all the extended attributes associated + * with the anode from TNC and returns zero in case of success or a negative + * error code in case of failure. + */ +int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum) +{ + union ubifs_key key1, key2; + struct ubifs_dent_node *xent, *pxent = NULL; + struct qstr nm = { .name = NULL }; + + dbg_tnc("ino %lu", inum); + + /* + * Walk all extended attribute entries and remove them together with + * corresponding extended attribute inodes. + */ + lowest_xent_key(c, &key1, inum); + while (1) { + ino_t xattr_inum; + int err; + + xent = ubifs_tnc_next_ent(c, &key1, &nm); + if (IS_ERR(xent)) { + err = PTR_ERR(xent); + if (err == -ENOENT) + break; + return err; + } + + xattr_inum = le64_to_cpu(xent->inum); + dbg_tnc("xent '%s', ino %lu", xent->name, xattr_inum); + + nm.name = xent->name; + nm.len = le16_to_cpu(xent->nlen); + err = ubifs_tnc_remove_nm(c, &key1, &nm); + if (err) { + kfree(xent); + return err; + } + + lowest_ino_key(c, &key1, xattr_inum); + highest_ino_key(c, &key2, xattr_inum); + err = ubifs_tnc_remove_range(c, &key1, &key2); + if (err) { + kfree(xent); + return err; + } + + kfree(pxent); + pxent = xent; + key_read(c, &xent->key, &key1); + } + + kfree(pxent); + lowest_ino_key(c, &key1, inum); + highest_ino_key(c, &key2, inum); + + return ubifs_tnc_remove_range(c, &key1, &key2); +} + +/** + * ubifs_tnc_next_ent - walk directory or extended attribute entries. + * @c: UBIFS file-system description object + * @key: key of last entry + * @nm: name of last entry found or %NULL + * + * This function finds and reads the next directory or extended attribute entry + * after the given key (@key) if there is one. @nm is used to resolve + * collisions. + * + * If the name of the current entry is not known and only the key is known, + * @nm->name has to be %NULL. In this case the semantics of this function is a + * little bit different and it returns the entry corresponding to this key, not + * the next one. If the key was not found, the closest "right" entry is + * returned. + * + * If the fist entry has to be found, @key has to contain the lowest possible + * key value for this inode and @name has to be %NULL. + * + * This function returns the found directory or extended attribute entry node + * in case of success, %-ENOENT is returned if no entry was found, and a + * negative error code is returned in case of failure. + */ +struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, + union ubifs_key *key, + const struct qstr *nm) +{ + int n, err, type = key_type(c, key); + struct ubifs_znode *znode; + struct ubifs_dent_node *dent; + struct ubifs_zbranch *zbr; + union ubifs_key *dkey; + + dbg_tnc("%s %s", nm->name ? (char *)nm->name : "(lowest)", DBGKEY(key)); + ubifs_assert(is_hash_key(c, key)); + + mutex_lock(&c->tnc_mutex); + err = ubifs_lookup_level0(c, key, &znode, &n); + if (unlikely(err < 0)) + goto out_unlock; + + if (nm->name) { + if (err) { + /* Handle collisions */ + err = resolve_collision(c, key, &znode, &n, nm); + dbg_tnc("rc returned %d, znode %p, n %d", + err, znode, n); + if (unlikely(err < 0)) + goto out_unlock; + } + + /* Now find next entry */ + err = tnc_next(c, &znode, &n); + if (unlikely(err)) + goto out_unlock; + } else { + /* + * The full name of the entry was not given, in which case the + * behavior of this function is a little different and it + * returns current entry, not the next one. + */ + if (!err) { + /* + * However, the given key does not exist in the TNC + * tree and @znode/@n variables contain the closest + * "preceding" element. Switch to the next one. + */ + err = tnc_next(c, &znode, &n); + if (err) + goto out_unlock; + } + } + + zbr = &znode->zbranch[n]; + dent = kmalloc(zbr->len, GFP_NOFS); + if (unlikely(!dent)) { + err = -ENOMEM; + goto out_unlock; + } + + /* + * The above 'tnc_next()' call could lead us to the next inode, check + * this. + */ + dkey = &zbr->key; + if (key_inum(c, dkey) != key_inum(c, key) || + key_type(c, dkey) != type) { + err = -ENOENT; + goto out_free; + } + + err = tnc_read_node_nm(c, zbr, dent); + if (unlikely(err)) + goto out_free; + + mutex_unlock(&c->tnc_mutex); + return dent; + +out_free: + kfree(dent); +out_unlock: + mutex_unlock(&c->tnc_mutex); + return ERR_PTR(err); +} + +/** + * tnc_destroy_cnext - destroy left-over obsolete znodes from a failed commit. + * @c: UBIFS file-system description object + * + * Destroy left-over obsolete znodes from a failed commit. + */ +static void tnc_destroy_cnext(struct ubifs_info *c) +{ + struct ubifs_znode *cnext; + + if (!c->cnext) + return; + ubifs_assert(c->cmt_state == COMMIT_BROKEN); + cnext = c->cnext; + do { + struct ubifs_znode *znode = cnext; + + cnext = cnext->cnext; + if (test_bit(OBSOLETE_ZNODE, &znode->flags)) + kfree(znode); + } while (cnext && cnext != c->cnext); +} + +/** + * ubifs_tnc_close - close TNC subsystem and free all related resources. + * @c: UBIFS file-system description object + */ +void ubifs_tnc_close(struct ubifs_info *c) +{ + long clean_freed; + + tnc_destroy_cnext(c); + if (c->zroot.znode) { + clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode); + atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt); + } + kfree(c->gap_lebs); + kfree(c->ilebs); + destroy_old_idx(c); +} + +/** + * left_znode - get the znode to the left. + * @c: UBIFS file-system description object + * @znode: znode + * + * This function returns a pointer to the znode to the left of @znode or NULL if + * there is not one. A negative error code is returned on failure. + */ +static struct ubifs_znode *left_znode(struct ubifs_info *c, + struct ubifs_znode *znode) +{ + int level = znode->level; + + while (1) { + int n = znode->iip - 1; + + /* Go up until we can go left */ + znode = znode->parent; + if (!znode) + return NULL; + if (n >= 0) { + /* Now go down the rightmost branch to 'level' */ + znode = get_znode(c, znode, n); + if (IS_ERR(znode)) + return znode; + while (znode->level != level) { + n = znode->child_cnt - 1; + znode = get_znode(c, znode, n); + if (IS_ERR(znode)) + return znode; + } + break; + } + } + return znode; +} + +/** + * right_znode - get the znode to the right. + * @c: UBIFS file-system description object + * @znode: znode + * + * This function returns a pointer to the znode to the right of @znode or NULL + * if there is not one. A negative error code is returned on failure. + */ +static struct ubifs_znode *right_znode(struct ubifs_info *c, + struct ubifs_znode *znode) +{ + int level = znode->level; + + while (1) { + int n = znode->iip + 1; + + /* Go up until we can go right */ + znode = znode->parent; + if (!znode) + return NULL; + if (n < znode->child_cnt) { + /* Now go down the leftmost branch to 'level' */ + znode = get_znode(c, znode, n); + if (IS_ERR(znode)) + return znode; + while (znode->level != level) { + znode = get_znode(c, znode, 0); + if (IS_ERR(znode)) + return znode; + } + break; + } + } + return znode; +} + +/** + * lookup_znode - find a particular indexing node from TNC. + * @c: UBIFS file-system description object + * @key: index node key to lookup + * @level: index node level + * @lnum: index node LEB number + * @offs: index node offset + * + * This function searches an indexing node by its first key @key and its + * address @lnum:@offs. It looks up the indexing tree by pulling all indexing + * nodes it traverses to TNC. This function is called fro indexing nodes which + * were found on the media by scanning, for example when garbage-collecting or + * when doing in-the-gaps commit. This means that the indexing node which is + * looked for does not have to have exactly the same leftmost key @key, because + * the leftmost key may have been changed, in which case TNC will contain a + * dirty znode which still refers the same @lnum:@offs. This function is clever + * enough to recognize such indexing nodes. + * + * Note, if a znode was deleted or changed too much, then this function will + * not find it. For situations like this UBIFS has the old index RB-tree + * (indexed by @lnum:@offs). + * + * This function returns a pointer to the znode found or %NULL if it is not + * found. A negative error code is returned on failure. + */ +static struct ubifs_znode *lookup_znode(struct ubifs_info *c, + union ubifs_key *key, int level, + int lnum, int offs) +{ + struct ubifs_znode *znode, *zn; + int n, nn; + + /* + * The arguments have probably been read off flash, so don't assume + * they are valid. + */ + if (level < 0) + return ERR_PTR(-EINVAL); + + /* Get the root znode */ + znode = c->zroot.znode; + if (!znode) { + znode = ubifs_load_znode(c, &c->zroot, NULL, 0); + if (IS_ERR(znode)) + return znode; + } + /* Check if it is the one we are looking for */ + if (c->zroot.lnum == lnum && c->zroot.offs == offs) + return znode; + /* Descend to the parent level i.e. (level + 1) */ + if (level >= znode->level) + return NULL; + while (1) { + ubifs_search_zbranch(c, znode, key, &n); + if (n < 0) { + /* + * We reached a znode where the leftmost key is greater + * than the key we are searching for. This is the same + * situation as the one described in a huge comment at + * the end of the 'ubifs_lookup_level0()' function. And + * for exactly the same reasons we have to try to look + * left before giving up. + */ + znode = left_znode(c, znode); + if (!znode) + return NULL; + if (IS_ERR(znode)) + return znode; + ubifs_search_zbranch(c, znode, key, &n); + ubifs_assert(n >= 0); + } + if (znode->level == level + 1) + break; + znode = get_znode(c, znode, n); + if (IS_ERR(znode)) + return znode; + } + /* Check if the child is the one we are looking for */ + if (znode->zbranch[n].lnum == lnum && znode->zbranch[n].offs == offs) + return get_znode(c, znode, n); + /* If the key is unique, there is nowhere else to look */ + if (!is_hash_key(c, key)) + return NULL; + /* + * The key is not unique and so may be also in the znodes to either + * side. + */ + zn = znode; + nn = n; + /* Look left */ + while (1) { + /* Move one branch to the left */ + if (n) + n -= 1; + else { + znode = left_znode(c, znode); + if (!znode) + break; + if (IS_ERR(znode)) + return znode; + n = znode->child_cnt - 1; + } + /* Check it */ + if (znode->zbranch[n].lnum == lnum && + znode->zbranch[n].offs == offs) + return get_znode(c, znode, n); + /* Stop if the key is less than the one we are looking for */ + if (keys_cmp(c, &znode->zbranch[n].key, key) < 0) + break; + } + /* Back to the middle */ + znode = zn; + n = nn; + /* Look right */ + while (1) { + /* Move one branch to the right */ + if (++n >= znode->child_cnt) { + znode = right_znode(c, znode); + if (!znode) + break; + if (IS_ERR(znode)) + return znode; + n = 0; + } + /* Check it */ + if (znode->zbranch[n].lnum == lnum && + znode->zbranch[n].offs == offs) + return get_znode(c, znode, n); + /* Stop if the key is greater than the one we are looking for */ + if (keys_cmp(c, &znode->zbranch[n].key, key) > 0) + break; + } + return NULL; +} + +/** + * is_idx_node_in_tnc - determine if an index node is in the TNC. + * @c: UBIFS file-system description object + * @key: key of index node + * @level: index node level + * @lnum: LEB number of index node + * @offs: offset of index node + * + * This function returns %0 if the index node is not referred to in the TNC, %1 + * if the index node is referred to in the TNC and the corresponding znode is + * dirty, %2 if an index node is referred to in the TNC and the corresponding + * znode is clean, and a negative error code in case of failure. + * + * Note, the @key argument has to be the key of the first child. Also note, + * this function relies on the fact that 0:0 is never a valid LEB number and + * offset for a main-area node. + */ +int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level, + int lnum, int offs) +{ + struct ubifs_znode *znode; + + znode = lookup_znode(c, key, level, lnum, offs); + if (!znode) + return 0; + if (IS_ERR(znode)) + return PTR_ERR(znode); + + return ubifs_zn_dirty(znode) ? 1 : 2; +} + +/** + * is_leaf_node_in_tnc - determine if a non-indexing not is in the TNC. + * @c: UBIFS file-system description object + * @key: node key + * @lnum: node LEB number + * @offs: node offset + * + * This function returns %1 if the node is referred to in the TNC, %0 if it is + * not, and a negative error code in case of failure. + * + * Note, this function relies on the fact that 0:0 is never a valid LEB number + * and offset for a main-area node. + */ +static int is_leaf_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, + int lnum, int offs) +{ + struct ubifs_zbranch *zbr; + struct ubifs_znode *znode, *zn; + int n, found, err, nn; + const int unique = !is_hash_key(c, key); + + found = ubifs_lookup_level0(c, key, &znode, &n); + if (found < 0) + return found; /* Error code */ + if (!found) + return 0; + zbr = &znode->zbranch[n]; + if (lnum == zbr->lnum && offs == zbr->offs) + return 1; /* Found it */ + if (unique) + return 0; + /* + * Because the key is not unique, we have to look left + * and right as well + */ + zn = znode; + nn = n; + /* Look left */ + while (1) { + err = tnc_prev(c, &znode, &n); + if (err == -ENOENT) + break; + if (err) + return err; + if (keys_cmp(c, key, &znode->zbranch[n].key)) + break; + zbr = &znode->zbranch[n]; + if (lnum == zbr->lnum && offs == zbr->offs) + return 1; /* Found it */ + } + /* Look right */ + znode = zn; + n = nn; + while (1) { + err = tnc_next(c, &znode, &n); + if (err) { + if (err == -ENOENT) + return 0; + return err; + } + if (keys_cmp(c, key, &znode->zbranch[n].key)) + break; + zbr = &znode->zbranch[n]; + if (lnum == zbr->lnum && offs == zbr->offs) + return 1; /* Found it */ + } + return 0; +} + +/** + * ubifs_tnc_has_node - determine whether a node is in the TNC. + * @c: UBIFS file-system description object + * @key: node key + * @level: index node level (if it is an index node) + * @lnum: node LEB number + * @offs: node offset + * @is_idx: non-zero if the node is an index node + * + * This function returns %1 if the node is in the TNC, %0 if it is not, and a + * negative error code in case of failure. For index nodes, @key has to be the + * key of the first child. An index node is considered to be in the TNC only if + * the corresponding znode is clean or has not been loaded. + */ +int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level, + int lnum, int offs, int is_idx) +{ + int err; + + mutex_lock(&c->tnc_mutex); + if (is_idx) { + err = is_idx_node_in_tnc(c, key, level, lnum, offs); + if (err < 0) + goto out_unlock; + if (err == 1) + /* The index node was found but it was dirty */ + err = 0; + else if (err == 2) + /* The index node was found and it was clean */ + err = 1; + else + BUG_ON(err != 0); + } else + err = is_leaf_node_in_tnc(c, key, lnum, offs); + +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * ubifs_dirty_idx_node - dirty an index node. + * @c: UBIFS file-system description object + * @key: index node key + * @level: index node level + * @lnum: index node LEB number + * @offs: index node offset + * + * This function loads and dirties an index node so that it can be garbage + * collected. The @key argument has to be the key of the first child. This + * function relies on the fact that 0:0 is never a valid LEB number and offset + * for a main-area node. Returns %0 on success and a negative error code on + * failure. + */ +int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level, + int lnum, int offs) +{ + struct ubifs_znode *znode; + int err = 0; + + mutex_lock(&c->tnc_mutex); + znode = lookup_znode(c, key, level, lnum, offs); + if (!znode) + goto out_unlock; + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } + +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c new file mode 100644 index 0000000..8117e65 --- /dev/null +++ b/fs/ubifs/tnc_commit.c @@ -0,0 +1,1103 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* This file implements TNC functions for committing */ + +#include "ubifs.h" + +/** + * make_idx_node - make an index node for fill-the-gaps method of TNC commit. + * @c: UBIFS file-system description object + * @idx: buffer in which to place new index node + * @znode: znode from which to make new index node + * @lnum: LEB number where new index node will be written + * @offs: offset where new index node will be written + * @len: length of new index node + */ +static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx, + struct ubifs_znode *znode, int lnum, int offs, int len) +{ + struct ubifs_znode *zp; + int i, err; + + /* Make index node */ + idx->ch.node_type = UBIFS_IDX_NODE; + idx->child_cnt = cpu_to_le16(znode->child_cnt); + idx->level = cpu_to_le16(znode->level); + for (i = 0; i < znode->child_cnt; i++) { + struct ubifs_branch *br = ubifs_idx_branch(c, idx, i); + struct ubifs_zbranch *zbr = &znode->zbranch[i]; + + key_write_idx(c, &zbr->key, &br->key); + br->lnum = cpu_to_le32(zbr->lnum); + br->offs = cpu_to_le32(zbr->offs); + br->len = cpu_to_le32(zbr->len); + if (!zbr->lnum || !zbr->len) { + ubifs_err("bad ref in znode"); + dbg_dump_znode(c, znode); + if (zbr->znode) + dbg_dump_znode(c, zbr->znode); + } + } + ubifs_prepare_node(c, idx, len, 0); + +#ifdef CONFIG_UBIFS_FS_DEBUG + znode->lnum = lnum; + znode->offs = offs; + znode->len = len; +#endif + + err = insert_old_idx_znode(c, znode); + + /* Update the parent */ + zp = znode->parent; + if (zp) { + struct ubifs_zbranch *zbr; + + zbr = &zp->zbranch[znode->iip]; + zbr->lnum = lnum; + zbr->offs = offs; + zbr->len = len; + } else { + c->zroot.lnum = lnum; + c->zroot.offs = offs; + c->zroot.len = len; + } + c->calc_idx_sz += ALIGN(len, 8); + + atomic_long_dec(&c->dirty_zn_cnt); + + ubifs_assert(ubifs_zn_dirty(znode)); + ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); + + __clear_bit(DIRTY_ZNODE, &znode->flags); + __clear_bit(COW_ZNODE, &znode->flags); + + return err; +} + +/** + * fill_gap - make index nodes in gaps in dirty index LEBs. + * @c: UBIFS file-system description object + * @lnum: LEB number that gap appears in + * @gap_start: offset of start of gap + * @gap_end: offset of end of gap + * @dirt: adds dirty space to this + * + * This function returns the number of index nodes written into the gap. + */ +static int fill_gap(struct ubifs_info *c, int lnum, int gap_start, int gap_end, + int *dirt) +{ + int len, gap_remains, gap_pos, written, pad_len; + + ubifs_assert((gap_start & 7) == 0); + ubifs_assert((gap_end & 7) == 0); + ubifs_assert(gap_end >= gap_start); + + gap_remains = gap_end - gap_start; + if (!gap_remains) + return 0; + gap_pos = gap_start; + written = 0; + while (c->enext) { + len = ubifs_idx_node_sz(c, c->enext->child_cnt); + if (len < gap_remains) { + struct ubifs_znode *znode = c->enext; + const int alen = ALIGN(len, 8); + int err; + + ubifs_assert(alen <= gap_remains); + err = make_idx_node(c, c->ileb_buf + gap_pos, znode, + lnum, gap_pos, len); + if (err) + return err; + gap_remains -= alen; + gap_pos += alen; + c->enext = znode->cnext; + if (c->enext == c->cnext) + c->enext = NULL; + written += 1; + } else + break; + } + if (gap_end == c->leb_size) { + c->ileb_len = ALIGN(gap_pos, c->min_io_size); + /* Pad to end of min_io_size */ + pad_len = c->ileb_len - gap_pos; + } else + /* Pad to end of gap */ + pad_len = gap_remains; + dbg_gc("LEB %d:%d to %d len %d nodes written %d wasted bytes %d", + lnum, gap_start, gap_end, gap_end - gap_start, written, pad_len); + ubifs_pad(c, c->ileb_buf + gap_pos, pad_len); + *dirt += pad_len; + return written; +} + +/** + * find_old_idx - find an index node obsoleted since the last commit start. + * @c: UBIFS file-system description object + * @lnum: LEB number of obsoleted index node + * @offs: offset of obsoleted index node + * + * Returns %1 if found and %0 otherwise. + */ +static int find_old_idx(struct ubifs_info *c, int lnum, int offs) +{ + struct ubifs_old_idx *o; + struct rb_node *p; + + p = c->old_idx.rb_node; + while (p) { + o = rb_entry(p, struct ubifs_old_idx, rb); + if (lnum < o->lnum) + p = p->rb_left; + else if (lnum > o->lnum) + p = p->rb_right; + else if (offs < o->offs) + p = p->rb_left; + else if (offs > o->offs) + p = p->rb_right; + else + return 1; + } + return 0; +} + +/** + * is_idx_node_in_use - determine if an index node can be overwritten. + * @c: UBIFS file-system description object + * @key: key of index node + * @level: index node level + * @lnum: LEB number of index node + * @offs: offset of index node + * + * If @key / @lnum / @offs identify an index node that was not part of the old + * index, then this function returns %0 (obsolete). Else if the index node was + * part of the old index but is now dirty %1 is returned, else if it is clean %2 + * is returned. A negative error code is returned on failure. + */ +static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key, + int level, int lnum, int offs) +{ + int ret; + + ret = is_idx_node_in_tnc(c, key, level, lnum, offs); + if (ret < 0) + return ret; /* Error code */ + if (ret == 0) + if (find_old_idx(c, lnum, offs)) + return 1; + return ret; +} + +/** + * layout_leb_in_gaps - layout index nodes using in-the-gaps method. + * @c: UBIFS file-system description object + * @p: return LEB number here + * + * This function lays out new index nodes for dirty znodes using in-the-gaps + * method of TNC commit. + * This function merely puts the next znode into the next gap, making no attempt + * to try to maximise the number of znodes that fit. + * This function returns the number of index nodes written into the gaps, or a + * negative error code on failure. + */ +static int layout_leb_in_gaps(struct ubifs_info *c, int *p) +{ + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + int lnum, dirt = 0, gap_start, gap_end, err, written, tot_written; + + tot_written = 0; + /* Get an index LEB with lots of obsolete index nodes */ + lnum = ubifs_find_dirty_idx_leb(c); + if (lnum < 0) + /* + * There also may be dirt in the index head that could be + * filled, however we do not check there at present. + */ + return lnum; /* Error code */ + *p = lnum; + dbg_gc("LEB %d", lnum); + /* + * Scan the index LEB. We use the generic scan for this even though + * it is more comprehensive and less efficient than is needed for this + * purpose. + */ + sleb = ubifs_scan(c, lnum, 0, c->ileb_buf); + c->ileb_len = 0; + if (IS_ERR(sleb)) + return PTR_ERR(sleb); + gap_start = 0; + list_for_each_entry(snod, &sleb->nodes, list) { + struct ubifs_idx_node *idx; + int in_use, level; + + ubifs_assert(snod->type == UBIFS_IDX_NODE); + idx = snod->node; + key_read(c, ubifs_idx_key(c, idx), &snod->key); + level = le16_to_cpu(idx->level); + /* Determine if the index node is in use (not obsolete) */ + in_use = is_idx_node_in_use(c, &snod->key, level, lnum, + snod->offs); + if (in_use < 0) { + ubifs_scan_destroy(sleb); + return in_use; /* Error code */ + } + if (in_use) { + if (in_use == 1) + dirt += ALIGN(snod->len, 8); + /* + * The obsolete index nodes form gaps that can be + * overwritten. This gap has ended because we have + * found an index node that is still in use + * i.e. not obsolete + */ + gap_end = snod->offs; + /* Try to fill gap */ + written = fill_gap(c, lnum, gap_start, gap_end, &dirt); + if (written < 0) { + ubifs_scan_destroy(sleb); + return written; /* Error code */ + } + tot_written += written; + gap_start = ALIGN(snod->offs + snod->len, 8); + } + } + ubifs_scan_destroy(sleb); + c->ileb_len = c->leb_size; + gap_end = c->leb_size; + /* Try to fill gap */ + written = fill_gap(c, lnum, gap_start, gap_end, &dirt); + if (written < 0) + return written; /* Error code */ + tot_written += written; + if (tot_written == 0) { + struct ubifs_lprops lp; + + dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written); + err = ubifs_read_one_lp(c, lnum, &lp); + if (err) + return err; + if (lp.free == c->leb_size) { + /* + * We must have snatched this LEB from the idx_gc list + * so we need to correct the free and dirty space. + */ + err = ubifs_change_one_lp(c, lnum, + c->leb_size - c->ileb_len, + dirt, 0, 0, 0); + if (err) + return err; + } + return 0; + } + err = ubifs_change_one_lp(c, lnum, c->leb_size - c->ileb_len, dirt, + 0, 0, 0); + if (err) + return err; + err = ubifs_leb_change(c, lnum, c->ileb_buf, c->ileb_len, + UBI_SHORTTERM); + if (err) + return err; + dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written); + return tot_written; +} + +/** + * get_leb_cnt - calculate the number of empty LEBs needed to commit. + * @c: UBIFS file-system description object + * @cnt: number of znodes to commit + * + * This function returns the number of empty LEBs needed to commit @cnt znodes + * to the current index head. The number is not exact and may be more than + * needed. + */ +static int get_leb_cnt(struct ubifs_info *c, int cnt) +{ + int d; + + /* Assume maximum index node size (i.e. overestimate space needed) */ + cnt -= (c->leb_size - c->ihead_offs) / c->max_idx_node_sz; + if (cnt < 0) + cnt = 0; + d = c->leb_size / c->max_idx_node_sz; + return DIV_ROUND_UP(cnt, d); +} + +/** + * layout_in_gaps - in-the-gaps method of committing TNC. + * @c: UBIFS file-system description object + * @cnt: number of dirty znodes to commit. + * + * This function lays out new index nodes for dirty znodes using in-the-gaps + * method of TNC commit. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int layout_in_gaps(struct ubifs_info *c, int cnt) +{ + int err, leb_needed_cnt, written, *p; + + dbg_gc("%d znodes to write", cnt); + + c->gap_lebs = kmalloc(sizeof(int) * (c->lst.idx_lebs + 1), GFP_NOFS); + if (!c->gap_lebs) + return -ENOMEM; + + p = c->gap_lebs; + do { + ubifs_assert(p < c->gap_lebs + sizeof(int) * c->lst.idx_lebs); + written = layout_leb_in_gaps(c, p); + if (written < 0) { + err = written; + if (err == -ENOSPC) { + if (!dbg_force_in_the_gaps_enabled) { + /* + * Do not print scary warnings if the + * debugging option which forces + * in-the-gaps is enabled. + */ + ubifs_err("out of space"); + spin_lock(&c->space_lock); + dbg_dump_budg(c); + spin_unlock(&c->space_lock); + dbg_dump_lprops(c); + } + /* Try to commit anyway */ + err = 0; + break; + } + kfree(c->gap_lebs); + c->gap_lebs = NULL; + return err; + } + p++; + cnt -= written; + leb_needed_cnt = get_leb_cnt(c, cnt); + dbg_gc("%d znodes remaining, need %d LEBs, have %d", cnt, + leb_needed_cnt, c->ileb_cnt); + } while (leb_needed_cnt > c->ileb_cnt); + + *p = -1; + return 0; +} + +/** + * layout_in_empty_space - layout index nodes in empty space. + * @c: UBIFS file-system description object + * + * This function lays out new index nodes for dirty znodes using empty LEBs. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int layout_in_empty_space(struct ubifs_info *c) +{ + struct ubifs_znode *znode, *cnext, *zp; + int lnum, offs, len, next_len, buf_len, buf_offs, used, avail; + int wlen, blen, err; + + cnext = c->enext; + if (!cnext) + return 0; + + lnum = c->ihead_lnum; + buf_offs = c->ihead_offs; + + buf_len = ubifs_idx_node_sz(c, c->fanout); + buf_len = ALIGN(buf_len, c->min_io_size); + used = 0; + avail = buf_len; + + /* Ensure there is enough room for first write */ + next_len = ubifs_idx_node_sz(c, cnext->child_cnt); + if (buf_offs + next_len > c->leb_size) + lnum = -1; + + while (1) { + znode = cnext; + + len = ubifs_idx_node_sz(c, znode->child_cnt); + + /* Determine the index node position */ + if (lnum == -1) { + if (c->ileb_nxt >= c->ileb_cnt) { + ubifs_err("out of space"); + return -ENOSPC; + } + lnum = c->ilebs[c->ileb_nxt++]; + buf_offs = 0; + used = 0; + avail = buf_len; + } + + offs = buf_offs + used; + +#ifdef CONFIG_UBIFS_FS_DEBUG + znode->lnum = lnum; + znode->offs = offs; + znode->len = len; +#endif + + /* Update the parent */ + zp = znode->parent; + if (zp) { + struct ubifs_zbranch *zbr; + int i; + + i = znode->iip; + zbr = &zp->zbranch[i]; + zbr->lnum = lnum; + zbr->offs = offs; + zbr->len = len; + } else { + c->zroot.lnum = lnum; + c->zroot.offs = offs; + c->zroot.len = len; + } + c->calc_idx_sz += ALIGN(len, 8); + + /* + * Once lprops is updated, we can decrease the dirty znode count + * but it is easier to just do it here. + */ + atomic_long_dec(&c->dirty_zn_cnt); + + /* + * Calculate the next index node length to see if there is + * enough room for it + */ + cnext = znode->cnext; + if (cnext == c->cnext) + next_len = 0; + else + next_len = ubifs_idx_node_sz(c, cnext->child_cnt); + + if (c->min_io_size == 1) { + buf_offs += ALIGN(len, 8); + if (next_len) { + if (buf_offs + next_len <= c->leb_size) + continue; + err = ubifs_update_one_lp(c, lnum, 0, + c->leb_size - buf_offs, 0, 0); + if (err) + return err; + lnum = -1; + continue; + } + err = ubifs_update_one_lp(c, lnum, + c->leb_size - buf_offs, 0, 0, 0); + if (err) + return err; + break; + } + + /* Update buffer positions */ + wlen = used + len; + used += ALIGN(len, 8); + avail -= ALIGN(len, 8); + + if (next_len != 0 && + buf_offs + used + next_len <= c->leb_size && + avail > 0) + continue; + + if (avail <= 0 && next_len && + buf_offs + used + next_len <= c->leb_size) + blen = buf_len; + else + blen = ALIGN(wlen, c->min_io_size); + + /* The buffer is full or there are no more znodes to do */ + buf_offs += blen; + if (next_len) { + if (buf_offs + next_len > c->leb_size) { + err = ubifs_update_one_lp(c, lnum, + c->leb_size - buf_offs, blen - used, + 0, 0); + if (err) + return err; + lnum = -1; + } + used -= blen; + if (used < 0) + used = 0; + avail = buf_len - used; + continue; + } + err = ubifs_update_one_lp(c, lnum, c->leb_size - buf_offs, + blen - used, 0, 0); + if (err) + return err; + break; + } + +#ifdef CONFIG_UBIFS_FS_DEBUG + c->new_ihead_lnum = lnum; + c->new_ihead_offs = buf_offs; +#endif + + return 0; +} + +/** + * layout_commit - determine positions of index nodes to commit. + * @c: UBIFS file-system description object + * @no_space: indicates that insufficient empty LEBs were allocated + * @cnt: number of znodes to commit + * + * Calculate and update the positions of index nodes to commit. If there were + * an insufficient number of empty LEBs allocated, then index nodes are placed + * into the gaps created by obsolete index nodes in non-empty index LEBs. For + * this purpose, an obsolete index node is one that was not in the index as at + * the end of the last commit. To write "in-the-gaps" requires that those index + * LEBs are updated atomically in-place. + */ +static int layout_commit(struct ubifs_info *c, int no_space, int cnt) +{ + int err; + + if (no_space) { + err = layout_in_gaps(c, cnt); + if (err) + return err; + } + err = layout_in_empty_space(c); + return err; +} + +/** + * find_first_dirty - find first dirty znode. + * @znode: znode to begin searching from + */ +static struct ubifs_znode *find_first_dirty(struct ubifs_znode *znode) +{ + int i, cont; + + if (!znode) + return NULL; + + while (1) { + if (znode->level == 0) { + if (ubifs_zn_dirty(znode)) + return znode; + return NULL; + } + cont = 0; + for (i = 0; i < znode->child_cnt; i++) { + struct ubifs_zbranch *zbr = &znode->zbranch[i]; + + if (zbr->znode && ubifs_zn_dirty(zbr->znode)) { + znode = zbr->znode; + cont = 1; + break; + } + } + if (!cont) { + if (ubifs_zn_dirty(znode)) + return znode; + return NULL; + } + } +} + +/** + * find_next_dirty - find next dirty znode. + * @znode: znode to begin searching from + */ +static struct ubifs_znode *find_next_dirty(struct ubifs_znode *znode) +{ + int n = znode->iip + 1; + + znode = znode->parent; + if (!znode) + return NULL; + for (; n < znode->child_cnt; n++) { + struct ubifs_zbranch *zbr = &znode->zbranch[n]; + + if (zbr->znode && ubifs_zn_dirty(zbr->znode)) + return find_first_dirty(zbr->znode); + } + return znode; +} + +/** + * get_znodes_to_commit - create list of dirty znodes to commit. + * @c: UBIFS file-system description object + * + * This function returns the number of znodes to commit. + */ +static int get_znodes_to_commit(struct ubifs_info *c) +{ + struct ubifs_znode *znode, *cnext; + int cnt = 0; + + c->cnext = find_first_dirty(c->zroot.znode); + znode = c->enext = c->cnext; + if (!znode) { + dbg_cmt("no znodes to commit"); + return 0; + } + cnt += 1; + while (1) { + ubifs_assert(!test_bit(COW_ZNODE, &znode->flags)); + __set_bit(COW_ZNODE, &znode->flags); + znode->alt = 0; + cnext = find_next_dirty(znode); + if (!cnext) { + znode->cnext = c->cnext; + break; + } + znode->cnext = cnext; + znode = cnext; + cnt += 1; + } + dbg_cmt("committing %d znodes", cnt); + ubifs_assert(cnt == atomic_long_read(&c->dirty_zn_cnt)); + return cnt; +} + +/** + * alloc_idx_lebs - allocate empty LEBs to be used to commit. + * @c: UBIFS file-system description object + * @cnt: number of znodes to commit + * + * This function returns %-ENOSPC if it cannot allocate a sufficient number of + * empty LEBs. %0 is returned on success, otherwise a negative error code + * is returned. + */ +static int alloc_idx_lebs(struct ubifs_info *c, int cnt) +{ + int i, leb_cnt, lnum; + + c->ileb_cnt = 0; + c->ileb_nxt = 0; + leb_cnt = get_leb_cnt(c, cnt); + dbg_cmt("need about %d empty LEBS for TNC commit", leb_cnt); + if (!leb_cnt) + return 0; + c->ilebs = kmalloc(leb_cnt * sizeof(int), GFP_NOFS); + if (!c->ilebs) + return -ENOMEM; + for (i = 0; i < leb_cnt; i++) { + lnum = ubifs_find_free_leb_for_idx(c); + if (lnum < 0) + return lnum; + c->ilebs[c->ileb_cnt++] = lnum; + dbg_cmt("LEB %d", lnum); + } + if (dbg_force_in_the_gaps()) + return -ENOSPC; + return 0; +} + +/** + * free_unused_idx_lebs - free unused LEBs that were allocated for the commit. + * @c: UBIFS file-system description object + * + * It is possible that we allocate more empty LEBs for the commit than we need. + * This functions frees the surplus. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int free_unused_idx_lebs(struct ubifs_info *c) +{ + int i, err = 0, lnum, er; + + for (i = c->ileb_nxt; i < c->ileb_cnt; i++) { + lnum = c->ilebs[i]; + dbg_cmt("LEB %d", lnum); + er = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, + LPROPS_INDEX | LPROPS_TAKEN, 0); + if (!err) + err = er; + } + return err; +} + +/** + * free_idx_lebs - free unused LEBs after commit end. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +static int free_idx_lebs(struct ubifs_info *c) +{ + int err; + + err = free_unused_idx_lebs(c); + kfree(c->ilebs); + c->ilebs = NULL; + return err; +} + +/** + * ubifs_tnc_start_commit - start TNC commit. + * @c: UBIFS file-system description object + * @zroot: new index root position is returned here + * + * This function prepares the list of indexing nodes to commit and lays out + * their positions on flash. If there is not enough free space it uses the + * in-gap commit method. Returns zero in case of success and a negative error + * code in case of failure. + */ +int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot) +{ + int err = 0, cnt; + + mutex_lock(&c->tnc_mutex); + err = dbg_check_tnc(c, 1); + if (err) + goto out; + cnt = get_znodes_to_commit(c); + if (cnt != 0) { + int no_space = 0; + + err = alloc_idx_lebs(c, cnt); + if (err == -ENOSPC) + no_space = 1; + else if (err) + goto out_free; + err = layout_commit(c, no_space, cnt); + if (err) + goto out_free; + ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0); + err = free_unused_idx_lebs(c); + if (err) + goto out; + } + destroy_old_idx(c); + memcpy(zroot, &c->zroot, sizeof(struct ubifs_zbranch)); + + err = ubifs_save_dirty_idx_lnums(c); + if (err) + goto out; + + spin_lock(&c->space_lock); + /* + * Although we have not finished committing yet, update size of the + * committed index ('c->old_idx_sz') and zero out the index growth + * budget. It is OK to do this now, because we've reserved all the + * space which is needed to commit the index, and it is save for the + * budgeting subsystem to assume the index is already committed, + * even though it is not. + */ + c->old_idx_sz = c->calc_idx_sz; + c->budg_uncommitted_idx = 0; + spin_unlock(&c->space_lock); + mutex_unlock(&c->tnc_mutex); + + dbg_cmt("number of index LEBs %d", c->lst.idx_lebs); + dbg_cmt("size of index %llu", c->calc_idx_sz); + return err; + +out_free: + free_idx_lebs(c); +out: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * write_index - write index nodes. + * @c: UBIFS file-system description object + * + * This function writes the index nodes whose positions were laid out in the + * layout_in_empty_space function. + */ +static int write_index(struct ubifs_info *c) +{ + struct ubifs_idx_node *idx; + struct ubifs_znode *znode, *cnext; + int i, lnum, offs, len, next_len, buf_len, buf_offs, used; + int avail, wlen, err, lnum_pos = 0; + + cnext = c->enext; + if (!cnext) + return 0; + + /* + * Always write index nodes to the index head so that index nodes and + * other types of nodes are never mixed in the same erase block. + */ + lnum = c->ihead_lnum; + buf_offs = c->ihead_offs; + + /* Allocate commit buffer */ + buf_len = ALIGN(c->max_idx_node_sz, c->min_io_size); + used = 0; + avail = buf_len; + + /* Ensure there is enough room for first write */ + next_len = ubifs_idx_node_sz(c, cnext->child_cnt); + if (buf_offs + next_len > c->leb_size) { + err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0, 0, + LPROPS_TAKEN); + if (err) + return err; + lnum = -1; + } + + while (1) { + cond_resched(); + + znode = cnext; + idx = c->cbuf + used; + + /* Make index node */ + idx->ch.node_type = UBIFS_IDX_NODE; + idx->child_cnt = cpu_to_le16(znode->child_cnt); + idx->level = cpu_to_le16(znode->level); + for (i = 0; i < znode->child_cnt; i++) { + struct ubifs_branch *br = ubifs_idx_branch(c, idx, i); + struct ubifs_zbranch *zbr = &znode->zbranch[i]; + + key_write_idx(c, &zbr->key, &br->key); + br->lnum = cpu_to_le32(zbr->lnum); + br->offs = cpu_to_le32(zbr->offs); + br->len = cpu_to_le32(zbr->len); + if (!zbr->lnum || !zbr->len) { + ubifs_err("bad ref in znode"); + dbg_dump_znode(c, znode); + if (zbr->znode) + dbg_dump_znode(c, zbr->znode); + } + } + len = ubifs_idx_node_sz(c, znode->child_cnt); + ubifs_prepare_node(c, idx, len, 0); + + /* Determine the index node position */ + if (lnum == -1) { + lnum = c->ilebs[lnum_pos++]; + buf_offs = 0; + used = 0; + avail = buf_len; + } + offs = buf_offs + used; + +#ifdef CONFIG_UBIFS_FS_DEBUG + if (lnum != znode->lnum || offs != znode->offs || + len != znode->len) { + ubifs_err("inconsistent znode posn"); + return -EINVAL; + } +#endif + + /* Grab some stuff from znode while we still can */ + cnext = znode->cnext; + + ubifs_assert(ubifs_zn_dirty(znode)); + ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); + + /* + * It is important that other threads should see %DIRTY_ZNODE + * flag cleared before %COW_ZNODE. Specifically, it matters in + * the 'dirty_cow_znode()' function. This is the reason for the + * first barrier. Also, we want the bit changes to be seen to + * other threads ASAP, to avoid unnecesarry copying, which is + * the reason for the second barrier. + */ + clear_bit(DIRTY_ZNODE, &znode->flags); + smp_mb__before_clear_bit(); + clear_bit(COW_ZNODE, &znode->flags); + smp_mb__after_clear_bit(); + + /* Do not access znode from this point on */ + + /* Update buffer positions */ + wlen = used + len; + used += ALIGN(len, 8); + avail -= ALIGN(len, 8); + + /* + * Calculate the next index node length to see if there is + * enough room for it + */ + if (cnext == c->cnext) + next_len = 0; + else + next_len = ubifs_idx_node_sz(c, cnext->child_cnt); + + if (c->min_io_size == 1) { + /* + * Write the prepared index node immediately if there is + * no minimum IO size + */ + err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, + wlen, UBI_SHORTTERM); + if (err) + return err; + buf_offs += ALIGN(wlen, 8); + if (next_len) { + used = 0; + avail = buf_len; + if (buf_offs + next_len > c->leb_size) { + err = ubifs_update_one_lp(c, lnum, + LPROPS_NC, 0, 0, LPROPS_TAKEN); + if (err) + return err; + lnum = -1; + } + continue; + } + } else { + int blen, nxt_offs = buf_offs + used + next_len; + + if (next_len && nxt_offs <= c->leb_size) { + if (avail > 0) + continue; + else + blen = buf_len; + } else { + wlen = ALIGN(wlen, 8); + blen = ALIGN(wlen, c->min_io_size); + ubifs_pad(c, c->cbuf + wlen, blen - wlen); + } + /* + * The buffer is full or there are no more znodes + * to do + */ + err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, + blen, UBI_SHORTTERM); + if (err) + return err; + buf_offs += blen; + if (next_len) { + if (nxt_offs > c->leb_size) { + err = ubifs_update_one_lp(c, lnum, + LPROPS_NC, 0, 0, LPROPS_TAKEN); + if (err) + return err; + lnum = -1; + } + used -= blen; + if (used < 0) + used = 0; + avail = buf_len - used; + memmove(c->cbuf, c->cbuf + blen, used); + continue; + } + } + break; + } + +#ifdef CONFIG_UBIFS_FS_DEBUG + if (lnum != c->new_ihead_lnum || buf_offs != c->new_ihead_offs) { + ubifs_err("inconsistent ihead"); + return -EINVAL; + } +#endif + + c->ihead_lnum = lnum; + c->ihead_offs = buf_offs; + + return 0; +} + +/** + * free_obsolete_znodes - free obsolete znodes. + * @c: UBIFS file-system description object + * + * At the end of commit end, obsolete znodes are freed. + */ +static void free_obsolete_znodes(struct ubifs_info *c) +{ + struct ubifs_znode *znode, *cnext; + + cnext = c->cnext; + do { + znode = cnext; + cnext = znode->cnext; + if (test_bit(OBSOLETE_ZNODE, &znode->flags)) + kfree(znode); + else { + znode->cnext = NULL; + atomic_long_inc(&c->clean_zn_cnt); + atomic_long_inc(&ubifs_clean_zn_cnt); + } + } while (cnext != c->cnext); +} + +/** + * return_gap_lebs - return LEBs used by the in-gap commit method. + * @c: UBIFS file-system description object + * + * This function clears the "taken" flag for the LEBs which were used by the + * "commit in-the-gaps" method. + */ +static int return_gap_lebs(struct ubifs_info *c) +{ + int *p, err; + + if (!c->gap_lebs) + return 0; + + dbg_cmt(""); + for (p = c->gap_lebs; *p != -1; p++) { + err = ubifs_change_one_lp(c, *p, LPROPS_NC, LPROPS_NC, 0, + LPROPS_TAKEN, 0); + if (err) + return err; + } + + kfree(c->gap_lebs); + c->gap_lebs = NULL; + return 0; +} + +/** + * ubifs_tnc_end_commit - update the TNC for commit end. + * @c: UBIFS file-system description object + * + * Write the dirty znodes. + */ +int ubifs_tnc_end_commit(struct ubifs_info *c) +{ + int err; + + if (!c->cnext) + return 0; + + err = return_gap_lebs(c); + if (err) + return err; + + err = write_index(c); + if (err) + return err; + + mutex_lock(&c->tnc_mutex); + + dbg_cmt("TNC height is %d", c->zroot.znode->level + 1); + + free_obsolete_znodes(c); + + c->cnext = NULL; + kfree(c->ilebs); + c->ilebs = NULL; + + mutex_unlock(&c->tnc_mutex); + + return 0; +} diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c new file mode 100644 index 0000000..a25c1cc --- /dev/null +++ b/fs/ubifs/tnc_misc.c @@ -0,0 +1,494 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file contains miscelanious TNC-related functions shared betweend + * different files. This file does not form any logically separate TNC + * sub-system. The file was created because there is a lot of TNC code and + * putting it all in one file would make that file too big and unreadable. + */ + +#include "ubifs.h" + +/** + * ubifs_tnc_levelorder_next - next TNC tree element in levelorder traversal. + * @zr: root of the subtree to traverse + * @znode: previous znode + * + * This function implements levelorder TNC traversal. The LNC is ignored. + * Returns the next element or %NULL if @znode is already the last one. + */ +struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, + struct ubifs_znode *znode) +{ + int level, iip, level_search = 0; + struct ubifs_znode *zn; + + ubifs_assert(zr); + + if (unlikely(!znode)) + return zr; + + if (unlikely(znode == zr)) { + if (znode->level == 0) + return NULL; + return ubifs_tnc_find_child(zr, 0); + } + + level = znode->level; + + iip = znode->iip; + while (1) { + ubifs_assert(znode->level <= zr->level); + + /* + * First walk up until there is a znode with next branch to + * look at. + */ + while (znode->parent != zr && iip >= znode->parent->child_cnt) { + znode = znode->parent; + iip = znode->iip; + } + + if (unlikely(znode->parent == zr && + iip >= znode->parent->child_cnt)) { + /* This level is done, switch to the lower one */ + level -= 1; + if (level_search || level < 0) + /* + * We were already looking for znode at lower + * level ('level_search'). As we are here + * again, it just does not exist. Or all levels + * were finished ('level < 0'). + */ + return NULL; + + level_search = 1; + iip = -1; + znode = ubifs_tnc_find_child(zr, 0); + ubifs_assert(znode); + } + + /* Switch to the next index */ + zn = ubifs_tnc_find_child(znode->parent, iip + 1); + if (!zn) { + /* No more children to look at, we have walk up */ + iip = znode->parent->child_cnt; + continue; + } + + /* Walk back down to the level we came from ('level') */ + while (zn->level != level) { + znode = zn; + zn = ubifs_tnc_find_child(zn, 0); + if (!zn) { + /* + * This path is not too deep so it does not + * reach 'level'. Try next path. + */ + iip = znode->iip; + break; + } + } + + if (zn) { + ubifs_assert(zn->level >= 0); + return zn; + } + } +} + +/** + * ubifs_search_zbranch - search znode branch. + * @c: UBIFS file-system description object + * @znode: znode to search in + * @key: key to search for + * @n: znode branch slot number is returned here + * + * This is a helper function which search branch with key @key in @znode using + * binary search. The result of the search may be: + * o exact match, then %1 is returned, and the slot number of the branch is + * stored in @n; + * o no exact match, then %0 is returned and the slot number of the left + * closest branch is returned in @n; the slot if all keys in this znode are + * greater than @key, then %-1 is returned in @n. + */ +int ubifs_search_zbranch(const struct ubifs_info *c, + const struct ubifs_znode *znode, + const union ubifs_key *key, int *n) +{ + int beg = 0, end = znode->child_cnt, uninitialized_var(mid); + int uninitialized_var(cmp); + const struct ubifs_zbranch *zbr = &znode->zbranch[0]; + + ubifs_assert(end > beg); + + while (end > beg) { + mid = (beg + end) >> 1; + cmp = keys_cmp(c, key, &zbr[mid].key); + if (cmp > 0) + beg = mid + 1; + else if (cmp < 0) + end = mid; + else { + *n = mid; + return 1; + } + } + + *n = end - 1; + + /* The insert point is after *n */ + ubifs_assert(*n >= -1 && *n < znode->child_cnt); + if (*n == -1) + ubifs_assert(keys_cmp(c, key, &zbr[0].key) < 0); + else + ubifs_assert(keys_cmp(c, key, &zbr[*n].key) > 0); + if (*n + 1 < znode->child_cnt) + ubifs_assert(keys_cmp(c, key, &zbr[*n + 1].key) < 0); + + return 0; +} + +/** + * ubifs_tnc_postorder_first - find first znode to do postorder tree traversal. + * @znode: znode to start at (root of the sub-tree to traverse) + * + * Find the lowest leftmost znode in a subtree of the TNC tree. The LNC is + * ignored. + */ +struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode) +{ + if (unlikely(!znode)) + return NULL; + + while (znode->level > 0) { + struct ubifs_znode *child; + + child = ubifs_tnc_find_child(znode, 0); + if (!child) + return znode; + znode = child; + } + + return znode; +} + +/** + * ubifs_tnc_postorder_next - next TNC tree element in postorder traversal. + * @znode: previous znode + * + * This function implements postorder TNC traversal. The LNC is ignored. + * Returns the next element or %NULL if @znode is already the last one. + */ +struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode) +{ + struct ubifs_znode *zn; + + ubifs_assert(znode); + if (unlikely(!znode->parent)) + return NULL; + + /* Switch to the next index in the parent */ + zn = ubifs_tnc_find_child(znode->parent, znode->iip + 1); + if (!zn) + /* This is in fact the last child, return parent */ + return znode->parent; + + /* Go to the first znode in this new subtree */ + return ubifs_tnc_postorder_first(zn); +} + +/** + * ubifs_destroy_tnc_subtree - destroy all znodes connected to a subtree. + * @znode: znode defining subtree to destroy + * + * This function destroys subtree of the TNC tree. Returns number of clean + * znodes in the subtree. + */ +long ubifs_destroy_tnc_subtree(struct ubifs_znode *znode) +{ + struct ubifs_znode *zn = ubifs_tnc_postorder_first(znode); + long clean_freed = 0; + int n; + + ubifs_assert(zn); + while (1) { + for (n = 0; n < zn->child_cnt; n++) { + if (!zn->zbranch[n].znode) + continue; + + if (zn->level > 0 && + !ubifs_zn_dirty(zn->zbranch[n].znode)) + clean_freed += 1; + + cond_resched(); + kfree(zn->zbranch[n].znode); + } + + if (zn == znode) { + if (!ubifs_zn_dirty(zn)) + clean_freed += 1; + kfree(zn); + return clean_freed; + } + + zn = ubifs_tnc_postorder_next(zn); + } +} + +/** + * read_znode - read an indexing node from flash and fill znode. + * @c: UBIFS file-system description object + * @lnum: LEB of the indexing node to read + * @offs: node offset + * @len: node length + * @znode: znode to read to + * + * This function reads an indexing node from the flash media and fills znode + * with the read data. Returns zero in case of success and a negative error + * code in case of failure. The read indexing node is validated and if anything + * is wrong with it, this function prints complaint messages and returns + * %-EINVAL. + */ +static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, + struct ubifs_znode *znode) +{ + int i, err, type, cmp; + struct ubifs_idx_node *idx; + + idx = kmalloc(c->max_idx_node_sz, GFP_NOFS); + if (!idx) + return -ENOMEM; + + err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs); + if (err < 0) { + kfree(idx); + return err; + } + + znode->child_cnt = le16_to_cpu(idx->child_cnt); + znode->level = le16_to_cpu(idx->level); + + dbg_tnc("LEB %d:%d, level %d, %d branch", + lnum, offs, znode->level, znode->child_cnt); + + if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) { + dbg_err("current fanout %d, branch count %d", + c->fanout, znode->child_cnt); + dbg_err("max levels %d, znode level %d", + UBIFS_MAX_LEVELS, znode->level); + err = 1; + goto out_dump; + } + + for (i = 0; i < znode->child_cnt; i++) { + const struct ubifs_branch *br = ubifs_idx_branch(c, idx, i); + struct ubifs_zbranch *zbr = &znode->zbranch[i]; + + key_read(c, &br->key, &zbr->key); + zbr->lnum = le32_to_cpu(br->lnum); + zbr->offs = le32_to_cpu(br->offs); + zbr->len = le32_to_cpu(br->len); + zbr->znode = NULL; + + /* Validate branch */ + + if (zbr->lnum < c->main_first || + zbr->lnum >= c->leb_cnt || zbr->offs < 0 || + zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) { + dbg_err("bad branch %d", i); + err = 2; + goto out_dump; + } + + switch (key_type(c, &zbr->key)) { + case UBIFS_INO_KEY: + case UBIFS_DATA_KEY: + case UBIFS_DENT_KEY: + case UBIFS_XENT_KEY: + break; + default: + dbg_msg("bad key type at slot %d: %s", i, + DBGKEY(&zbr->key)); + err = 3; + goto out_dump; + } + + if (znode->level) + continue; + + type = key_type(c, &zbr->key); + if (c->ranges[type].max_len == 0) { + if (zbr->len != c->ranges[type].len) { + dbg_err("bad target node (type %d) length (%d)", + type, zbr->len); + dbg_err("have to be %d", c->ranges[type].len); + err = 4; + goto out_dump; + } + } else if (zbr->len < c->ranges[type].min_len || + zbr->len > c->ranges[type].max_len) { + dbg_err("bad target node (type %d) length (%d)", + type, zbr->len); + dbg_err("have to be in range of %d-%d", + c->ranges[type].min_len, + c->ranges[type].max_len); + err = 5; + goto out_dump; + } + } + + /* + * Ensure that the next key is greater or equivalent to the + * previous one. + */ + for (i = 0; i < znode->child_cnt - 1; i++) { + const union ubifs_key *key1, *key2; + + key1 = &znode->zbranch[i].key; + key2 = &znode->zbranch[i + 1].key; + + cmp = keys_cmp(c, key1, key2); + if (cmp > 0) { + dbg_err("bad key order (keys %d and %d)", i, i + 1); + err = 6; + goto out_dump; + } else if (cmp == 0 && !is_hash_key(c, key1)) { + /* These can only be keys with colliding hash */ + dbg_err("keys %d and %d are not hashed but equivalent", + i, i + 1); + err = 7; + goto out_dump; + } + } + + kfree(idx); + return 0; + +out_dump: + ubifs_err("bad indexing node at LEB %d:%d, error %d", lnum, offs, err); + dbg_dump_node(c, idx); + kfree(idx); + return -EINVAL; +} + +/** + * ubifs_load_znode - load znode to TNC cache. + * @c: UBIFS file-system description object + * @zbr: znode branch + * @parent: znode's parent + * @iip: index in parent + * + * This function loads znode pointed to by @zbr into the TNC cache and + * returns pointer to it in case of success and a negative error code in case + * of failure. + */ +struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c, + struct ubifs_zbranch *zbr, + struct ubifs_znode *parent, int iip) +{ + int err; + struct ubifs_znode *znode; + + ubifs_assert(!zbr->znode); + /* + * A slab cache is not presently used for znodes because the znode size + * depends on the fanout which is stored in the superblock. + */ + znode = kzalloc(c->max_znode_sz, GFP_NOFS); + if (!znode) + return ERR_PTR(-ENOMEM); + + err = read_znode(c, zbr->lnum, zbr->offs, zbr->len, znode); + if (err) + goto out; + + atomic_long_inc(&c->clean_zn_cnt); + + /* + * Increment the global clean znode counter as well. It is OK that + * global and per-FS clean znode counters may be inconsistent for some + * short time (because we might be preempted at this point), the global + * one is only used in shrinker. + */ + atomic_long_inc(&ubifs_clean_zn_cnt); + + zbr->znode = znode; + znode->parent = parent; + znode->time = get_seconds(); + znode->iip = iip; + + return znode; + +out: + kfree(znode); + return ERR_PTR(err); +} + +/** + * ubifs_tnc_read_node - read a leaf node from the flash media. + * @c: UBIFS file-system description object + * @zbr: key and position of the node + * @node: node is returned here + * + * This function reads a node defined by @zbr from the flash media. Returns + * zero in case of success or a negative negative error code in case of + * failure. + */ +int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, + void *node) +{ + union ubifs_key key1, *key = &zbr->key; + int err, type = key_type(c, key); + struct ubifs_wbuf *wbuf; + + /* + * 'zbr' has to point to on-flash node. The node may sit in a bud and + * may even be in a write buffer, so we have to take care about this. + */ + wbuf = ubifs_get_wbuf(c, zbr->lnum); + if (wbuf) + err = ubifs_read_node_wbuf(wbuf, node, type, zbr->len, + zbr->lnum, zbr->offs); + else + err = ubifs_read_node(c, node, type, zbr->len, zbr->lnum, + zbr->offs); + + if (err) { + dbg_tnc("key %s", DBGKEY(key)); + return err; + } + + /* Make sure the key of the read node is correct */ + key_read(c, key, &key1); + if (memcmp(node + UBIFS_KEY_OFFSET, &key1, c->key_len)) { + ubifs_err("bad key in node at LEB %d:%d", + zbr->lnum, zbr->offs); + dbg_tnc("looked for key %s found node's key %s", + DBGKEY(key), DBGKEY1(&key1)); + dbg_dump_node(c, node); + return -EINVAL; + } + + return 0; +} diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h new file mode 100644 index 0000000..0cc7da9 --- /dev/null +++ b/fs/ubifs/ubifs-media.h @@ -0,0 +1,745 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file describes UBIFS on-flash format and contains definitions of all the + * relevant data structures and constants. + * + * All UBIFS on-flash objects are stored in the form of nodes. All nodes start + * with the UBIFS node magic number and have the same common header. Nodes + * always sit at 8-byte aligned positions on the media and node header sizes are + * also 8-byte aligned (except for the indexing node and the padding node). + */ + +#ifndef __UBIFS_MEDIA_H__ +#define __UBIFS_MEDIA_H__ + +/* UBIFS node magic number (must not have the padding byte first or last) */ +#define UBIFS_NODE_MAGIC 0x06101831 + +/* UBIFS on-flash format version */ +#define UBIFS_FORMAT_VERSION 4 + +/* Minimum logical eraseblock size in bytes */ +#define UBIFS_MIN_LEB_SZ (15*1024) + +/* Initial CRC32 value used when calculating CRC checksums */ +#define UBIFS_CRC32_INIT 0xFFFFFFFFU + +/* + * UBIFS does not try to compress data if its length is less than the below + * constant. + */ +#define UBIFS_MIN_COMPR_LEN 128 + +/* Root inode number */ +#define UBIFS_ROOT_INO 1 + +/* Lowest inode number used for regular inodes (not UBIFS-only internal ones) */ +#define UBIFS_FIRST_INO 64 + +/* + * Maximum file name and extended attribute length (must be a multiple of 8, + * minus 1). + */ +#define UBIFS_MAX_NLEN 255 + +/* Maximum number of data journal heads */ +#define UBIFS_MAX_JHEADS 1 + +/* + * Size of UBIFS data block. Note, UBIFS is not a block oriented file-system, + * which means that it does not treat the underlying media as consisting of + * blocks like in case of hard drives. Do not be confused. UBIFS block is just + * the maximum amount of data which one data node can have or which can be + * attached to an inode node. + */ +#define UBIFS_BLOCK_SIZE 4096 +#define UBIFS_BLOCK_SHIFT 12 +#define UBIFS_BLOCK_MASK 0x00000FFF + +/* UBIFS padding byte pattern (must not be first or last byte of node magic) */ +#define UBIFS_PADDING_BYTE 0xCE + +/* Maximum possible key length */ +#define UBIFS_MAX_KEY_LEN 16 + +/* Key length ("simple" format) */ +#define UBIFS_SK_LEN 8 + +/* Minimum index tree fanout */ +#define UBIFS_MIN_FANOUT 2 + +/* Maximum number of levels in UBIFS indexing B-tree */ +#define UBIFS_MAX_LEVELS 512 + +/* Maximum amount of data attached to an inode in bytes */ +#define UBIFS_MAX_INO_DATA UBIFS_BLOCK_SIZE + +/* LEB Properties Tree fanout (must be power of 2) and fanout shift */ +#define UBIFS_LPT_FANOUT 4 +#define UBIFS_LPT_FANOUT_SHIFT 2 + +/* LEB Properties Tree bit field sizes */ +#define UBIFS_LPT_CRC_BITS 16 +#define UBIFS_LPT_CRC_BYTES 2 +#define UBIFS_LPT_TYPE_BITS 4 + +/* The key is always at the same position in all keyed nodes */ +#define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key) + +/* + * LEB Properties Tree node types. + * + * UBIFS_LPT_PNODE: LPT leaf node (contains LEB properties) + * UBIFS_LPT_NNODE: LPT internal node + * UBIFS_LPT_LTAB: LPT's own lprops table + * UBIFS_LPT_LSAVE: LPT's save table (big model only) + * UBIFS_LPT_NODE_CNT: count of LPT node types + * UBIFS_LPT_NOT_A_NODE: all ones (15 for 4 bits) is never a valid node type + */ +enum { + UBIFS_LPT_PNODE, + UBIFS_LPT_NNODE, + UBIFS_LPT_LTAB, + UBIFS_LPT_LSAVE, + UBIFS_LPT_NODE_CNT, + UBIFS_LPT_NOT_A_NODE = (1 << UBIFS_LPT_TYPE_BITS) - 1, +}; + +/* + * UBIFS inode types. + * + * UBIFS_ITYPE_REG: regular file + * UBIFS_ITYPE_DIR: directory + * UBIFS_ITYPE_LNK: soft link + * UBIFS_ITYPE_BLK: block device node + * UBIFS_ITYPE_CHR: character device node + * UBIFS_ITYPE_FIFO: fifo + * UBIFS_ITYPE_SOCK: socket + * UBIFS_ITYPES_CNT: count of supported file types + */ +enum { + UBIFS_ITYPE_REG, + UBIFS_ITYPE_DIR, + UBIFS_ITYPE_LNK, + UBIFS_ITYPE_BLK, + UBIFS_ITYPE_CHR, + UBIFS_ITYPE_FIFO, + UBIFS_ITYPE_SOCK, + UBIFS_ITYPES_CNT, +}; + +/* + * Supported key hash functions. + * + * UBIFS_KEY_HASH_R5: R5 hash + * UBIFS_KEY_HASH_TEST: test hash which just returns first 4 bytes of the name + */ +enum { + UBIFS_KEY_HASH_R5, + UBIFS_KEY_HASH_TEST, +}; + +/* + * Supported key formats. + * + * UBIFS_SIMPLE_KEY_FMT: simple key format + */ +enum { + UBIFS_SIMPLE_KEY_FMT, +}; + +/* + * The simple key format uses 29 bits for storing UBIFS block number and hash + * value. + */ +#define UBIFS_S_KEY_BLOCK_BITS 29 +#define UBIFS_S_KEY_BLOCK_MASK 0x1FFFFFFF +#define UBIFS_S_KEY_HASH_BITS UBIFS_S_KEY_BLOCK_BITS +#define UBIFS_S_KEY_HASH_MASK UBIFS_S_KEY_BLOCK_MASK + +/* + * Key types. + * + * UBIFS_INO_KEY: inode node key + * UBIFS_DATA_KEY: data node key + * UBIFS_DENT_KEY: directory entry node key + * UBIFS_XENT_KEY: extended attribute entry key + * UBIFS_KEY_TYPES_CNT: number of supported key types + */ +enum { + UBIFS_INO_KEY, + UBIFS_DATA_KEY, + UBIFS_DENT_KEY, + UBIFS_XENT_KEY, + UBIFS_KEY_TYPES_CNT, +}; + +/* Count of LEBs reserved for the superblock area */ +#define UBIFS_SB_LEBS 1 +/* Count of LEBs reserved for the master area */ +#define UBIFS_MST_LEBS 2 + +/* First LEB of the superblock area */ +#define UBIFS_SB_LNUM 0 +/* First LEB of the master area */ +#define UBIFS_MST_LNUM (UBIFS_SB_LNUM + UBIFS_SB_LEBS) +/* First LEB of the log area */ +#define UBIFS_LOG_LNUM (UBIFS_MST_LNUM + UBIFS_MST_LEBS) + +/* + * The below constants define the absolute minimum values for various UBIFS + * media areas. Many of them actually depend of flash geometry and the FS + * configuration (number of journal heads, orphan LEBs, etc). This means that + * the smallest volume size which can be used for UBIFS cannot be pre-defined + * by these constants. The file-system that meets the below limitation will not + * necessarily mount. UBIFS does run-time calculations and validates the FS + * size. + */ + +/* Minimum number of logical eraseblocks in the log */ +#define UBIFS_MIN_LOG_LEBS 2 +/* Minimum number of bud logical eraseblocks (one for each head) */ +#define UBIFS_MIN_BUD_LEBS 3 +/* Minimum number of journal logical eraseblocks */ +#define UBIFS_MIN_JNL_LEBS (UBIFS_MIN_LOG_LEBS + UBIFS_MIN_BUD_LEBS) +/* Minimum number of LPT area logical eraseblocks */ +#define UBIFS_MIN_LPT_LEBS 2 +/* Minimum number of orphan area logical eraseblocks */ +#define UBIFS_MIN_ORPH_LEBS 1 +/* + * Minimum number of main area logical eraseblocks (buds, 2 for the index, 1 + * for GC, 1 for deletions, and at least 1 for committed data). + */ +#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 5) + +/* Minimum number of logical eraseblocks */ +#define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \ + UBIFS_MIN_LOG_LEBS + UBIFS_MIN_LPT_LEBS + \ + UBIFS_MIN_ORPH_LEBS + UBIFS_MIN_MAIN_LEBS) + +/* Node sizes (N.B. these are guaranteed to be multiples of 8) */ +#define UBIFS_CH_SZ sizeof(struct ubifs_ch) +#define UBIFS_INO_NODE_SZ sizeof(struct ubifs_ino_node) +#define UBIFS_DATA_NODE_SZ sizeof(struct ubifs_data_node) +#define UBIFS_DENT_NODE_SZ sizeof(struct ubifs_dent_node) +#define UBIFS_TRUN_NODE_SZ sizeof(struct ubifs_trun_node) +#define UBIFS_PAD_NODE_SZ sizeof(struct ubifs_pad_node) +#define UBIFS_SB_NODE_SZ sizeof(struct ubifs_sb_node) +#define UBIFS_MST_NODE_SZ sizeof(struct ubifs_mst_node) +#define UBIFS_REF_NODE_SZ sizeof(struct ubifs_ref_node) +#define UBIFS_IDX_NODE_SZ sizeof(struct ubifs_idx_node) +#define UBIFS_CS_NODE_SZ sizeof(struct ubifs_cs_node) +#define UBIFS_ORPH_NODE_SZ sizeof(struct ubifs_orph_node) +/* Extended attribute entry nodes are identical to directory entry nodes */ +#define UBIFS_XENT_NODE_SZ UBIFS_DENT_NODE_SZ +/* Only this does not have to be multiple of 8 bytes */ +#define UBIFS_BRANCH_SZ sizeof(struct ubifs_branch) + +/* Maximum node sizes (N.B. these are guaranteed to be multiples of 8) */ +#define UBIFS_MAX_DATA_NODE_SZ (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE) +#define UBIFS_MAX_INO_NODE_SZ (UBIFS_INO_NODE_SZ + UBIFS_MAX_INO_DATA) +#define UBIFS_MAX_DENT_NODE_SZ (UBIFS_DENT_NODE_SZ + UBIFS_MAX_NLEN + 1) +#define UBIFS_MAX_XENT_NODE_SZ UBIFS_MAX_DENT_NODE_SZ + +/* The largest UBIFS node */ +#define UBIFS_MAX_NODE_SZ UBIFS_MAX_INO_NODE_SZ + +/* + * On-flash inode flags. + * + * UBIFS_COMPR_FL: use compression for this inode + * UBIFS_SYNC_FL: I/O on this inode has to be synchronous + * UBIFS_IMMUTABLE_FL: inode is immutable + * UBIFS_APPEND_FL: writes to the inode may only append data + * UBIFS_DIRSYNC_FL: I/O on this directory inode has to be synchronous + * UBIFS_XATTR_FL: this inode is the inode for an extended attribute value + * + * Note, these are on-flash flags which correspond to ioctl flags + * (@FS_COMPR_FL, etc). They have the same values now, but generally, do not + * have to be the same. + */ +enum { + UBIFS_COMPR_FL = 0x01, + UBIFS_SYNC_FL = 0x02, + UBIFS_IMMUTABLE_FL = 0x04, + UBIFS_APPEND_FL = 0x08, + UBIFS_DIRSYNC_FL = 0x10, + UBIFS_XATTR_FL = 0x20, +}; + +/* Inode flag bits used by UBIFS */ +#define UBIFS_FL_MASK 0x0000001F + +/* + * UBIFS compression algorithms. + * + * UBIFS_COMPR_NONE: no compression + * UBIFS_COMPR_LZO: LZO compression + * UBIFS_COMPR_ZLIB: ZLIB compression + * UBIFS_COMPR_TYPES_CNT: count of supported compression types + */ +enum { + UBIFS_COMPR_NONE, + UBIFS_COMPR_LZO, + UBIFS_COMPR_ZLIB, + UBIFS_COMPR_TYPES_CNT, +}; + +/* + * UBIFS node types. + * + * UBIFS_INO_NODE: inode node + * UBIFS_DATA_NODE: data node + * UBIFS_DENT_NODE: directory entry node + * UBIFS_XENT_NODE: extended attribute node + * UBIFS_TRUN_NODE: truncation node + * UBIFS_PAD_NODE: padding node + * UBIFS_SB_NODE: superblock node + * UBIFS_MST_NODE: master node + * UBIFS_REF_NODE: LEB reference node + * UBIFS_IDX_NODE: index node + * UBIFS_CS_NODE: commit start node + * UBIFS_ORPH_NODE: orphan node + * UBIFS_NODE_TYPES_CNT: count of supported node types + * + * Note, we index arrays by these numbers, so keep them low and contiguous. + * Node type constants for inodes, direntries and so on have to be the same as + * corresponding key type constants. + */ +enum { + UBIFS_INO_NODE, + UBIFS_DATA_NODE, + UBIFS_DENT_NODE, + UBIFS_XENT_NODE, + UBIFS_TRUN_NODE, + UBIFS_PAD_NODE, + UBIFS_SB_NODE, + UBIFS_MST_NODE, + UBIFS_REF_NODE, + UBIFS_IDX_NODE, + UBIFS_CS_NODE, + UBIFS_ORPH_NODE, + UBIFS_NODE_TYPES_CNT, +}; + +/* + * Master node flags. + * + * UBIFS_MST_DIRTY: rebooted uncleanly - master node is dirty + * UBIFS_MST_NO_ORPHS: no orphan inodes present + * UBIFS_MST_RCVRY: written by recovery + */ +enum { + UBIFS_MST_DIRTY = 1, + UBIFS_MST_NO_ORPHS = 2, + UBIFS_MST_RCVRY = 4, +}; + +/* + * Node group type (used by recovery to recover whole group or none). + * + * UBIFS_NO_NODE_GROUP: this node is not part of a group + * UBIFS_IN_NODE_GROUP: this node is a part of a group + * UBIFS_LAST_OF_NODE_GROUP: this node is the last in a group + */ +enum { + UBIFS_NO_NODE_GROUP = 0, + UBIFS_IN_NODE_GROUP, + UBIFS_LAST_OF_NODE_GROUP, +}; + +/* + * Superblock flags. + * + * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set + */ +enum { + UBIFS_FLG_BIGLPT = 0x02, +}; + +/** + * struct ubifs_ch - common header node. + * @magic: UBIFS node magic number (%UBIFS_NODE_MAGIC) + * @crc: CRC-32 checksum of the node header + * @sqnum: sequence number + * @len: full node length + * @node_type: node type + * @group_type: node group type + * @padding: reserved for future, zeroes + * + * Every UBIFS node starts with this common part. If the node has a key, the + * key always goes next. + */ +struct ubifs_ch { + __le32 magic; + __le32 crc; + __le64 sqnum; + __le32 len; + __u8 node_type; + __u8 group_type; + __u8 padding[2]; +} __attribute__ ((packed)); + +/** + * union ubifs_dev_desc - device node descriptor. + * @new: new type device descriptor + * @huge: huge type device descriptor + * + * This data structure describes major/minor numbers of a device node. In an + * inode is a device node then its data contains an object of this type. UBIFS + * uses standard Linux "new" and "huge" device node encodings. + */ +union ubifs_dev_desc { + __le32 new; + __le64 huge; +} __attribute__ ((packed)); + +/** + * struct ubifs_ino_node - inode node. + * @ch: common header + * @key: node key + * @creat_sqnum: sequence number at time of creation + * @size: inode size in bytes (amount of uncompressed data) + * @atime_sec: access time seconds + * @ctime_sec: creation time seconds + * @mtime_sec: modification time seconds + * @atime_nsec: access time nanoseconds + * @ctime_nsec: creation time nanoseconds + * @mtime_nsec: modification time nanoseconds + * @nlink: number of hard links + * @uid: owner ID + * @gid: group ID + * @mode: access flags + * @flags: per-inode flags (%UBIFS_COMPR_FL, %UBIFS_SYNC_FL, etc) + * @data_len: inode data length + * @xattr_cnt: count of extended attributes this inode has + * @xattr_size: summarized size of all extended attributes in bytes + * @padding1: reserved for future, zeroes + * @xattr_names: sum of lengths of all extended attribute names belonging to + * this inode + * @compr_type: compression type used for this inode + * @padding2: reserved for future, zeroes + * @data: data attached to the inode + * + * Note, even though inode compression type is defined by @compr_type, some + * nodes of this inode may be compressed with different compressor - this + * happens if compression type is changed while the inode already has data + * nodes. But @compr_type will be use for further writes to the inode. + * + * Note, do not forget to amend 'zero_ino_node_unused()' function when changing + * the padding fields. + */ +struct ubifs_ino_node { + struct ubifs_ch ch; + __u8 key[UBIFS_MAX_KEY_LEN]; + __le64 creat_sqnum; + __le64 size; + __le64 atime_sec; + __le64 ctime_sec; + __le64 mtime_sec; + __le32 atime_nsec; + __le32 ctime_nsec; + __le32 mtime_nsec; + __le32 nlink; + __le32 uid; + __le32 gid; + __le32 mode; + __le32 flags; + __le32 data_len; + __le32 xattr_cnt; + __le32 xattr_size; + __u8 padding1[4]; /* Watch 'zero_ino_node_unused()' if changing! */ + __le32 xattr_names; + __le16 compr_type; + __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ + __u8 data[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_dent_node - directory entry node. + * @ch: common header + * @key: node key + * @inum: target inode number + * @padding1: reserved for future, zeroes + * @type: type of the target inode (%UBIFS_ITYPE_REG, %UBIFS_ITYPE_DIR, etc) + * @nlen: name length + * @padding2: reserved for future, zeroes + * @name: zero-terminated name + * + * Note, do not forget to amend 'zero_dent_node_unused()' function when + * changing the padding fields. + */ +struct ubifs_dent_node { + struct ubifs_ch ch; + __u8 key[UBIFS_MAX_KEY_LEN]; + __le64 inum; + __u8 padding1; + __u8 type; + __le16 nlen; + __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ + __u8 name[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_data_node - data node. + * @ch: common header + * @key: node key + * @size: uncompressed data size in bytes + * @compr_type: compression type (%UBIFS_COMPR_NONE, %UBIFS_COMPR_LZO, etc) + * @padding: reserved for future, zeroes + * @data: data + * + * Note, do not forget to amend 'zero_data_node_unused()' function when + * changing the padding fields. + */ +struct ubifs_data_node { + struct ubifs_ch ch; + __u8 key[UBIFS_MAX_KEY_LEN]; + __le32 size; + __le16 compr_type; + __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ + __u8 data[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_trun_node - truncation node. + * @ch: common header + * @inum: truncated inode number + * @padding: reserved for future, zeroes + * @old_size: size before truncation + * @new_size: size after truncation + * + * This node exists only in the journal and never goes to the main area. Note, + * do not forget to amend 'zero_trun_node_unused()' function when changing the + * padding fields. + */ +struct ubifs_trun_node { + struct ubifs_ch ch; + __le32 inum; + __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ + __le64 old_size; + __le64 new_size; +} __attribute__ ((packed)); + +/** + * struct ubifs_pad_node - padding node. + * @ch: common header + * @pad_len: how many bytes after this node are unused (because padded) + * @padding: reserved for future, zeroes + */ +struct ubifs_pad_node { + struct ubifs_ch ch; + __le32 pad_len; +} __attribute__ ((packed)); + +/** + * struct ubifs_sb_node - superblock node. + * @ch: common header + * @padding: reserved for future, zeroes + * @key_hash: type of hash function used in keys + * @key_fmt: format of the key + * @flags: file-system flags (%UBIFS_FLG_BIGLPT, etc) + * @min_io_size: minimal input/output unit size + * @leb_size: logical eraseblock size in bytes + * @leb_cnt: count of LEBs used by file-system + * @max_leb_cnt: maximum count of LEBs used by file-system + * @max_bud_bytes: maximum amount of data stored in buds + * @log_lebs: log size in logical eraseblocks + * @lpt_lebs: number of LEBs used for lprops table + * @orph_lebs: number of LEBs used for recording orphans + * @jhead_cnt: count of journal heads + * @fanout: tree fanout (max. number of links per indexing node) + * @lsave_cnt: number of LEB numbers in LPT's save table + * @fmt_version: UBIFS on-flash format version + * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) + * @padding1: reserved for future, zeroes + * @rp_uid: reserve pool UID + * @rp_gid: reserve pool GID + * @rp_size: size of the reserved pool in bytes + * @padding2: reserved for future, zeroes + * @time_gran: time granularity in nanoseconds + * @uuid: UUID generated when the file system image was created + */ +struct ubifs_sb_node { + struct ubifs_ch ch; + __u8 padding[2]; + __u8 key_hash; + __u8 key_fmt; + __le32 flags; + __le32 min_io_size; + __le32 leb_size; + __le32 leb_cnt; + __le32 max_leb_cnt; + __le64 max_bud_bytes; + __le32 log_lebs; + __le32 lpt_lebs; + __le32 orph_lebs; + __le32 jhead_cnt; + __le32 fanout; + __le32 lsave_cnt; + __le32 fmt_version; + __le16 default_compr; + __u8 padding1[2]; + __le32 rp_uid; + __le32 rp_gid; + __le64 rp_size; + __le32 time_gran; + __u8 uuid[16]; + __u8 padding2[3972]; +} __attribute__ ((packed)); + +/** + * struct ubifs_mst_node - master node. + * @ch: common header + * @highest_inum: highest inode number in the committed index + * @cmt_no: commit number + * @flags: various flags (%UBIFS_MST_DIRTY, etc) + * @log_lnum: start of the log + * @root_lnum: LEB number of the root indexing node + * @root_offs: offset within @root_lnum + * @root_len: root indexing node length + * @gc_lnum: LEB reserved for garbage collection (%-1 value means the LEB was + * not reserved and should be reserved on mount) + * @ihead_lnum: LEB number of index head + * @ihead_offs: offset of index head + * @index_size: size of index on flash + * @total_free: total free space in bytes + * @total_dirty: total dirty space in bytes + * @total_used: total used space in bytes (includes only data LEBs) + * @total_dead: total dead space in bytes (includes only data LEBs) + * @total_dark: total dark space in bytes (includes only data LEBs) + * @lpt_lnum: LEB number of LPT root nnode + * @lpt_offs: offset of LPT root nnode + * @nhead_lnum: LEB number of LPT head + * @nhead_offs: offset of LPT head + * @ltab_lnum: LEB number of LPT's own lprops table + * @ltab_offs: offset of LPT's own lprops table + * @lsave_lnum: LEB number of LPT's save table (big model only) + * @lsave_offs: offset of LPT's save table (big model only) + * @lscan_lnum: LEB number of last LPT scan + * @empty_lebs: number of empty logical eraseblocks + * @idx_lebs: number of indexing logical eraseblocks + * @leb_cnt: count of LEBs used by file-system + * @padding: reserved for future, zeroes + */ +struct ubifs_mst_node { + struct ubifs_ch ch; + __le64 highest_inum; + __le64 cmt_no; + __le32 flags; + __le32 log_lnum; + __le32 root_lnum; + __le32 root_offs; + __le32 root_len; + __le32 gc_lnum; + __le32 ihead_lnum; + __le32 ihead_offs; + __le64 index_size; + __le64 total_free; + __le64 total_dirty; + __le64 total_used; + __le64 total_dead; + __le64 total_dark; + __le32 lpt_lnum; + __le32 lpt_offs; + __le32 nhead_lnum; + __le32 nhead_offs; + __le32 ltab_lnum; + __le32 ltab_offs; + __le32 lsave_lnum; + __le32 lsave_offs; + __le32 lscan_lnum; + __le32 empty_lebs; + __le32 idx_lebs; + __le32 leb_cnt; + __u8 padding[344]; +} __attribute__ ((packed)); + +/** + * struct ubifs_ref_node - logical eraseblock reference node. + * @ch: common header + * @lnum: the referred logical eraseblock number + * @offs: start offset in the referred LEB + * @jhead: journal head number + * @padding: reserved for future, zeroes + */ +struct ubifs_ref_node { + struct ubifs_ch ch; + __le32 lnum; + __le32 offs; + __le32 jhead; + __u8 padding[28]; +} __attribute__ ((packed)); + +/** + * struct ubifs_branch - key/reference/length branch + * @lnum: LEB number of the target node + * @offs: offset within @lnum + * @len: target node length + * @key: key + */ +struct ubifs_branch { + __le32 lnum; + __le32 offs; + __le32 len; + __u8 key[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_idx_node - indexing node. + * @ch: common header + * @child_cnt: number of child index nodes + * @level: tree level + * @branches: LEB number / offset / length / key branches + */ +struct ubifs_idx_node { + struct ubifs_ch ch; + __le16 child_cnt; + __le16 level; + __u8 branches[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_cs_node - commit start node. + * @ch: common header + * @cmt_no: commit number + */ +struct ubifs_cs_node { + struct ubifs_ch ch; + __le64 cmt_no; +} __attribute__ ((packed)); + +/** + * struct ubifs_orph_node - orphan node. + * @ch: common header + * @cmt_no: commit number (also top bit is set on the last node of the commit) + * @inos: inode numbers of orphans + */ +struct ubifs_orph_node { + struct ubifs_ch ch; + __le64 cmt_no; + __le64 inos[]; +} __attribute__ ((packed)); + +#endif /* __UBIFS_MEDIA_H__ */ diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h new file mode 100644 index 0000000..e4f89f2 --- /dev/null +++ b/fs/ubifs/ubifs.h @@ -0,0 +1,1649 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* Implementation version 0.7 */ + +#ifndef __UBIFS_H__ +#define __UBIFS_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ubifs-media.h" + +/* Version of this UBIFS implementation */ +#define UBIFS_VERSION 1 + +/* Normal UBIFS messages */ +#define ubifs_msg(fmt, ...) \ + printk(KERN_NOTICE "UBIFS: " fmt "\n", ##__VA_ARGS__) +/* UBIFS error messages */ +#define ubifs_err(fmt, ...) \ + printk(KERN_ERR "UBIFS error (pid %d): %s: " fmt "\n", current->pid, \ + __func__, ##__VA_ARGS__) +/* UBIFS warning messages */ +#define ubifs_warn(fmt, ...) \ + printk(KERN_WARNING "UBIFS warning (pid %d): %s: " fmt "\n", \ + current->pid, __func__, ##__VA_ARGS__) + +/* UBIFS file system VFS magic number */ +#define UBIFS_SUPER_MAGIC 0x24051905 + +/* Number of UBIFS blocks per VFS page */ +#define UBIFS_BLOCKS_PER_PAGE (PAGE_CACHE_SIZE / UBIFS_BLOCK_SIZE) +#define UBIFS_BLOCKS_PER_PAGE_SHIFT (PAGE_CACHE_SHIFT - UBIFS_BLOCK_SHIFT) + +/* "File system end of life" sequence number watermark */ +#define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL +#define SQNUM_WATERMARK 0xFFFFFFFFFF000000ULL + +/* Minimum amount of data UBIFS writes to the flash */ +#define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8) + +/* + * Currently we do not support inode number overlapping and re-using, so this + * watermark defines dangerous inode number level. This should be fixed later, + * although it is difficult to exceed current limit. Another option is to use + * 64-bit inode numbers, but this means more overhead. + */ +#define INUM_WARN_WATERMARK 0xFFF00000 +#define INUM_WATERMARK 0xFFFFFF00 + +/* Largest key size supported in this implementation */ +#define CUR_MAX_KEY_LEN UBIFS_SK_LEN + +/* Maximum number of entries in each LPT (LEB category) heap */ +#define LPT_HEAP_SZ 256 + +/* + * Background thread name pattern. The numbers are UBI device and volume + * numbers. + */ +#define BGT_NAME_PATTERN "ubifs_bgt%d_%d" + +/* Default write-buffer synchronization timeout (5 secs) */ +#define DEFAULT_WBUF_TIMEOUT (5 * HZ) + +/* Maximum possible inode number (only 32-bit inodes are supported now) */ +#define MAX_INUM 0xFFFFFFFF + +/* Number of non-data journal heads */ +#define NONDATA_JHEADS_CNT 2 + +/* Garbage collector head */ +#define GCHD 0 +/* Base journal head number */ +#define BASEHD 1 +/* First "general purpose" journal head */ +#define DATAHD 2 + +/* 'No change' value for 'ubifs_change_lp()' */ +#define LPROPS_NC 0x80000001 + +/* + * There is no notion of truncation key because truncation nodes do not exist + * in TNC. However, when replaying, it is handy to introduce fake "truncation" + * keys for truncation nodes because the code becomes simpler. So we define + * %UBIFS_TRUN_KEY type. + */ +#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT + +/* + * How much a directory entry/extended attribute entry adds to the parent/host + * inode. + */ +#define CALC_DENT_SIZE(name_len) ALIGN(UBIFS_DENT_NODE_SZ + (name_len) + 1, 8) + +/* How much an extended attribute adds to the host inode */ +#define CALC_XATTR_BYTES(data_len) ALIGN(UBIFS_INO_NODE_SZ + (data_len) + 1, 8) + +/* + * Znodes which were not touched for 'OLD_ZNODE_AGE' seconds are considered + * "old", and znode which were touched last 'YOUNG_ZNODE_AGE' seconds ago are + * considered "young". This is used by shrinker when selecting znode to trim + * off. + */ +#define OLD_ZNODE_AGE 20 +#define YOUNG_ZNODE_AGE 5 + +/* + * Some compressors, like LZO, may end up with more data then the input buffer. + * So UBIFS always allocates larger output buffer, to be sure the compressor + * will not corrupt memory in case of worst case compression. + */ +#define WORST_COMPR_FACTOR 2 + +/* Maximum expected tree height for use by bottom_up_buf */ +#define BOTTOM_UP_HEIGHT 64 + +/* + * Lockdep classes for UBIFS inode @ui_mutex. + */ +enum { + WB_MUTEX_1 = 0, + WB_MUTEX_2 = 1, + WB_MUTEX_3 = 2, +}; + +/* + * Znode flags (actually, bit numbers which store the flags). + * + * DIRTY_ZNODE: znode is dirty + * COW_ZNODE: znode is being committed and a new instance of this znode has to + * be created before changing this znode + * OBSOLETE_ZNODE: znode is obsolete, which means it was deleted, but it is + * still in the commit list and the ongoing commit operation + * will commit it, and delete this znode after it is done + */ +enum { + DIRTY_ZNODE = 0, + COW_ZNODE = 1, + OBSOLETE_ZNODE = 2, +}; + +/* + * Commit states. + * + * COMMIT_RESTING: commit is not wanted + * COMMIT_BACKGROUND: background commit has been requested + * COMMIT_REQUIRED: commit is required + * COMMIT_RUNNING_BACKGROUND: background commit is running + * COMMIT_RUNNING_REQUIRED: commit is running and it is required + * COMMIT_BROKEN: commit failed + */ +enum { + COMMIT_RESTING = 0, + COMMIT_BACKGROUND, + COMMIT_REQUIRED, + COMMIT_RUNNING_BACKGROUND, + COMMIT_RUNNING_REQUIRED, + COMMIT_BROKEN, +}; + +/* + * 'ubifs_scan_a_node()' return values. + * + * SCANNED_GARBAGE: scanned garbage + * SCANNED_EMPTY_SPACE: scanned empty space + * SCANNED_A_NODE: scanned a valid node + * SCANNED_A_CORRUPT_NODE: scanned a corrupted node + * SCANNED_A_BAD_PAD_NODE: scanned a padding node with invalid pad length + * + * Greater than zero means: 'scanned that number of padding bytes' + */ +enum { + SCANNED_GARBAGE = 0, + SCANNED_EMPTY_SPACE = -1, + SCANNED_A_NODE = -2, + SCANNED_A_CORRUPT_NODE = -3, + SCANNED_A_BAD_PAD_NODE = -4, +}; + +/* + * LPT cnode flag bits. + * + * DIRTY_CNODE: cnode is dirty + * COW_CNODE: cnode is being committed and must be copied before writing + * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted), + * so it can (and must) be freed when the commit is finished + */ +enum { + DIRTY_CNODE = 0, + COW_CNODE = 1, + OBSOLETE_CNODE = 2, +}; + +/* + * Dirty flag bits (lpt_drty_flgs) for LPT special nodes. + * + * LTAB_DIRTY: ltab node is dirty + * LSAVE_DIRTY: lsave node is dirty + */ +enum { + LTAB_DIRTY = 1, + LSAVE_DIRTY = 2, +}; + +/* + * Return codes used by the garbage collector. + * @LEB_FREED: the logical eraseblock was freed and is ready to use + * @LEB_FREED_IDX: indexing LEB was freed and can be used only after the commit + * @LEB_RETAINED: the logical eraseblock was freed and retained for GC purposes + */ +enum { + LEB_FREED, + LEB_FREED_IDX, + LEB_RETAINED, +}; + +/** + * struct ubifs_old_idx - index node obsoleted since last commit start. + * @rb: rb-tree node + * @lnum: LEB number of obsoleted index node + * @offs: offset of obsoleted index node + */ +struct ubifs_old_idx { + struct rb_node rb; + int lnum; + int offs; +}; + +/* The below union makes it easier to deal with keys */ +union ubifs_key { + uint8_t u8[CUR_MAX_KEY_LEN]; + uint32_t u32[CUR_MAX_KEY_LEN/4]; + uint64_t u64[CUR_MAX_KEY_LEN/8]; + __le32 j32[CUR_MAX_KEY_LEN/4]; +}; + +/** + * struct ubifs_scan_node - UBIFS scanned node information. + * @list: list of scanned nodes + * @key: key of node scanned (if it has one) + * @sqnum: sequence number + * @type: type of node scanned + * @offs: offset with LEB of node scanned + * @len: length of node scanned + * @node: raw node + */ +struct ubifs_scan_node { + struct list_head list; + union ubifs_key key; + unsigned long long sqnum; + int type; + int offs; + int len; + void *node; +}; + +/** + * struct ubifs_scan_leb - UBIFS scanned LEB information. + * @lnum: logical eraseblock number + * @nodes_cnt: number of nodes scanned + * @nodes: list of struct ubifs_scan_node + * @endpt: end point (and therefore the start of empty space) + * @ecc: read returned -EBADMSG + * @buf: buffer containing entire LEB scanned + */ +struct ubifs_scan_leb { + int lnum; + int nodes_cnt; + struct list_head nodes; + int endpt; + int ecc; + void *buf; +}; + +/** + * struct ubifs_gced_idx_leb - garbage-collected indexing LEB. + * @list: list + * @lnum: LEB number + * @unmap: OK to unmap this LEB + * + * This data structure is used to temporary store garbage-collected indexing + * LEBs - they are not released immediately, but only after the next commit. + * This is needed to guarantee recoverability. + */ +struct ubifs_gced_idx_leb { + struct list_head list; + int lnum; + int unmap; +}; + +/** + * struct ubifs_inode - UBIFS in-memory inode description. + * @vfs_inode: VFS inode description object + * @creat_sqnum: sequence number at time of creation + * @xattr_size: summarized size of all extended attributes in bytes + * @xattr_cnt: count of extended attributes this inode has + * @xattr_names: sum of lengths of all extended attribute names belonging to + * this inode + * @dirty: non-zero if the inode is dirty + * @xattr: non-zero if this is an extended attribute inode + * @ui_mutex: serializes inode write-back with the rest of VFS operations, + * serializes "clean <-> dirty" state changes, protects @dirty, + * @ui_size, and @xattr_size + * @ui_lock: protects @synced_i_size + * @synced_i_size: synchronized size of inode, i.e. the value of inode size + * currently stored on the flash; used only for regular file + * inodes + * @ui_size: inode size used by UBIFS when writing to flash + * @flags: inode flags (@UBIFS_COMPR_FL, etc) + * @compr_type: default compression type used for this inode + * @data_len: length of the data attached to the inode + * @data: inode's data + * + * @ui_mutex exists for two main reasons. At first it prevents inodes from + * being written back while UBIFS changing them, being in the middle of an VFS + * operation. This way UBIFS makes sure the inode fields are consistent. For + * example, in 'ubifs_rename()' we change 3 inodes simultaneously, and + * write-back must not write any of them before we have finished. + * + * The second reason is budgeting - UBIFS has to budget all operations. If an + * operation is going to mark an inode dirty, it has to allocate budget for + * this. It cannot just mark it dirty because there is no guarantee there will + * be enough flash space to write the inode back later. This means UBIFS has + * to have full control over inode "clean <-> dirty" transitions (and pages + * actually). But unfortunately, VFS marks inodes dirty in many places, and it + * does not ask the file-system if it is allowed to do so (there is a notifier, + * but it is not enough), i.e., there is no mechanism to synchronize with this. + * So UBIFS has its own inode dirty flag and its own mutex to serialize + * "clean <-> dirty" transitions. + * + * The @synced_i_size field is used to make sure we never write pages which are + * beyond last synchronized inode size. See 'ubifs_writepage()' for more + * information. + * + * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses + * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot + * make sure @inode->i_size is always changed under @ui_mutex, because it + * cannot call 'vmtruncate()' with @ui_mutex locked, because it would deadlock + * with 'ubifs_writepage()' (see file.c). All the other inode fields are + * changed under @ui_mutex, so they do not need "shadow" fields. Note, one + * could consider to rework locking and base it on "shadow" fields. + */ +struct ubifs_inode { + struct inode vfs_inode; + unsigned long long creat_sqnum; + unsigned int xattr_size; + unsigned int xattr_cnt; + unsigned int xattr_names; + unsigned int dirty:1; + unsigned int xattr:1; + struct mutex ui_mutex; + spinlock_t ui_lock; + loff_t synced_i_size; + loff_t ui_size; + int flags; + int compr_type; + int data_len; + void *data; +}; + +/** + * struct ubifs_unclean_leb - records a LEB recovered under read-only mode. + * @list: list + * @lnum: LEB number of recovered LEB + * @endpt: offset where recovery ended + * + * This structure records a LEB identified during recovery that needs to be + * cleaned but was not because UBIFS was mounted read-only. The information + * is used to clean the LEB when remounting to read-write mode. + */ +struct ubifs_unclean_leb { + struct list_head list; + int lnum; + int endpt; +}; + +/* + * LEB properties flags. + * + * LPROPS_UNCAT: not categorized + * LPROPS_DIRTY: dirty > 0, not index + * LPROPS_DIRTY_IDX: dirty + free > UBIFS_CH_SZ and index + * LPROPS_FREE: free > 0, not empty, not index + * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs + * LPROPS_EMPTY: LEB is empty, not taken + * LPROPS_FREEABLE: free + dirty == leb_size, not index, not taken + * LPROPS_FRDI_IDX: free + dirty == leb_size and index, may be taken + * LPROPS_CAT_MASK: mask for the LEB categories above + * LPROPS_TAKEN: LEB was taken (this flag is not saved on the media) + * LPROPS_INDEX: LEB contains indexing nodes (this flag also exists on flash) + */ +enum { + LPROPS_UNCAT = 0, + LPROPS_DIRTY = 1, + LPROPS_DIRTY_IDX = 2, + LPROPS_FREE = 3, + LPROPS_HEAP_CNT = 3, + LPROPS_EMPTY = 4, + LPROPS_FREEABLE = 5, + LPROPS_FRDI_IDX = 6, + LPROPS_CAT_MASK = 15, + LPROPS_TAKEN = 16, + LPROPS_INDEX = 32, +}; + +/** + * struct ubifs_lprops - logical eraseblock properties. + * @free: amount of free space in bytes + * @dirty: amount of dirty space in bytes + * @flags: LEB properties flags (see above) + * @lnum: LEB number + * @list: list of same-category lprops (for LPROPS_EMPTY and LPROPS_FREEABLE) + * @hpos: heap position in heap of same-category lprops (other categories) + */ +struct ubifs_lprops { + int free; + int dirty; + int flags; + int lnum; + union { + struct list_head list; + int hpos; + }; +}; + +/** + * struct ubifs_lpt_lprops - LPT logical eraseblock properties. + * @free: amount of free space in bytes + * @dirty: amount of dirty space in bytes + * @tgc: trivial GC flag (1 => unmap after commit end) + * @cmt: commit flag (1 => reserved for commit) + */ +struct ubifs_lpt_lprops { + int free; + int dirty; + unsigned tgc : 1; + unsigned cmt : 1; +}; + +/** + * struct ubifs_lp_stats - statistics of eraseblocks in the main area. + * @empty_lebs: number of empty LEBs + * @taken_empty_lebs: number of taken LEBs + * @idx_lebs: number of indexing LEBs + * @total_free: total free space in bytes + * @total_dirty: total dirty space in bytes + * @total_used: total used space in bytes (includes only data LEBs) + * @total_dead: total dead space in bytes (includes only data LEBs) + * @total_dark: total dark space in bytes (includes only data LEBs) + * + * N.B. total_dirty and total_used are different to other total_* fields, + * because they account _all_ LEBs, not just data LEBs. + * + * 'taken_empty_lebs' counts the LEBs that are in the transient state of having + * been 'taken' for use but not yet written to. 'taken_empty_lebs' is needed + * to account correctly for gc_lnum, otherwise 'empty_lebs' could be used + * by itself (in which case 'unused_lebs' would be a better name). In the case + * of gc_lnum, it is 'taken' at mount time or whenever a LEB is retained by GC, + * but unlike other empty LEBs that are 'taken', it may not be written straight + * away (i.e. before the next commit start or unmount), so either gc_lnum must + * be specially accounted for, or the current approach followed i.e. count it + * under 'taken_empty_lebs'. + */ +struct ubifs_lp_stats { + int empty_lebs; + int taken_empty_lebs; + int idx_lebs; + long long total_free; + long long total_dirty; + long long total_used; + long long total_dead; + long long total_dark; +}; + +struct ubifs_nnode; + +/** + * struct ubifs_cnode - LEB Properties Tree common node. + * @parent: parent nnode + * @cnext: next cnode to commit + * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE) + * @iip: index in parent + * @level: level in the tree (zero for pnodes, greater than zero for nnodes) + * @num: node number + */ +struct ubifs_cnode { + struct ubifs_nnode *parent; + struct ubifs_cnode *cnext; + unsigned long flags; + int iip; + int level; + int num; +}; + +/** + * struct ubifs_pnode - LEB Properties Tree leaf node. + * @parent: parent nnode + * @cnext: next cnode to commit + * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE) + * @iip: index in parent + * @level: level in the tree (always zero for pnodes) + * @num: node number + * @lprops: LEB properties array + */ +struct ubifs_pnode { + struct ubifs_nnode *parent; + struct ubifs_cnode *cnext; + unsigned long flags; + int iip; + int level; + int num; + struct ubifs_lprops lprops[UBIFS_LPT_FANOUT]; +}; + +/** + * struct ubifs_nbranch - LEB Properties Tree internal node branch. + * @lnum: LEB number of child + * @offs: offset of child + * @nnode: nnode child + * @pnode: pnode child + * @cnode: cnode child + */ +struct ubifs_nbranch { + int lnum; + int offs; + union { + struct ubifs_nnode *nnode; + struct ubifs_pnode *pnode; + struct ubifs_cnode *cnode; + }; +}; + +/** + * struct ubifs_nnode - LEB Properties Tree internal node. + * @parent: parent nnode + * @cnext: next cnode to commit + * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE) + * @iip: index in parent + * @level: level in the tree (always greater than zero for nnodes) + * @num: node number + * @nbranch: branches to child nodes + */ +struct ubifs_nnode { + struct ubifs_nnode *parent; + struct ubifs_cnode *cnext; + unsigned long flags; + int iip; + int level; + int num; + struct ubifs_nbranch nbranch[UBIFS_LPT_FANOUT]; +}; + +/** + * struct ubifs_lpt_heap - heap of categorized lprops. + * @arr: heap array + * @cnt: number in heap + * @max_cnt: maximum number allowed in heap + * + * There are %LPROPS_HEAP_CNT heaps. + */ +struct ubifs_lpt_heap { + struct ubifs_lprops **arr; + int cnt; + int max_cnt; +}; + +/* + * Return codes for LPT scan callback function. + * + * LPT_SCAN_CONTINUE: continue scanning + * LPT_SCAN_ADD: add the LEB properties scanned to the tree in memory + * LPT_SCAN_STOP: stop scanning + */ +enum { + LPT_SCAN_CONTINUE = 0, + LPT_SCAN_ADD = 1, + LPT_SCAN_STOP = 2, +}; + +struct ubifs_info; + +/* Callback used by the 'ubifs_lpt_scan_nolock()' function */ +typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c, + const struct ubifs_lprops *lprops, + int in_tree, void *data); + +/** + * struct ubifs_wbuf - UBIFS write-buffer. + * @c: UBIFS file-system description object + * @buf: write-buffer (of min. flash I/O unit size) + * @lnum: logical eraseblock number the write-buffer points to + * @offs: write-buffer offset in this logical eraseblock + * @avail: number of bytes available in the write-buffer + * @used: number of used bytes in the write-buffer + * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM, + * %UBI_UNKNOWN) + * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep + * up by 'mutex_lock_nested()). + * @sync_callback: write-buffer synchronization callback + * @io_mutex: serializes write-buffer I/O + * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes + * fields + * @timer: write-buffer timer + * @timeout: timer expire interval in jiffies + * @need_sync: it is set if its timer expired and needs sync + * @next_ino: points to the next position of the following inode number + * @inodes: stores the inode numbers of the nodes which are in wbuf + * + * The write-buffer synchronization callback is called when the write-buffer is + * synchronized in order to notify how much space was wasted due to + * write-buffer padding and how much free space is left in the LEB. + * + * Note: the fields @buf, @lnum, @offs, @avail and @used can be read under + * spin-lock or mutex because they are written under both mutex and spin-lock. + * @buf is appended to under mutex but overwritten under both mutex and + * spin-lock. Thus the data between @buf and @buf + @used can be read under + * spinlock. + */ +struct ubifs_wbuf { + struct ubifs_info *c; + void *buf; + int lnum; + int offs; + int avail; + int used; + int dtype; + int jhead; + int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad); + struct mutex io_mutex; + spinlock_t lock; + struct timer_list timer; + int timeout; + int need_sync; + int next_ino; + ino_t *inodes; +}; + +/** + * struct ubifs_bud - bud logical eraseblock. + * @lnum: logical eraseblock number + * @start: where the (uncommitted) bud data starts + * @jhead: journal head number this bud belongs to + * @list: link in the list buds belonging to the same journal head + * @rb: link in the tree of all buds + */ +struct ubifs_bud { + int lnum; + int start; + int jhead; + struct list_head list; + struct rb_node rb; +}; + +/** + * struct ubifs_jhead - journal head. + * @wbuf: head's write-buffer + * @buds_list: list of bud LEBs belonging to this journal head + * + * Note, the @buds list is protected by the @c->buds_lock. + */ +struct ubifs_jhead { + struct ubifs_wbuf wbuf; + struct list_head buds_list; +}; + +/** + * struct ubifs_zbranch - key/coordinate/length branch stored in znodes. + * @key: key + * @znode: znode address in memory + * @lnum: LEB number of the indexing node + * @offs: offset of the indexing node within @lnum + * @len: target node length + */ +struct ubifs_zbranch { + union ubifs_key key; + union { + struct ubifs_znode *znode; + void *leaf; + }; + int lnum; + int offs; + int len; +}; + +/** + * struct ubifs_znode - in-memory representation of an indexing node. + * @parent: parent znode or NULL if it is the root + * @cnext: next znode to commit + * @flags: znode flags (%DIRTY_ZNODE, %COW_ZNODE or %OBSOLETE_ZNODE) + * @time: last access time (seconds) + * @level: level of the entry in the TNC tree + * @child_cnt: count of child znodes + * @iip: index in parent's zbranch array + * @alt: lower bound of key range has altered i.e. child inserted at slot 0 + * @lnum: LEB number of the corresponding indexing node + * @offs: offset of the corresponding indexing node + * @len: length of the corresponding indexing node + * @zbranch: array of znode branches (@c->fanout elements) + */ +struct ubifs_znode { + struct ubifs_znode *parent; + struct ubifs_znode *cnext; + unsigned long flags; + unsigned long time; + int level; + int child_cnt; + int iip; + int alt; +#ifdef CONFIG_UBIFS_FS_DEBUG + int lnum, offs, len; +#endif + struct ubifs_zbranch zbranch[]; +}; + +/** + * struct ubifs_node_range - node length range description data structure. + * @len: fixed node length + * @min_len: minimum possible node length + * @max_len: maximum possible node length + * + * If @max_len is %0, the node has fixed length @len. + */ +struct ubifs_node_range { + union { + int len; + int min_len; + }; + int max_len; +}; + +/** + * struct ubifs_compressor - UBIFS compressor description structure. + * @compr_type: compressor type (%UBIFS_COMPR_LZO, etc) + * @cc: cryptoapi compressor handle + * @comp_mutex: mutex used during compression + * @decomp_mutex: mutex used during decompression + * @name: compressor name + * @capi_name: cryptoapi compressor name + */ +struct ubifs_compressor { + int compr_type; + struct crypto_comp *cc; + struct mutex *comp_mutex; + struct mutex *decomp_mutex; + const char *name; + const char *capi_name; +}; + +/** + * struct ubifs_budget_req - budget requirements of an operation. + * + * @fast: non-zero if the budgeting should try to aquire budget quickly and + * should not try to call write-back + * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields + * have to be re-calculated + * @new_page: non-zero if the operation adds a new page + * @dirtied_page: non-zero if the operation makes a page dirty + * @new_dent: non-zero if the operation adds a new directory entry + * @mod_dent: non-zero if the operation removes or modifies an existing + * directory entry + * @new_ino: non-zero if the operation adds a new inode + * @new_ino_d: now much data newly created inode contains + * @dirtied_ino: how many inodes the operation makes dirty + * @dirtied_ino_d: now much data dirtied inode contains + * @idx_growth: how much the index will supposedly grow + * @data_growth: how much new data the operation will supposedly add + * @dd_growth: how much data that makes other data dirty the operation will + * supposedly add + * + * @idx_growth, @data_growth and @dd_growth are not used in budget request. The + * budgeting subsystem caches index and data growth values there to avoid + * re-calculating them when the budget is released. However, if @idx_growth is + * %-1, it is calculated by the release function using other fields. + * + * An inode may contain 4KiB of data at max., thus the widths of @new_ino_d + * is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made + * dirty by the re-name operation. + */ +struct ubifs_budget_req { + unsigned int fast:1; + unsigned int recalculate:1; + unsigned int new_page:1; + unsigned int dirtied_page:1; + unsigned int new_dent:1; + unsigned int mod_dent:1; + unsigned int new_ino:1; + unsigned int new_ino_d:13; +#ifndef UBIFS_DEBUG + unsigned int dirtied_ino:4; + unsigned int dirtied_ino_d:15; +#else + /* Not bit-fields to check for overflows */ + unsigned int dirtied_ino; + unsigned int dirtied_ino_d; +#endif + int idx_growth; + int data_growth; + int dd_growth; +}; + +/** + * struct ubifs_orphan - stores the inode number of an orphan. + * @rb: rb-tree node of rb-tree of orphans sorted by inode number + * @list: list head of list of orphans in order added + * @new_list: list head of list of orphans added since the last commit + * @cnext: next orphan to commit + * @dnext: next orphan to delete + * @inum: inode number + * @new: %1 => added since the last commit, otherwise %0 + */ +struct ubifs_orphan { + struct rb_node rb; + struct list_head list; + struct list_head new_list; + struct ubifs_orphan *cnext; + struct ubifs_orphan *dnext; + ino_t inum; + int new; +}; + +/** + * struct ubifs_mount_opts - UBIFS-specific mount options information. + * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) + */ +struct ubifs_mount_opts { + unsigned int unmount_mode:2; +}; + +/** + * struct ubifs_info - UBIFS file-system description data structure + * (per-superblock). + * @vfs_sb: VFS @struct super_block object + * @bdi: backing device info object to make VFS happy and disable readahead + * + * @highest_inum: highest used inode number + * @vfs_gen: VFS inode generation counter + * @max_sqnum: current global sequence number + * @cmt_no: commit number (last successfully completed commit) + * @cnt_lock: protects @highest_inum, @vfs_gen, and @max_sqnum counters + * @fmt_version: UBIFS on-flash format version + * @uuid: UUID from super block + * + * @lhead_lnum: log head logical eraseblock number + * @lhead_offs: log head offset + * @ltail_lnum: log tail logical eraseblock number (offset is always 0) + * @log_mutex: protects the log, @lhead_lnum, @lhead_offs, @ltail_lnum, and + * @bud_bytes + * @min_log_bytes: minimum required number of bytes in the log + * @cmt_bud_bytes: used during commit to temporarily amount of bytes in + * committed buds + * + * @buds: tree of all buds indexed by bud LEB number + * @bud_bytes: how many bytes of flash is used by buds + * @buds_lock: protects the @buds tree, @bud_bytes, and per-journal head bud + * lists + * @jhead_cnt: count of journal heads + * @jheads: journal heads (head zero is base head) + * @max_bud_bytes: maximum number of bytes allowed in buds + * @bg_bud_bytes: number of bud bytes when background commit is initiated + * @old_buds: buds to be released after commit ends + * @max_bud_cnt: maximum number of buds + * + * @commit_sem: synchronizes committer with other processes + * @cmt_state: commit state + * @cs_lock: commit state lock + * @cmt_wq: wait queue to sleep on if the log is full and a commit is running + * @fast_unmount: do not run journal commit before un-mounting + * @big_lpt: flag that LPT is too big to write whole during commit + * @check_lpt_free: flag that indicates LPT GC may be needed + * @nospace: non-zero if the file-system does not have flash space (used as + * optimization) + * @nospace_rp: the same as @nospace, but additionally means that even reserved + * pool is full + * + * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and + * @calc_idx_sz + * @zroot: zbranch which points to the root index node and znode + * @cnext: next znode to commit + * @enext: next znode to commit to empty space + * @gap_lebs: array of LEBs used by the in-gaps commit method + * @cbuf: commit buffer + * @ileb_buf: buffer for commit in-the-gaps method + * @ileb_len: length of data in ileb_buf + * @ihead_lnum: LEB number of index head + * @ihead_offs: offset of index head + * @ilebs: pre-allocated index LEBs + * @ileb_cnt: number of pre-allocated index LEBs + * @ileb_nxt: next pre-allocated index LEBs + * @old_idx: tree of index nodes obsoleted since the last commit start + * @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c + * @new_ihead_lnum: used by debugging to check ihead_lnum + * @new_ihead_offs: used by debugging to check ihead_offs + * + * @mst_node: master node + * @mst_offs: offset of valid master node + * @mst_mutex: protects the master node area, @mst_node, and @mst_offs + * + * @log_lebs: number of logical eraseblocks in the log + * @log_bytes: log size in bytes + * @log_last: last LEB of the log + * @lpt_lebs: number of LEBs used for lprops table + * @lpt_first: first LEB of the lprops table area + * @lpt_last: last LEB of the lprops table area + * @orph_lebs: number of LEBs used for the orphan area + * @orph_first: first LEB of the orphan area + * @orph_last: last LEB of the orphan area + * @main_lebs: count of LEBs in the main area + * @main_first: first LEB of the main area + * @main_bytes: main area size in bytes + * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) + * + * @key_hash_type: type of the key hash + * @key_hash: direntry key hash function + * @key_fmt: key format + * @key_len: key length + * @fanout: fanout of the index tree (number of links per indexing node) + * + * @min_io_size: minimal input/output unit size + * @min_io_shift: number of bits in @min_io_size minus one + * @leb_size: logical eraseblock size in bytes + * @half_leb_size: half LEB size + * @leb_cnt: count of logical eraseblocks + * @max_leb_cnt: maximum count of logical eraseblocks + * @old_leb_cnt: count of logical eraseblocks before re-size + * @ro_media: the underlying UBI volume is read-only + * + * @dirty_pg_cnt: number of dirty pages (not used) + * @dirty_zn_cnt: number of dirty znodes + * @clean_zn_cnt: number of clean znodes + * + * @budg_idx_growth: amount of bytes budgeted for index growth + * @budg_data_growth: amount of bytes budgeted for cached data + * @budg_dd_growth: amount of bytes budgeted for cached data that will make + * other data dirty + * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index, + * but which still have to be taken into account because + * the index has not been committed so far + * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, + * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, and @lst; + * @min_idx_lebs: minimum number of LEBs required for the index + * @old_idx_sz: size of index on flash + * @calc_idx_sz: temporary variable which is used to calculate new index size + * (contains accurate new index size at end of TNC commit start) + * @lst: lprops statistics + * + * @page_budget: budget for a page + * @inode_budget: budget for an inode + * @dent_budget: budget for a directory entry + * + * @ref_node_alsz: size of the LEB reference node aligned to the min. flash + * I/O unit + * @mst_node_alsz: master node aligned size + * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary + * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary + * @max_inode_sz: maximum possible inode size in bytes + * @max_znode_sz: size of znode in bytes + * @dead_wm: LEB dead space watermark + * @dark_wm: LEB dark space watermark + * @block_cnt: count of 4KiB blocks on the FS + * + * @ranges: UBIFS node length ranges + * @ubi: UBI volume descriptor + * @di: UBI device information + * @vi: UBI volume information + * + * @orph_tree: rb-tree of orphan inode numbers + * @orph_list: list of orphan inode numbers in order added + * @orph_new: list of orphan inode numbers added since last commit + * @orph_cnext: next orphan to commit + * @orph_dnext: next orphan to delete + * @orphan_lock: lock for orph_tree and orph_new + * @orph_buf: buffer for orphan nodes + * @new_orphans: number of orphans since last commit + * @cmt_orphans: number of orphans being committed + * @tot_orphans: number of orphans in the rb_tree + * @max_orphans: maximum number of orphans allowed + * @ohead_lnum: orphan head LEB number + * @ohead_offs: orphan head offset + * @no_orphs: non-zero if there are no orphans + * + * @bgt: UBIFS background thread + * @bgt_name: background thread name + * @need_bgt: if background thread should run + * @need_wbuf_sync: if write-buffers have to be synchronized + * + * @gc_lnum: LEB number used for garbage collection + * @sbuf: a buffer of LEB size used by GC and replay for scanning + * @idx_gc: list of index LEBs that have been garbage collected + * @idx_gc_cnt: number of elements on the idx_gc list + * + * @infos_list: links all 'ubifs_info' objects + * @umount_mutex: serializes shrinker and un-mount + * @shrinker_run_no: shrinker run number + * + * @space_bits: number of bits needed to record free or dirty space + * @lpt_lnum_bits: number of bits needed to record a LEB number in the LPT + * @lpt_offs_bits: number of bits needed to record an offset in the LPT + * @lpt_spc_bits: number of bits needed to space in the LPT + * @pcnt_bits: number of bits needed to record pnode or nnode number + * @lnum_bits: number of bits needed to record LEB number + * @nnode_sz: size of on-flash nnode + * @pnode_sz: size of on-flash pnode + * @ltab_sz: size of on-flash LPT lprops table + * @lsave_sz: size of on-flash LPT save table + * @pnode_cnt: number of pnodes + * @nnode_cnt: number of nnodes + * @lpt_hght: height of the LPT + * @pnodes_have: number of pnodes in memory + * + * @lp_mutex: protects lprops table and all the other lprops-related fields + * @lpt_lnum: LEB number of the root nnode of the LPT + * @lpt_offs: offset of the root nnode of the LPT + * @nhead_lnum: LEB number of LPT head + * @nhead_offs: offset of LPT head + * @lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab + * @dirty_nn_cnt: number of dirty nnodes + * @dirty_pn_cnt: number of dirty pnodes + * @lpt_sz: LPT size + * @lpt_nod_buf: buffer for an on-flash nnode or pnode + * @lpt_buf: buffer of LEB size used by LPT + * @nroot: address in memory of the root nnode of the LPT + * @lpt_cnext: next LPT node to commit + * @lpt_heap: array of heaps of categorized lprops + * @dirty_idx: a (reverse sorted) copy of the LPROPS_DIRTY_IDX heap as at + * previous commit start + * @uncat_list: list of un-categorized LEBs + * @empty_list: list of empty LEBs + * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size) + * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size) + * @freeable_cnt: number of freeable LEBs in @freeable_list + * + * @ltab_lnum: LEB number of LPT's own lprops table + * @ltab_offs: offset of LPT's own lprops table + * @ltab: LPT's own lprops table + * @ltab_cmt: LPT's own lprops table (commit copy) + * @lsave_cnt: number of LEB numbers in LPT's save table + * @lsave_lnum: LEB number of LPT's save table + * @lsave_offs: offset of LPT's save table + * @lsave: LPT's save table + * @lscan_lnum: LEB number of last LPT scan + * + * @rp_size: size of the reserved pool in bytes + * @report_rp_size: size of the reserved pool reported to user-space + * @rp_uid: reserved pool user ID + * @rp_gid: reserved pool group ID + * + * @empty: if the UBI device is empty + * @replay_tree: temporary tree used during journal replay + * @replay_list: temporary list used during journal replay + * @replay_buds: list of buds to replay + * @cs_sqnum: sequence number of first node in the log (commit start node) + * @replay_sqnum: sequence number of node currently being replayed + * @need_recovery: file-system needs recovery + * @replaying: set to %1 during journal replay + * @unclean_leb_list: LEBs to recover when mounting ro to rw + * @rcvrd_mst_node: recovered master node to write when mounting ro to rw + * @size_tree: inode size information for recovery + * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY) + * @mount_opts: UBIFS-specific mount options + * + * @dbg_buf: a buffer of LEB size used for debugging purposes + * @old_zroot: old index root - used by 'dbg_check_old_index()' + * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' + * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' + * @failure_mode: failure mode for recovery testing + * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls + * @fail_timeout: time in jiffies when delay of failure mode expires + * @fail_cnt: current number of calls to failure mode I/O functions + * @fail_cnt_max: number of calls by which to delay failure mode + */ +struct ubifs_info { + struct super_block *vfs_sb; + struct backing_dev_info bdi; + + ino_t highest_inum; + unsigned int vfs_gen; + unsigned long long max_sqnum; + unsigned long long cmt_no; + spinlock_t cnt_lock; + int fmt_version; + unsigned char uuid[16]; + + int lhead_lnum; + int lhead_offs; + int ltail_lnum; + struct mutex log_mutex; + int min_log_bytes; + long long cmt_bud_bytes; + + struct rb_root buds; + long long bud_bytes; + spinlock_t buds_lock; + int jhead_cnt; + struct ubifs_jhead *jheads; + long long max_bud_bytes; + long long bg_bud_bytes; + struct list_head old_buds; + int max_bud_cnt; + + struct rw_semaphore commit_sem; + int cmt_state; + spinlock_t cs_lock; + wait_queue_head_t cmt_wq; + unsigned int fast_unmount:1; + unsigned int big_lpt:1; + unsigned int check_lpt_free:1; + unsigned int nospace:1; + unsigned int nospace_rp:1; + + struct mutex tnc_mutex; + struct ubifs_zbranch zroot; + struct ubifs_znode *cnext; + struct ubifs_znode *enext; + int *gap_lebs; + void *cbuf; + void *ileb_buf; + int ileb_len; + int ihead_lnum; + int ihead_offs; + int *ilebs; + int ileb_cnt; + int ileb_nxt; + struct rb_root old_idx; + int *bottom_up_buf; +#ifdef CONFIG_UBIFS_FS_DEBUG + int new_ihead_lnum; + int new_ihead_offs; +#endif + + struct ubifs_mst_node *mst_node; + int mst_offs; + struct mutex mst_mutex; + + int log_lebs; + long long log_bytes; + int log_last; + int lpt_lebs; + int lpt_first; + int lpt_last; + int orph_lebs; + int orph_first; + int orph_last; + int main_lebs; + int main_first; + long long main_bytes; + int default_compr; + + uint8_t key_hash_type; + uint32_t (*key_hash)(const char *str, int len); + int key_fmt; + int key_len; + int fanout; + + int min_io_size; + int min_io_shift; + int leb_size; + int half_leb_size; + int leb_cnt; + int max_leb_cnt; + int old_leb_cnt; + int ro_media; + + atomic_long_t dirty_pg_cnt; + atomic_long_t dirty_zn_cnt; + atomic_long_t clean_zn_cnt; + + long long budg_idx_growth; + long long budg_data_growth; + long long budg_dd_growth; + long long budg_uncommitted_idx; + spinlock_t space_lock; + int min_idx_lebs; + unsigned long long old_idx_sz; + unsigned long long calc_idx_sz; + struct ubifs_lp_stats lst; + + int page_budget; + int inode_budget; + int dent_budget; + + int ref_node_alsz; + int mst_node_alsz; + int min_idx_node_sz; + int max_idx_node_sz; + long long max_inode_sz; + int max_znode_sz; + int dead_wm; + int dark_wm; + int block_cnt; + + struct ubifs_node_range ranges[UBIFS_NODE_TYPES_CNT]; + struct ubi_volume_desc *ubi; + struct ubi_device_info di; + struct ubi_volume_info vi; + + struct rb_root orph_tree; + struct list_head orph_list; + struct list_head orph_new; + struct ubifs_orphan *orph_cnext; + struct ubifs_orphan *orph_dnext; + spinlock_t orphan_lock; + void *orph_buf; + int new_orphans; + int cmt_orphans; + int tot_orphans; + int max_orphans; + int ohead_lnum; + int ohead_offs; + int no_orphs; + + struct task_struct *bgt; + char bgt_name[sizeof(BGT_NAME_PATTERN) + 9]; + int need_bgt; + int need_wbuf_sync; + + int gc_lnum; + void *sbuf; + struct list_head idx_gc; + int idx_gc_cnt; + + struct list_head infos_list; + struct mutex umount_mutex; + unsigned int shrinker_run_no; + + int space_bits; + int lpt_lnum_bits; + int lpt_offs_bits; + int lpt_spc_bits; + int pcnt_bits; + int lnum_bits; + int nnode_sz; + int pnode_sz; + int ltab_sz; + int lsave_sz; + int pnode_cnt; + int nnode_cnt; + int lpt_hght; + int pnodes_have; + + struct mutex lp_mutex; + int lpt_lnum; + int lpt_offs; + int nhead_lnum; + int nhead_offs; + int lpt_drty_flgs; + int dirty_nn_cnt; + int dirty_pn_cnt; + long long lpt_sz; + void *lpt_nod_buf; + void *lpt_buf; + struct ubifs_nnode *nroot; + struct ubifs_cnode *lpt_cnext; + struct ubifs_lpt_heap lpt_heap[LPROPS_HEAP_CNT]; + struct ubifs_lpt_heap dirty_idx; + struct list_head uncat_list; + struct list_head empty_list; + struct list_head freeable_list; + struct list_head frdi_idx_list; + int freeable_cnt; + + int ltab_lnum; + int ltab_offs; + struct ubifs_lpt_lprops *ltab; + struct ubifs_lpt_lprops *ltab_cmt; + int lsave_cnt; + int lsave_lnum; + int lsave_offs; + int *lsave; + int lscan_lnum; + + long long rp_size; + long long report_rp_size; + uid_t rp_uid; + gid_t rp_gid; + + /* The below fields are used only during mounting and re-mounting */ + int empty; + struct rb_root replay_tree; + struct list_head replay_list; + struct list_head replay_buds; + unsigned long long cs_sqnum; + unsigned long long replay_sqnum; + int need_recovery; + int replaying; + struct list_head unclean_leb_list; + struct ubifs_mst_node *rcvrd_mst_node; + struct rb_root size_tree; + int remounting_rw; + struct ubifs_mount_opts mount_opts; + +#ifdef CONFIG_UBIFS_FS_DEBUG + void *dbg_buf; + struct ubifs_zbranch old_zroot; + int old_zroot_level; + unsigned long long old_zroot_sqnum; + int failure_mode; + int fail_delay; + unsigned long fail_timeout; + unsigned int fail_cnt; + unsigned int fail_cnt_max; +#endif +}; + +extern struct list_head ubifs_infos; +extern spinlock_t ubifs_infos_lock; +extern atomic_long_t ubifs_clean_zn_cnt; +extern struct kmem_cache *ubifs_inode_slab; +extern struct super_operations ubifs_super_operations; +extern struct address_space_operations ubifs_file_address_operations; +extern struct file_operations ubifs_file_operations; +extern struct inode_operations ubifs_file_inode_operations; +extern struct file_operations ubifs_dir_operations; +extern struct inode_operations ubifs_dir_inode_operations; +extern struct inode_operations ubifs_symlink_inode_operations; +extern struct backing_dev_info ubifs_backing_dev_info; +extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; + +/* io.c */ +int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); +int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, + int dtype); +int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf); +int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, + int lnum, int offs); +int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, + int lnum, int offs); +int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, + int offs, int dtype); +int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, + int offs, int quiet); +void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); +void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last); +int ubifs_io_init(struct ubifs_info *c); +void ubifs_pad(const struct ubifs_info *c, void *buf, int pad); +int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf); +int ubifs_bg_wbufs_sync(struct ubifs_info *c); +void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum); +int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode); + +/* scan.c */ +struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, + int offs, void *sbuf); +void ubifs_scan_destroy(struct ubifs_scan_leb *sleb); +int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, + int offs, int quiet); +struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, + int offs, void *sbuf); +void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, + int lnum, int offs); +int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, + void *buf, int offs); +void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, + void *buf); + +/* log.c */ +void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud); +void ubifs_create_buds_lists(struct ubifs_info *c); +int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs); +struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum); +struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum); +int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum); +int ubifs_log_end_commit(struct ubifs_info *c, int new_ltail_lnum); +int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum); +int ubifs_consolidate_log(struct ubifs_info *c); + +/* journal.c */ +int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, + const struct qstr *nm, const struct inode *inode, + int deletion, int xent); +int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, + const union ubifs_key *key, const void *buf, int len); +int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, + int last_reference); +int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, + const struct dentry *old_dentry, + const struct inode *new_dir, + const struct dentry *new_dentry, int sync); +int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, + loff_t old_size, loff_t new_size); +int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, + const struct inode *inode, const struct qstr *nm); +int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode1, + const struct inode *inode2); + +/* budget.c */ +int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req); +void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req); +void ubifs_release_dirty_inode_budget(struct ubifs_info *c, + struct ubifs_inode *ui); +int ubifs_budget_inode_op(struct ubifs_info *c, struct inode *inode, + struct ubifs_budget_req *req); +void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, + struct ubifs_budget_req *req); +void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, + struct ubifs_budget_req *req); +long long ubifs_budg_get_free_space(struct ubifs_info *c); +int ubifs_calc_min_idx_lebs(struct ubifs_info *c); +void ubifs_convert_page_budget(struct ubifs_info *c); +long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); + +/* find.c */ +int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, + int squeeze); +int ubifs_find_free_leb_for_idx(struct ubifs_info *c); +int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, + int min_space, int pick_free); +int ubifs_find_dirty_idx_leb(struct ubifs_info *c); +int ubifs_save_dirty_idx_lnums(struct ubifs_info *c); + +/* tnc.c */ +int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode **zn, int *n); +int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, + void *node); +int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, + void *node, const struct qstr *nm); +int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, + void *node, int *lnum, int *offs); +int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, + int offs, int len); +int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, + int old_lnum, int old_offs, int lnum, int offs, int len); +int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, + int lnum, int offs, int len, const struct qstr *nm); +int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key); +int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, + const struct qstr *nm); +int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, + union ubifs_key *to_key); +int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum); +struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, + union ubifs_key *key, + const struct qstr *nm); +void ubifs_tnc_close(struct ubifs_info *c); +int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level, + int lnum, int offs, int is_idx); +int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level, + int lnum, int offs); +/* Shared by tnc.c for tnc_commit.c */ +void destroy_old_idx(struct ubifs_info *c); +int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level, + int lnum, int offs); +int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode); + +/* tnc_misc.c */ +struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, + struct ubifs_znode *znode); +int ubifs_search_zbranch(const struct ubifs_info *c, + const struct ubifs_znode *znode, + const union ubifs_key *key, int *n); +struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode); +struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode); +long ubifs_destroy_tnc_subtree(struct ubifs_znode *zr); +struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c, + struct ubifs_zbranch *zbr, + struct ubifs_znode *parent, int iip); +int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, + void *node); + +/* tnc_commit.c */ +int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); +int ubifs_tnc_end_commit(struct ubifs_info *c); + +/* shrinker.c */ +int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask); + +/* commit.c */ +int ubifs_bg_thread(void *info); +void ubifs_commit_required(struct ubifs_info *c); +void ubifs_request_bg_commit(struct ubifs_info *c); +int ubifs_run_commit(struct ubifs_info *c); +void ubifs_recovery_commit(struct ubifs_info *c); +int ubifs_gc_should_commit(struct ubifs_info *c); +void ubifs_wait_for_commit(struct ubifs_info *c); + +/* master.c */ +int ubifs_read_master(struct ubifs_info *c); +int ubifs_write_master(struct ubifs_info *c); + +/* sb.c */ +int ubifs_read_superblock(struct ubifs_info *c); +struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); +int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); + +/* replay.c */ +int ubifs_validate_entry(struct ubifs_info *c, + const struct ubifs_dent_node *dent); +int ubifs_replay_journal(struct ubifs_info *c); + +/* gc.c */ +int ubifs_garbage_collect(struct ubifs_info *c, int anyway); +int ubifs_gc_start_commit(struct ubifs_info *c); +int ubifs_gc_end_commit(struct ubifs_info *c); +void ubifs_destroy_idx_gc(struct ubifs_info *c); +int ubifs_get_idx_gc_leb(struct ubifs_info *c); +int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp); + +/* orphan.c */ +int ubifs_add_orphan(struct ubifs_info *c, ino_t inum); +void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum); +int ubifs_orphan_start_commit(struct ubifs_info *c); +int ubifs_orphan_end_commit(struct ubifs_info *c); +int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only); + +/* lpt.c */ +int ubifs_calc_lpt_geom(struct ubifs_info *c); +int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, + int *lpt_lebs, int *big_lpt); +int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr); +struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum); +struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum); +int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum, + ubifs_lpt_scan_callback scan_cb, void *data); + +/* Shared by lpt.c for lpt_commit.c */ +void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave); +void ubifs_pack_ltab(struct ubifs_info *c, void *buf, + struct ubifs_lpt_lprops *ltab); +void ubifs_pack_pnode(struct ubifs_info *c, void *buf, + struct ubifs_pnode *pnode); +void ubifs_pack_nnode(struct ubifs_info *c, void *buf, + struct ubifs_nnode *nnode); +struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c, + struct ubifs_nnode *parent, int iip); +struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c, + struct ubifs_nnode *parent, int iip); +int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip); +void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty); +void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode); +uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits); +struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght); + +/* lpt_commit.c */ +int ubifs_lpt_start_commit(struct ubifs_info *c); +int ubifs_lpt_end_commit(struct ubifs_info *c); +int ubifs_lpt_post_commit(struct ubifs_info *c); +void ubifs_lpt_free(struct ubifs_info *c, int wr_only); + +/* lprops.c */ +void ubifs_get_lprops(struct ubifs_info *c); +const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, + const struct ubifs_lprops *lp, + int free, int dirty, int flags, + int idx_gc_cnt); +void ubifs_release_lprops(struct ubifs_info *c); +void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats); +void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, + int cat); +void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, + struct ubifs_lprops *new_lprops); +void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops); +int ubifs_categorize_lprops(const struct ubifs_info *c, + const struct ubifs_lprops *lprops); +int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, + int flags_set, int flags_clean, int idx_gc_cnt); +int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, + int flags_set, int flags_clean); +int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp); +const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c); +const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c); +const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c); +const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c); + +/* file.c */ +int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync); +int ubifs_setattr(struct dentry *dentry, struct iattr *attr); + +/* dir.c */ +struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, + int mode); +int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat); + +/* xattr.c */ +int ubifs_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); +ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, + size_t size); +ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size); +int ubifs_removexattr(struct dentry *dentry, const char *name); + +/* super.c */ +struct inode *ubifs_iget(struct super_block *sb, unsigned long inum); + +/* recovery.c */ +int ubifs_recover_master_node(struct ubifs_info *c); +int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); +struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, + int offs, void *sbuf, int grouped); +struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, + int offs, void *sbuf); +int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); +int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf); +int ubifs_rcvry_gc_commit(struct ubifs_info *c); +int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, + int deletion, loff_t new_size); +int ubifs_recover_size(struct ubifs_info *c); +void ubifs_destroy_size_tree(struct ubifs_info *c); + +/* ioctl.c */ +long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +void ubifs_set_inode_flags(struct inode *inode); +#ifdef CONFIG_COMPAT +long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +#endif + +/* compressor.c */ +int __init ubifs_compressors_init(void); +void __exit ubifs_compressors_exit(void); +void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, + int *compr_type); +int ubifs_decompress(const void *buf, int len, void *out, int *out_len, + int compr_type); + +#include "debug.h" +#include "misc.h" +#include "key.h" + +#endif /* !__UBIFS_H__ */ diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c new file mode 100644 index 0000000..1388a07 --- /dev/null +++ b/fs/ubifs/xattr.c @@ -0,0 +1,581 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file implements UBIFS extended attributes support. + * + * Extended attributes are implemented as regular inodes with attached data, + * which limits extended attribute size to UBIFS block size (4KiB). Names of + * extended attributes are described by extended attribute entries (xentries), + * which are almost identical to directory entries, but have different key type. + * + * In other words, the situation with extended attributes is very similar to + * directories. Indeed, any inode (but of course not xattr inodes) may have a + * number of associated xentries, just like directory inodes have associated + * directory entries. Extended attribute entries store the name of the extended + * attribute, the host inode number, and the extended attribute inode number. + * Similarly, direntries store the name, the parent and the target inode + * numbers. Thus, most of the common UBIFS mechanisms may be re-used for + * extended attributes. + * + * The number of extended attributes is not limited, but there is Linux + * limitation on the maximum possible size of the list of all extended + * attributes associated with an inode (%XATTR_LIST_MAX), so UBIFS makes sure + * the sum of all extended attribute names of the inode does not exceed that + * limit. + * + * Extended attributes are synchronous, which means they are written to the + * flash media synchronously and there is no write-back for extended attribute + * inodes. The extended attribute values are not stored in compressed form on + * the media. + * + * Since extended attributes are represented by regular inodes, they are cached + * in the VFS inode cache. The xentries are cached in the LNC cache (see + * tnc.c). + * + * ACL support is not implemented. + */ + +#include +#include +#include "ubifs.h" + +/* + * Limit the number of extended attributes per inode so that the total size + * (xattr_size) is guaranteeded to fit in an 'unsigned int'. + */ +#define MAX_XATTRS_PER_INODE 65535 + +/* + * Extended attribute type constants. + * + * USER_XATTR: user extended attribute ("user.*") + * TRUSTED_XATTR: trusted extended attribute ("trusted.*) + * SECURITY_XATTR: security extended attribute ("security.*") + */ +enum { + USER_XATTR, + TRUSTED_XATTR, + SECURITY_XATTR, +}; + +static struct inode_operations none_inode_operations; +static struct address_space_operations none_address_operations; +static struct file_operations none_file_operations; + +/** + * create_xattr - create an extended attribute. + * @c: UBIFS file-system description object + * @host: host inode + * @nm: extended attribute name + * @value: extended attribute value + * @size: size of extended attribute value + * + * This is a helper function which creates an extended attribute of name @nm + * and value @value for inode @host. The host inode is also updated on flash + * because the ctime and extended attribute accounting data changes. This + * function returns zero in case of success and a negative error code in case + * of failure. + */ +static int create_xattr(struct ubifs_info *c, struct inode *host, + const struct qstr *nm, const void *value, int size) +{ + int err; + struct inode *inode; + struct ubifs_inode *ui, *host_ui = ubifs_inode(host); + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .new_ino_d = size, .dirtied_ino = 1, + .dirtied_ino_d = host_ui->data_len}; + + if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) + return -ENOSPC; + /* + * Linux limits the maximum size of the extended attribute names list + * to %XATTR_LIST_MAX. This means we should not allow creating more* + * extended attributes if the name list becomes larger. This limitation + * is artificial for UBIFS, though. + */ + if (host_ui->xattr_names + host_ui->xattr_cnt + + nm->len + 1 > XATTR_LIST_MAX) + return -ENOSPC; + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + inode = ubifs_new_inode(c, host, S_IFREG | S_IRWXUGO); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_budg; + } + + mutex_lock(&host_ui->ui_mutex); + /* Re-define all operations to be "nothing" */ + inode->i_mapping->a_ops = &none_address_operations; + inode->i_op = &none_inode_operations; + inode->i_fop = &none_file_operations; + + inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA; + ui = ubifs_inode(inode); + ui->xattr = 1; + ui->flags |= UBIFS_XATTR_FL; + ui->data = kmalloc(size, GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_unlock; + } + + memcpy(ui->data, value, size); + host->i_ctime = ubifs_current_time(host); + host_ui->xattr_cnt += 1; + host_ui->xattr_size += CALC_DENT_SIZE(nm->len); + host_ui->xattr_size += CALC_XATTR_BYTES(size); + host_ui->xattr_names += nm->len; + + /* + * We do not use i_size_write() because nobody can race with us as we + * are holding host @host->i_mutex - every xattr operation for this + * inode is serialized by it. + */ + inode->i_size = ui->ui_size = size; + ui->data_len = size; + err = ubifs_jnl_update(c, host, nm, inode, 0, 1); + if (err) + goto out_cancel; + mutex_unlock(&host_ui->ui_mutex); + + ubifs_release_budget(c, &req); + insert_inode_hash(inode); + iput(inode); + return 0; + +out_cancel: + host_ui->xattr_cnt -= 1; + host_ui->xattr_size -= CALC_DENT_SIZE(nm->len); + host_ui->xattr_size -= CALC_XATTR_BYTES(size); +out_unlock: + mutex_unlock(&host_ui->ui_mutex); + make_bad_inode(inode); + iput(inode); +out_budg: + ubifs_release_budget(c, &req); + return err; +} + +/** + * change_xattr - change an extended attribute. + * @c: UBIFS file-system description object + * @host: host inode + * @inode: extended attribute inode + * @value: extended attribute value + * @size: size of extended attribute value + * + * This helper function changes the value of extended attribute @inode with new + * data from @value. Returns zero in case of success and a negative error code + * in case of failure. + */ +static int change_xattr(struct ubifs_info *c, struct inode *host, + struct inode *inode, const void *value, int size) +{ + int err; + struct ubifs_inode *host_ui = ubifs_inode(host); + struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_budget_req req = { .dirtied_ino = 2, + .dirtied_ino_d = size + host_ui->data_len }; + + ubifs_assert(ui->data_len == inode->i_size); + err = ubifs_budget_space(c, &req); + if (err) + return err; + + mutex_lock(&host_ui->ui_mutex); + host->i_ctime = ubifs_current_time(host); + host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); + host_ui->xattr_size += CALC_XATTR_BYTES(size); + + kfree(ui->data); + ui->data = kmalloc(size, GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_unlock; + } + + memcpy(ui->data, value, size); + inode->i_size = ui->ui_size = size; + ui->data_len = size; + + /* + * It is important to write the host inode after the xattr inode + * because if the host inode gets synchronized (via 'fsync()'), then + * the extended attribute inode gets synchronized, because it goes + * before the host inode in the write-buffer. + */ + err = ubifs_jnl_change_xattr(c, inode, host); + if (err) + goto out_cancel; + mutex_unlock(&host_ui->ui_mutex); + + ubifs_release_budget(c, &req); + return 0; + +out_cancel: + host_ui->xattr_size -= CALC_XATTR_BYTES(size); + host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); + make_bad_inode(inode); +out_unlock: + mutex_unlock(&host_ui->ui_mutex); + ubifs_release_budget(c, &req); + return err; +} + +/** + * check_namespace - check extended attribute name-space. + * @nm: extended attribute name + * + * This function makes sure the extended attribute name belongs to one of the + * supported extended attribute name-spaces. Returns name-space index in case + * of success and a negative error code in case of failure. + */ +static int check_namespace(const struct qstr *nm) +{ + int type; + + if (nm->len > UBIFS_MAX_NLEN) + return -ENAMETOOLONG; + + if (!strncmp(nm->name, XATTR_TRUSTED_PREFIX, + XATTR_TRUSTED_PREFIX_LEN)) { + if (nm->name[sizeof(XATTR_TRUSTED_PREFIX) - 1] == '\0') + return -EINVAL; + type = TRUSTED_XATTR; + } else if (!strncmp(nm->name, XATTR_USER_PREFIX, + XATTR_USER_PREFIX_LEN)) { + if (nm->name[XATTR_USER_PREFIX_LEN] == '\0') + return -EINVAL; + type = USER_XATTR; + } else if (!strncmp(nm->name, XATTR_SECURITY_PREFIX, + XATTR_SECURITY_PREFIX_LEN)) { + if (nm->name[sizeof(XATTR_SECURITY_PREFIX) - 1] == '\0') + return -EINVAL; + type = SECURITY_XATTR; + } else + return -EOPNOTSUPP; + + return type; +} + +static struct inode *iget_xattr(struct ubifs_info *c, ino_t inum) +{ + struct inode *inode; + + inode = ubifs_iget(c->vfs_sb, inum); + if (IS_ERR(inode)) { + ubifs_err("dead extended attribute entry, error %d", + (int)PTR_ERR(inode)); + return inode; + } + if (ubifs_inode(inode)->xattr) + return inode; + ubifs_err("corrupt extended attribute entry"); + iput(inode); + return ERR_PTR(-EINVAL); +} + +int ubifs_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + struct inode *inode, *host = dentry->d_inode; + struct ubifs_info *c = host->i_sb->s_fs_info; + struct qstr nm = { .name = name, .len = strlen(name) }; + struct ubifs_dent_node *xent; + union ubifs_key key; + int err, type; + + dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name, + host->i_ino, dentry->d_name.len, dentry->d_name.name, size); + + if (size > UBIFS_MAX_INO_DATA) + return -ERANGE; + + type = check_namespace(&nm); + if (type < 0) + return type; + + xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS); + if (!xent) + return -ENOMEM; + + /* + * The extended attribute entries are stored in LNC, so multiple + * look-ups do not involve reading the flash. + */ + xent_key_init(c, &key, host->i_ino, &nm); + err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); + if (err) { + if (err != -ENOENT) + goto out_free; + + if (flags & XATTR_REPLACE) + /* We are asked not to create the xattr */ + err = -ENODATA; + else + err = create_xattr(c, host, &nm, value, size); + goto out_free; + } + + if (flags & XATTR_CREATE) { + /* We are asked not to replace the xattr */ + err = -EEXIST; + goto out_free; + } + + inode = iget_xattr(c, le64_to_cpu(xent->inum)); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_free; + } + + err = change_xattr(c, host, inode, value, size); + iput(inode); + +out_free: + kfree(xent); + return err; +} + +ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, + size_t size) +{ + struct inode *inode, *host = dentry->d_inode; + struct ubifs_info *c = host->i_sb->s_fs_info; + struct qstr nm = { .name = name, .len = strlen(name) }; + struct ubifs_inode *ui; + struct ubifs_dent_node *xent; + union ubifs_key key; + int err; + + dbg_gen("xattr '%s', ino %lu ('%.*s'), buf size %zd", name, + host->i_ino, dentry->d_name.len, dentry->d_name.name, size); + + err = check_namespace(&nm); + if (err < 0) + return err; + + xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS); + if (!xent) + return -ENOMEM; + + mutex_lock(&host->i_mutex); + xent_key_init(c, &key, host->i_ino, &nm); + err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); + if (err) { + if (err == -ENOENT) + err = -ENODATA; + goto out_unlock; + } + + inode = iget_xattr(c, le64_to_cpu(xent->inum)); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_unlock; + } + + ui = ubifs_inode(inode); + ubifs_assert(inode->i_size == ui->data_len); + ubifs_assert(ubifs_inode(host)->xattr_size > ui->data_len); + + if (buf) { + /* If @buf is %NULL we are supposed to return the length */ + if (ui->data_len > size) { + dbg_err("buffer size %zd, xattr len %d", + size, ui->data_len); + err = -ERANGE; + goto out_iput; + } + + memcpy(buf, ui->data, ui->data_len); + } + err = ui->data_len; + +out_iput: + iput(inode); +out_unlock: + mutex_unlock(&host->i_mutex); + kfree(xent); + return err; +} + +ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + union ubifs_key key; + struct inode *host = dentry->d_inode; + struct ubifs_info *c = host->i_sb->s_fs_info; + struct ubifs_inode *host_ui = ubifs_inode(host); + struct ubifs_dent_node *xent, *pxent = NULL; + int err, len, written = 0; + struct qstr nm = { .name = NULL }; + + dbg_gen("ino %lu ('%.*s'), buffer size %zd", host->i_ino, + dentry->d_name.len, dentry->d_name.name, size); + + len = host_ui->xattr_names + host_ui->xattr_cnt; + if (!buffer) + /* + * We should return the minimum buffer size which will fit a + * null-terminated list of all the extended attribute names. + */ + return len; + + if (len > size) + return -ERANGE; + + lowest_xent_key(c, &key, host->i_ino); + + mutex_lock(&host->i_mutex); + while (1) { + int type; + + xent = ubifs_tnc_next_ent(c, &key, &nm); + if (unlikely(IS_ERR(xent))) { + err = PTR_ERR(xent); + break; + } + + nm.name = xent->name; + nm.len = le16_to_cpu(xent->nlen); + + type = check_namespace(&nm); + if (unlikely(type < 0)) { + err = type; + break; + } + + /* Show trusted namespace only for "power" users */ + if (type != TRUSTED_XATTR || capable(CAP_SYS_ADMIN)) { + memcpy(buffer + written, nm.name, nm.len + 1); + written += nm.len + 1; + } + + kfree(pxent); + pxent = xent; + key_read(c, &xent->key, &key); + } + mutex_unlock(&host->i_mutex); + + kfree(pxent); + if (err != -ENOENT) { + ubifs_err("cannot find next direntry, error %d", err); + return err; + } + + ubifs_assert(written <= size); + return written; +} + +static int remove_xattr(struct ubifs_info *c, struct inode *host, + struct inode *inode, const struct qstr *nm) +{ + int err; + struct ubifs_inode *host_ui = ubifs_inode(host); + struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_budget_req req = { .dirtied_ino = 1, .mod_dent = 1, + .dirtied_ino_d = host_ui->data_len }; + + ubifs_assert(ui->data_len == inode->i_size); + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + mutex_lock(&host_ui->ui_mutex); + host->i_ctime = ubifs_current_time(host); + host_ui->xattr_cnt -= 1; + host_ui->xattr_size -= CALC_DENT_SIZE(nm->len); + host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); + host_ui->xattr_names -= nm->len; + + err = ubifs_jnl_delete_xattr(c, host, inode, nm); + if (err) + goto out_cancel; + mutex_unlock(&host_ui->ui_mutex); + + ubifs_release_budget(c, &req); + return 0; + +out_cancel: + host_ui->xattr_cnt += 1; + host_ui->xattr_size += CALC_DENT_SIZE(nm->len); + host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); + mutex_unlock(&host_ui->ui_mutex); + ubifs_release_budget(c, &req); + make_bad_inode(inode); + return err; +} + +int ubifs_removexattr(struct dentry *dentry, const char *name) +{ + struct inode *inode, *host = dentry->d_inode; + struct ubifs_info *c = host->i_sb->s_fs_info; + struct qstr nm = { .name = name, .len = strlen(name) }; + struct ubifs_dent_node *xent; + union ubifs_key key; + int err; + + dbg_gen("xattr '%s', ino %lu ('%.*s')", name, + host->i_ino, dentry->d_name.len, dentry->d_name.name); + ubifs_assert(mutex_is_locked(&host->i_mutex)); + + err = check_namespace(&nm); + if (err < 0) + return err; + + xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS); + if (!xent) + return -ENOMEM; + + xent_key_init(c, &key, host->i_ino, &nm); + err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); + if (err) { + if (err == -ENOENT) + err = -ENODATA; + goto out_free; + } + + inode = iget_xattr(c, le64_to_cpu(xent->inum)); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_free; + } + + ubifs_assert(inode->i_nlink == 1); + inode->i_nlink = 0; + err = remove_xattr(c, host, inode, &nm); + if (err) + inode->i_nlink = 1; + + /* If @i_nlink is 0, 'iput()' will delete the inode */ + iput(inode); + +out_free: + kfree(xent); + return err; +} -- cgit v0.10.2 From 0d7eff873caaeac84de01a1acdca983d2c7ba3fe Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 14 Jul 2008 19:08:38 +0300 Subject: UBIFS: include to compilation Add UBIFS to Makefile and Kbuild. Signed-off-by: Artem Bityutskiy Signed-off-by: Adrian Hunter diff --git a/fs/Kconfig b/fs/Kconfig index 2694648..a52cf62 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1375,6 +1375,9 @@ config JFFS2_CMODE_FAVOURLZO endchoice +# UBIFS File system configuration +source "fs/ubifs/Kconfig" + config CRAMFS tristate "Compressed ROM file system support (cramfs)" depends on BLOCK diff --git a/fs/Makefile b/fs/Makefile index 1e7a11b..fcae06a 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -100,6 +100,7 @@ obj-$(CONFIG_NTFS_FS) += ntfs/ obj-$(CONFIG_UFS_FS) += ufs/ obj-$(CONFIG_EFS_FS) += efs/ obj-$(CONFIG_JFFS2_FS) += jffs2/ +obj-$(CONFIG_UBIFS_FS) += ubifs/ obj-$(CONFIG_AFFS_FS) += affs/ obj-$(CONFIG_ROMFS_FS) += romfs/ obj-$(CONFIG_QNX4FS_FS) += qnx4/ diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig new file mode 100644 index 0000000..91ceeda --- /dev/null +++ b/fs/ubifs/Kconfig @@ -0,0 +1,72 @@ +config UBIFS_FS + tristate "UBIFS file system support" + select CRC16 + select CRC32 + select CRYPTO if UBIFS_FS_ADVANCED_COMPR + select CRYPTO if UBIFS_FS_LZO + select CRYPTO if UBIFS_FS_ZLIB + select CRYPTO_LZO if UBIFS_FS_LZO + select CRYPTO_DEFLATE if UBIFS_FS_ZLIB + depends on MTD_UBI + help + UBIFS is a file system for flash devices which works on top of UBI. + +config UBIFS_FS_XATTR + bool "Extended attributes support" + depends on UBIFS_FS + help + This option enables support of extended attributes. + +config UBIFS_FS_ADVANCED_COMPR + bool "Advanced compression options" + depends on UBIFS_FS + help + This option allows to explicitly choose which compressions, if any, + are enabled in UBIFS. Removing compressors means inbility to read + existing file systems. + + If unsure, say 'N'. + +config UBIFS_FS_LZO + bool "LZO compression support" if UBIFS_FS_ADVANCED_COMPR + depends on UBIFS_FS + default y + help + LZO compressor is generally faster then zlib but compresses worse. + Say 'Y' if unsure. + +config UBIFS_FS_ZLIB + bool "ZLIB compression support" if UBIFS_FS_ADVANCED_COMPR + depends on UBIFS_FS + default y + help + Zlib copresses better then LZO but it is slower. Say 'Y' if unsure. + +# Debugging-related stuff +config UBIFS_FS_DEBUG + bool "Enable debugging" + depends on UBIFS_FS + select DEBUG_FS + select KALLSYMS_ALL + help + This option enables UBIFS debugging. + +config UBIFS_FS_DEBUG_MSG_LVL + int "Default message level (0 = no extra messages, 3 = lots)" + depends on UBIFS_FS_DEBUG + default "0" + help + This controls the amount of debugging messages produced by UBIFS. + If reporting bugs, please try to have available a full dump of the + messages at level 1 while the misbehaviour was occurring. Level 2 + may become necessary if level 1 messages were not enough to find the + bug. Generally Level 3 should be avoided. + +config UBIFS_FS_DEBUG_CHKS + bool "Enable extra checks" + depends on UBIFS_FS_DEBUG + help + If extra checks are enabled UBIFS will check the consistency of its + internal data structures during operation. However, UBIFS performance + is dramatically slower when this option is selected especially if the + file system is large. diff --git a/fs/ubifs/Makefile b/fs/ubifs/Makefile new file mode 100644 index 0000000..80e93c3 --- /dev/null +++ b/fs/ubifs/Makefile @@ -0,0 +1,9 @@ +obj-$(CONFIG_UBIFS_FS) += ubifs.o + +ubifs-y += shrinker.o journal.o file.o dir.o super.o sb.o io.o +ubifs-y += tnc.o master.o scan.o replay.o log.o commit.o gc.o orphan.o +ubifs-y += budget.o find.o tnc_commit.o compress.o lpt.o lprops.o +ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o + +ubifs-$(CONFIG_UBIFS_FS_DEBUG) += debug.o +ubifs-$(CONFIG_UBIFS_FS_XATTR) += xattr.o -- cgit v0.10.2 From d93c768e66d8c3970187c179a91a2553b077d9e8 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 15 Jul 2008 20:11:21 +0200 Subject: pcmcia: fix return value in cm4000_cs.c should be -EINVAL, not EINVAL. Found by Peter Stuge. Signed-off-by: Dominik Brodowski diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c index 59ca351..e4a4fbd 100644 --- a/drivers/char/pcmcia/cm4000_cs.c +++ b/drivers/char/pcmcia/cm4000_cs.c @@ -1439,7 +1439,7 @@ static long cmm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) DEBUGP(4, dev, "CMM_ABSENT flag set\n"); goto out; } - rc = EINVAL; + rc = -EINVAL; if (_IOC_TYPE(cmd) != CM_IOC_MAGIC) { DEBUGP(4, dev, "ioctype mismatch\n"); -- cgit v0.10.2 From 002b90a1bf5fe9c8de7a8634403a685621841ff3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 15 Jul 2008 15:26:15 +0200 Subject: pcmcia: fix cisinfo_t removal The cisinfo_t removal patch (c5081d5f4775b2a3f858f91151bbf9163e473075 pcmcia: simplify pccard_validate_cis ) introduced a bug that prevented card detection, for the (info->Chains == MAX_TUPLES) check was replaced by (count), which is always true. Restoring the comparison to MAX_TUPLES makes everybody happy... [linux@dominikbrodowski.net: update changelog comment] Signed-off-by: Marc Zyngier Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c index 9fcff0c..65129b5 100644 --- a/drivers/pcmcia/cistpl.c +++ b/drivers/pcmcia/cistpl.c @@ -1490,7 +1490,7 @@ int pccard_validate_cis(struct pcmcia_socket *s, unsigned int function, unsigned ((tuple->TupleCode > 0x90) && (tuple->TupleCode < 0xff))) reserved++; } - if ((count) || (reserved > 5) || + if ((count == MAX_TUPLES) || (reserved > 5) || ((!dev_ok || !ident_ok) && (count > 10))) count = 0; -- cgit v0.10.2 From 417e1494fd70715b737428cc3c3d924255f22ba1 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Tue, 15 Jul 2008 11:30:00 -0500 Subject: pcmcia: ide-cs: Remove outdated comment There is an outdated version information comment in ide-cs. Signed-off-by: Larry Finger Signed-off-by: Dominik Brodowski diff --git a/drivers/ide/legacy/ide-cs.c b/drivers/ide/legacy/ide-cs.c index 8dbf4d9..41a2e25 100644 --- a/drivers/ide/legacy/ide-cs.c +++ b/drivers/ide/legacy/ide-cs.c @@ -66,8 +66,6 @@ MODULE_LICENSE("Dual MPL/GPL"); #ifdef CONFIG_PCMCIA_DEBUG INT_MODULE_PARM(pc_debug, 0); #define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -/*static char *version = -"ide-cs.c 1.3 2002/10/26 05:45:31 (David Hinds)";*/ #else #define DEBUG(n, args...) #endif -- cgit v0.10.2 From 166b88d755f925139af7f7b75aa0a1b896ca0670 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 14 Jul 2008 16:03:26 -0400 Subject: SUNRPC: Use correct XDR encoding procedure for rpcbind SET/UNSET The rpcbind versions 3 and 4 SET and UNSET procedures use the same arguments as the GETADDR procedure. While definitely a bug, this hasn't been a problem so far since the kernel hasn't used version 3 or 4 SET and UNSET. But this will change in just a moment. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 24e93e0..0021fad 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -426,6 +426,10 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) map->r_status = status; } +/* + * XDR functions for rpcbind + */ + static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, struct rpcbind_args *rpcb) { @@ -581,14 +585,14 @@ static struct rpc_procinfo rpcb_procedures2[] = { }; static struct rpc_procinfo rpcb_procedures3[] = { - PROC(SET, mapping, set), - PROC(UNSET, mapping, set), + PROC(SET, getaddr, set), + PROC(UNSET, getaddr, set), PROC(GETADDR, getaddr, getaddr), }; static struct rpc_procinfo rpcb_procedures4[] = { - PROC(SET, mapping, set), - PROC(UNSET, mapping, set), + PROC(SET, getaddr, set), + PROC(UNSET, getaddr, set), PROC(GETADDR, getaddr, getaddr), PROC(GETVERSADDR, getaddr, getaddr), }; -- cgit v0.10.2 From cc5598b78fd320dd6d1f90c14491e08029f3c4f6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 14 Jul 2008 16:03:27 -0400 Subject: SUNRPC: Introduce a specific rpcb_create for contacting localhost Add rpcb_create_local() for use by rpcb_register() and upcoming IPv6 registration functions. Ensure any errors encountered by rpcb_create_local() are properly reported. We can also use a statically allocated constant loopback socket address instead of one allocated on the stack and initialized every time the function is called. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 0021fad..35c1ded 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -116,6 +116,29 @@ static void rpcb_map_release(void *data) kfree(map); } +static const struct sockaddr_in rpcb_inaddr_loopback = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + .sin_port = htons(RPCBIND_PORT), +}; + +static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, + size_t addrlen, u32 version) +{ + struct rpc_create_args args = { + .protocol = XPRT_TRANSPORT_UDP, + .address = addr, + .addrsize = addrlen, + .servername = "localhost", + .program = &rpcb_program, + .version = version, + .authflavor = RPC_AUTH_UNIX, + .flags = RPC_CLNT_CREATE_NOPING, + }; + + return rpc_create(&args); +} + static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, size_t salen, int proto, u32 version, int privileged) @@ -161,10 +184,6 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, */ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) { - struct sockaddr_in sin = { - .sin_family = AF_INET, - .sin_addr.s_addr = htonl(INADDR_LOOPBACK), - }; struct rpcbind_args map = { .r_prog = prog, .r_vers = vers, @@ -184,14 +203,15 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) "rpcbind\n", (port ? "" : "un"), prog, vers, prot, port); - rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin, - sizeof(sin), XPRT_TRANSPORT_UDP, RPCBVERS_2, 1); - if (IS_ERR(rpcb_clnt)) - return PTR_ERR(rpcb_clnt); + rpcb_clnt = rpcb_create_local((struct sockaddr *)&rpcb_inaddr_loopback, + sizeof(rpcb_inaddr_loopback), + RPCBVERS_2); + if (!IS_ERR(rpcb_clnt)) { + error = rpc_call_sync(rpcb_clnt, &msg, 0); + rpc_shutdown_client(rpcb_clnt); + } else + error = PTR_ERR(rpcb_clnt); - error = rpc_call_sync(rpcb_clnt, &msg, 0); - - rpc_shutdown_client(rpcb_clnt); if (error < 0) printk(KERN_WARNING "RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); -- cgit v0.10.2 From 423d8b064771f5cd8b706a4839b18db9bb6c3c59 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 14 Jul 2008 16:03:28 -0400 Subject: SUNRPC: None of rpcb_create's callers wants a privileged source port Clean up: Callers that required a privileged source port now use rpcb_create_local(), so we can remove the @privileged argument from rpcb_create(). Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 35c1ded..691bd21 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -140,8 +140,7 @@ static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, } static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, - size_t salen, int proto, u32 version, - int privileged) + size_t salen, int proto, u32 version) { struct rpc_create_args args = { .protocol = proto, @@ -151,7 +150,8 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, .program = &rpcb_program, .version = version, .authflavor = RPC_AUTH_UNIX, - .flags = RPC_CLNT_CREATE_NOPING, + .flags = (RPC_CLNT_CREATE_NOPING | + RPC_CLNT_CREATE_NONPRIVPORT), }; switch (srvaddr->sa_family) { @@ -165,8 +165,6 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, return NULL; } - if (!privileged) - args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; return rpc_create(&args); } @@ -255,7 +253,7 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot) __func__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin, - sizeof(*sin), prot, RPCBVERS_2, 0); + sizeof(*sin), prot, RPCBVERS_2); if (IS_ERR(rpcb_clnt)) return PTR_ERR(rpcb_clnt); @@ -365,7 +363,7 @@ void rpcb_getport_async(struct rpc_task *task) task->tk_pid, __func__, bind_version); rpcb_clnt = rpcb_create(clnt->cl_server, sap, salen, xprt->prot, - bind_version, 0); + bind_version); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", -- cgit v0.10.2 From babe80eb4994dfdc97d5be19a68b5af66d667585 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 14 Jul 2008 16:03:29 -0400 Subject: SUNRPC: Refactor rpcb_register to make rpcbindv4 support easier rpcbind version 4 registration will reuse part of rpcb_register, so just split it out into a separate function now. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 691bd21..8b75c30 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -168,6 +168,30 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, return rpc_create(&args); } +static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, + u32 version, struct rpc_message *msg, + int *result) +{ + struct rpc_clnt *rpcb_clnt; + int error = 0; + + *result = 0; + + rpcb_clnt = rpcb_create_local(addr, addrlen, version); + if (!IS_ERR(rpcb_clnt)) { + error = rpc_call_sync(rpcb_clnt, msg, 0); + rpc_shutdown_client(rpcb_clnt); + } else + error = PTR_ERR(rpcb_clnt); + + if (error < 0) + printk(KERN_WARNING "RPC: failed to contact local rpcbind " + "server (errno %d).\n", -error); + dprintk("RPC: registration status %d/%d\n", error, *result); + + return error; +} + /** * rpcb_register - set or unset a port registration with the local rpcbind svc * @prog: RPC program number to bind @@ -189,33 +213,21 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) .r_port = port, }; struct rpc_message msg = { - .rpc_proc = &rpcb_procedures2[port ? - RPCBPROC_SET : RPCBPROC_UNSET], .rpc_argp = &map, .rpc_resp = okay, }; - struct rpc_clnt *rpcb_clnt; - int error = 0; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? "" : "un"), prog, vers, prot, port); - rpcb_clnt = rpcb_create_local((struct sockaddr *)&rpcb_inaddr_loopback, - sizeof(rpcb_inaddr_loopback), - RPCBVERS_2); - if (!IS_ERR(rpcb_clnt)) { - error = rpc_call_sync(rpcb_clnt, &msg, 0); - rpc_shutdown_client(rpcb_clnt); - } else - error = PTR_ERR(rpcb_clnt); + msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET]; + if (port) + msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; - if (error < 0) - printk(KERN_WARNING "RPC: failed to contact local rpcbind " - "server (errno %d).\n", -error); - dprintk("RPC: registration status %d/%d\n", error, *okay); - - return error; + return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, + sizeof(rpcb_inaddr_loopback), + RPCBVERS_2, &msg, okay); } /** -- cgit v0.10.2 From c2e1b09ff237c0a3687b9a804cc8bf489743cffc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 14 Jul 2008 16:03:30 -0400 Subject: SUNRPC: Support registering IPv6 interfaces with local rpcbind daemon Introduce a new API to register RPC services on IPv6 interfaces to allow the NFS server and lockd to advertise on IPv6 networks. Unlike rpcb_register(), the new rpcb_v4_register() function uses rpcbind protocol version 4 to contact the local rpcbind daemon. The version 4 SET/UNSET procedures allow services to register address families besides AF_INET, register at specific network interfaces, and register transport protocols besides UDP and TCP. All of this functionality is exposed via the new rpcb_v4_register() kernel API. A user-space rpcbind daemon implementation that supports version 4 of the rpcbind protocol is required in order to make use of this new API. Note that rpcbind version 3 is sufficient to support the new rpcbind facilities listed above, but most extant implementations use version 4. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 764fd4c..e5bfe01 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -125,6 +125,9 @@ void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); int rpcb_register(u32, u32, int, unsigned short, int *); +int rpcb_v4_register(const u32 program, const u32 version, + const struct sockaddr *address, + const char *netid, int *result); int rpcb_getport_sync(struct sockaddr_in *, u32, u32, int); void rpcb_getport_async(struct rpc_task *); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 8b75c30..24db2b4 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -87,6 +87,7 @@ struct rpcbind_args { static struct rpc_procinfo rpcb_procedures2[]; static struct rpc_procinfo rpcb_procedures3[]; +static struct rpc_procinfo rpcb_procedures4[]; struct rpcb_info { u32 rpc_vers; @@ -122,6 +123,12 @@ static const struct sockaddr_in rpcb_inaddr_loopback = { .sin_port = htons(RPCBIND_PORT), }; +static const struct sockaddr_in6 rpcb_in6addr_loopback = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_LOOPBACK_INIT, + .sin6_port = htons(RPCBIND_PORT), +}; + static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, size_t addrlen, u32 version) { @@ -196,13 +203,38 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, * rpcb_register - set or unset a port registration with the local rpcbind svc * @prog: RPC program number to bind * @vers: RPC version number to bind - * @prot: transport protocol to use to make this request + * @prot: transport protocol to register * @port: port value to register - * @okay: result code + * @okay: OUT: result code + * + * RPC services invoke this function to advertise their contact + * information via the system's rpcbind daemon. RPC services + * invoke this function once for each [program, version, transport] + * tuple they wish to advertise. + * + * Callers may also unregister RPC services that are no longer + * available by setting the passed-in port to zero. This removes + * all registered transports for [program, version] from the local + * rpcbind database. + * + * Returns zero if the registration request was dispatched + * successfully and a reply was received. The rpcbind daemon's + * boolean result code is stored in *okay. * - * port == 0 means unregister, port != 0 means register. + * Returns an errno value and sets *result to zero if there was + * some problem that prevented the rpcbind request from being + * dispatched, or if the rpcbind daemon did not respond within + * the timeout. * - * This routine supports only rpcbind version 2. + * This function uses rpcbind protocol version 2 to contact the + * local rpcbind daemon. + * + * Registration works over both AF_INET and AF_INET6, and services + * registered via this function are advertised as available for any + * address. If the local rpcbind daemon is listening on AF_INET6, + * services registered via this function will be advertised on + * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6 + * addresses). */ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) { @@ -230,6 +262,144 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) RPCBVERS_2, &msg, okay); } +/* + * Fill in AF_INET family-specific arguments to register + */ +static int rpcb_register_netid4(struct sockaddr_in *address_to_register, + struct rpc_message *msg) +{ + struct rpcbind_args *map = msg->rpc_argp; + unsigned short port = ntohs(address_to_register->sin_port); + char buf[32]; + + /* Construct AF_INET universal address */ + snprintf(buf, sizeof(buf), + NIPQUAD_FMT".%u.%u", + NIPQUAD(address_to_register->sin_addr.s_addr), + port >> 8, port & 0xff); + map->r_addr = buf; + + dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " + "local rpcbind\n", (port ? "" : "un"), + map->r_prog, map->r_vers, + map->r_addr, map->r_netid); + + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; + if (port) + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + + return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, + sizeof(rpcb_inaddr_loopback), + RPCBVERS_4, msg, msg->rpc_resp); +} + +/* + * Fill in AF_INET6 family-specific arguments to register + */ +static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, + struct rpc_message *msg) +{ + struct rpcbind_args *map = msg->rpc_argp; + unsigned short port = ntohs(address_to_register->sin6_port); + char buf[64]; + + /* Construct AF_INET6 universal address */ + snprintf(buf, sizeof(buf), + NIP6_FMT".%u.%u", + NIP6(address_to_register->sin6_addr), + port >> 8, port & 0xff); + map->r_addr = buf; + + dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " + "local rpcbind\n", (port ? "" : "un"), + map->r_prog, map->r_vers, + map->r_addr, map->r_netid); + + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; + if (port) + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + + return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, + sizeof(rpcb_in6addr_loopback), + RPCBVERS_4, msg, msg->rpc_resp); +} + +/** + * rpcb_v4_register - set or unset a port registration with the local rpcbind + * @program: RPC program number of service to (un)register + * @version: RPC version number of service to (un)register + * @address: address family, IP address, and port to (un)register + * @netid: netid of transport protocol to (un)register + * @result: result code from rpcbind RPC call + * + * RPC services invoke this function to advertise their contact + * information via the system's rpcbind daemon. RPC services + * invoke this function once for each [program, version, address, + * netid] tuple they wish to advertise. + * + * Callers may also unregister RPC services that are no longer + * available by setting the port number in the passed-in address + * to zero. Callers pass a netid of "" to unregister all + * transport netids associated with [program, version, address]. + * + * Returns zero if the registration request was dispatched + * successfully and a reply was received. The rpcbind daemon's + * result code is stored in *result. + * + * Returns an errno value and sets *result to zero if there was + * some problem that prevented the rpcbind request from being + * dispatched, or if the rpcbind daemon did not respond within + * the timeout. + * + * This function uses rpcbind protocol version 4 to contact the + * local rpcbind daemon. The local rpcbind daemon must support + * version 4 of the rpcbind protocol in order for these functions + * to register a service successfully. + * + * Supported netids include "udp" and "tcp" for UDP and TCP over + * IPv4, and "udp6" and "tcp6" for UDP and TCP over IPv6, + * respectively. + * + * The contents of @address determine the address family and the + * port to be registered. The usual practice is to pass INADDR_ANY + * as the raw address, but specifying a non-zero address is also + * supported by this API if the caller wishes to advertise an RPC + * service on a specific network interface. + * + * Note that passing in INADDR_ANY does not create the same service + * registration as IN6ADDR_ANY. The former advertises an RPC + * service on any IPv4 address, but not on IPv6. The latter + * advertises the service on all IPv4 and IPv6 addresses. + */ +int rpcb_v4_register(const u32 program, const u32 version, + const struct sockaddr *address, const char *netid, + int *result) +{ + struct rpcbind_args map = { + .r_prog = program, + .r_vers = version, + .r_netid = netid, + .r_owner = RPCB_OWNER_STRING, + }; + struct rpc_message msg = { + .rpc_argp = &map, + .rpc_resp = result, + }; + + *result = 0; + + switch (address->sa_family) { + case AF_INET: + return rpcb_register_netid4((struct sockaddr_in *)address, + &msg); + case AF_INET6: + return rpcb_register_netid6((struct sockaddr_in6 *)address, + &msg); + } + + return -EAFNOSUPPORT; +} + /** * rpcb_getport_sync - obtain the port for an RPC service on a given host * @sin: address of remote peer -- cgit v0.10.2 From d67d1c7bf948341fd8678c8e337ec27f4b46b206 Mon Sep 17 00:00:00 2001 From: Felix Blyakher Date: Tue, 15 Jul 2008 12:40:22 -0500 Subject: nfs: set correct fl_len in nlmclnt_test() fcntl(F_GETLK) on an nfs client incorrectly returns the values for the conflicting lock. fl_len value is always 1. If the conflicting lock is (0, 4095) the F_GETLK request for (1024, 10) returns (0, 1), which doesn't even cover the requested range, and is quite confusing. The fix is trivial, set fl_end from the fl_end value recieved from the nfs server. Signed-off-by: Felix Blyakher Signed-off-by: "J. Bruce Fields" Signed-off-by: Trond Myklebust diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 5df517b..fd7d466 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -430,7 +430,7 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl) * Report the conflicting lock back to the application. */ fl->fl_start = req->a_res.lock.fl.fl_start; - fl->fl_end = req->a_res.lock.fl.fl_start; + fl->fl_end = req->a_res.lock.fl.fl_end; fl->fl_type = req->a_res.lock.fl.fl_type; fl->fl_pid = 0; break; -- cgit v0.10.2 From 1b83d707032a1be40a60ed0a9bd841662cc04a5d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 15:44:04 -0400 Subject: NFS: Protect inode->i_nlink updates using inode->i_lock Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b194066..d6ec1c8 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -870,6 +870,14 @@ static int nfs_dentry_delete(struct dentry *dentry) } +static void nfs_drop_nlink(struct inode *inode) +{ + spin_lock(&inode->i_lock); + if (inode->i_nlink > 0) + drop_nlink(inode); + spin_unlock(&inode->i_lock); +} + /* * Called when the dentry loses inode. * We use it to clean up silly-renamed files. @@ -1420,7 +1428,7 @@ static int nfs_safe_remove(struct dentry *dentry) error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); /* The VFS may want to delete this inode */ if (error == 0) - drop_nlink(inode); + nfs_drop_nlink(inode); nfs_mark_for_revalidate(inode); } else error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); @@ -1647,7 +1655,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, /* dentry still busy? */ goto out; } else - drop_nlink(new_inode); + nfs_drop_nlink(new_inode); go_ahead: /* -- cgit v0.10.2 From a3d01454bc58b5a211ef64a7670572a40b71e682 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 12:21:19 -0400 Subject: NFS: Remove BKL requirement from attribute updates The main problem is dealing with inode->i_size: we need to set the inode->i_lock on all attribute updates, and so vmtruncate won't cut it. Make an NFS-private version of vmtruncate that has the necessary locking semantics. The result should be that the following inode attribute updates are protected by inode->i_lock nfsi->cache_validity nfsi->read_cache_jiffies nfsi->attrtimeo nfsi->attrtimeo_timestamp nfsi->change_attr nfsi->last_updated nfsi->cache_change_attribute nfsi->access_cache nfsi->access_cache_entry_lru nfsi->access_cache_inode_lru nfsi->acl_access nfsi->acl_default nfsi->nfs_page_tree nfsi->ncommit nfsi->npages nfsi->open_files nfsi->silly_list nfsi->acl nfsi->open_states inode->i_size inode->i_atime inode->i_mtime inode->i_ctime inode->i_nlink inode->i_uid inode->i_gid The following is protected by dir->i_mutex nfsi->cookieverf Signed-off-by: Trond Myklebust diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2c23d06..3adabd1 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -389,6 +389,62 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) } /** + * nfs_vmtruncate - unmap mappings "freed" by truncate() syscall + * @inode: inode of the file used + * @offset: file offset to start truncating + * + * This is a copy of the common vmtruncate, but with the locking + * corrected to take into account the fact that NFS requires + * inode->i_size to be updated under the inode->i_lock. + */ +static int nfs_vmtruncate(struct inode * inode, loff_t offset) +{ + if (i_size_read(inode) < offset) { + unsigned long limit; + + limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + if (limit != RLIM_INFINITY && offset > limit) + goto out_sig; + if (offset > inode->i_sb->s_maxbytes) + goto out_big; + spin_lock(&inode->i_lock); + i_size_write(inode, offset); + spin_unlock(&inode->i_lock); + } else { + struct address_space *mapping = inode->i_mapping; + + /* + * truncation of in-use swapfiles is disallowed - it would + * cause subsequent swapout to scribble on the now-freed + * blocks. + */ + if (IS_SWAPFILE(inode)) + return -ETXTBSY; + spin_lock(&inode->i_lock); + i_size_write(inode, offset); + spin_unlock(&inode->i_lock); + + /* + * unmap_mapping_range is called twice, first simply for + * efficiency so that truncate_inode_pages does fewer + * single-page unmaps. However after this first call, and + * before truncate_inode_pages finishes, it is possible for + * private pages to be COWed, which remain after + * truncate_inode_pages finishes, hence the second + * unmap_mapping_range call must be made for correctness. + */ + unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); + truncate_inode_pages(mapping, offset); + unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); + } + return 0; +out_sig: + send_sig(SIGXFSZ, current, 0); +out_big: + return -EFBIG; +} + +/** * nfs_setattr_update_inode - Update inode metadata after a setattr call. * @inode: pointer to struct inode * @attr: pointer to struct iattr @@ -414,8 +470,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) } if ((attr->ia_valid & ATTR_SIZE) != 0) { nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); - inode->i_size = attr->ia_size; - vmtruncate(inode, attr->ia_size); + nfs_vmtruncate(inode, attr->ia_size); } } @@ -829,9 +884,9 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (S_ISDIR(inode->i_mode)) nfsi->cache_validity |= NFS_INO_INVALID_DATA; } - if (inode->i_size == nfs_size_to_loff_t(fattr->pre_size) && + if (i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) && nfsi->npages == 0) - inode->i_size = nfs_size_to_loff_t(fattr->size); + i_size_write(inode, nfs_size_to_loff_t(fattr->size)); } } @@ -972,7 +1027,7 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa (fattr->valid & NFS_ATTR_WCC) == 0) { memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime)); memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime)); - fattr->pre_size = inode->i_size; + fattr->pre_size = i_size_read(inode); fattr->valid |= NFS_ATTR_WCC; } return nfs_post_op_update_inode(inode, fattr); @@ -1057,7 +1112,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ if (nfsi->npages == 0 || new_isize > cur_isize) { - inode->i_size = new_isize; + i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } dprintk("NFS: isize change on server for file %s/%ld\n", diff --git a/fs/nfs/write.c b/fs/nfs/write.c index feca8c6..3229e21 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -133,16 +133,21 @@ static struct nfs_page *nfs_page_find_request(struct page *page) static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) { struct inode *inode = page->mapping->host; - loff_t end, i_size = i_size_read(inode); - pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; + loff_t end, i_size; + pgoff_t end_index; + spin_lock(&inode->i_lock); + i_size = i_size_read(inode); + end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; if (i_size > 0 && page->index < end_index) - return; + goto out; end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); if (i_size >= end) - return; - nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); + goto out; i_size_write(inode, end); + nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); +out: + spin_unlock(&inode->i_lock); } /* A writeback failed: mark the page as bad, and invalidate the page cache */ -- cgit v0.10.2 From fa6dc9dc59c3a76fd209a97c8cf37395980fb903 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 13:26:14 -0400 Subject: NFS: Remove attribute update related BKL references Signed-off-by: Trond Myklebust diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 3adabd1..df23f98 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -370,7 +370,6 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ~ATTR_FILE) == 0) return 0; - lock_kernel(); /* Write all dirty data */ if (S_ISREG(inode->i_mode)) { filemap_write_and_wait(inode->i_mapping); @@ -384,7 +383,6 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); if (error == 0) nfs_refresh_inode(inode, &fattr); - unlock_kernel(); return error; } @@ -700,7 +698,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) inode->i_sb->s_id, (long long)NFS_FILEID(inode)); nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); - lock_kernel(); if (is_bad_inode(inode)) goto out_nowait; if (NFS_STALE(inode)) @@ -749,7 +746,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) nfs_wake_up_inode(inode); out_nowait: - unlock_kernel(); return status; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 47cf83e..1b94e36 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -374,8 +374,6 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) }; int error; - lock_kernel(); - error = server->nfs_client->rpc_ops->statfs(server, fh, &res); if (error < 0) goto out_err; @@ -407,12 +405,10 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_namelen = server->namelen; - unlock_kernel(); return 0; out_err: dprintk("%s: statfs error = %d\n", __func__, -error); - unlock_kernel(); return error; } -- cgit v0.10.2 From 4d80f2ecd506d9732ad94a6da104580bb47680d6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 15:44:18 -0400 Subject: NFS: Remove the BKL from the permission checking code Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index d6ec1c8..73e0f97 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1982,8 +1982,6 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) } force_lookup: - lock_kernel(); - if (!NFS_PROTO(inode)->access) goto out_notsup; @@ -1993,7 +1991,6 @@ force_lookup: put_rpccred(cred); } else res = PTR_ERR(cred); - unlock_kernel(); out: dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n", inode->i_sb->s_id, inode->i_ino, mask, res); @@ -2002,7 +1999,6 @@ out_notsup: res = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (res == 0) res = generic_permission(inode, mask, NULL); - unlock_kernel(); goto out; } -- cgit v0.10.2 From b6a2e569e2157509951c9f3f58dfa18b44ce91b3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 13:26:16 -0400 Subject: NFS: Remove BKL usage from the write path Signed-off-by: Trond Myklebust diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 509dcb5..f919d9a 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -393,9 +393,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, zero_user_segment(page, pglen, PAGE_CACHE_SIZE); } - lock_kernel(); status = nfs_updatepage(file, page, offset, copied); - unlock_kernel(); unlock_page(page); page_cache_release(page); -- cgit v0.10.2 From bba67e0e3f4caba2b2b90b48ed798fb0461bcb86 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 13:26:23 -0400 Subject: NFS: Remove BKL usage from open() All the NFSv4 stateful operations are already protected by other locks (in particular by the rpc_sequence locks. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 73e0f97..c68ec44 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -139,10 +139,8 @@ nfs_opendir(struct inode *inode, struct file *filp) nfs_inc_stats(inode, NFSIOS_VFSOPEN); - lock_kernel(); /* Call generic open code in order to cache credentials */ res = nfs_open(inode, filp); - unlock_kernel(); return res; } @@ -1019,9 +1017,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry } /* Open the file on the server */ - lock_kernel(); res = nfs4_atomic_open(dir, dentry, nd); - unlock_kernel(); if (IS_ERR(res)) { error = PTR_ERR(res); switch (error) { @@ -1083,9 +1079,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) * operations that change the directory. We therefore save the * change attribute *before* we do the RPC call. */ - lock_kernel(); ret = nfs4_open_revalidate(dir, dentry, openflags, nd); - unlock_kernel(); out: dput(parent); if (!ret) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f919d9a..9f1bed9 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -128,9 +128,7 @@ nfs_file_open(struct inode *inode, struct file *filp) return res; nfs_inc_stats(inode, NFSIOS_VFSOPEN); - lock_kernel(); res = nfs_open(inode, filp); - unlock_kernel(); return res; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4451287..c910413 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -451,9 +451,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) /* Save the delegation */ memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); rcu_read_unlock(); - lock_kernel(); ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); - unlock_kernel(); if (ret != 0) goto out; ret = -EAGAIN; -- cgit v0.10.2 From f1e2eda23513b68003202bddf1f84158baad8844 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 15:50:50 -0400 Subject: NFS: Remove the BKL from the inode creation operations nfs_instantiate() does not require the BKL, neither do the attribute updates or the RPC code. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c68ec44..d40e91e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1232,14 +1232,11 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, if ((nd->flags & LOOKUP_CREATE) != 0) open_flags = nd->intent.open.flags; - lock_kernel(); error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); if (error != 0) goto out_err; - unlock_kernel(); return 0; out_err: - unlock_kernel(); d_drop(dentry); return error; } @@ -1262,14 +1259,11 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; - lock_kernel(); status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev); if (status != 0) goto out_err; - unlock_kernel(); return 0; out_err: - unlock_kernel(); d_drop(dentry); return status; } @@ -1288,15 +1282,12 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) attr.ia_valid = ATTR_MODE; attr.ia_mode = mode | S_IFDIR; - lock_kernel(); error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr); if (error != 0) goto out_err; - unlock_kernel(); return 0; out_err: d_drop(dentry); - unlock_kernel(); return error; } -- cgit v0.10.2 From fc81af535e462764e17f638d542973fbef13b026 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 15:52:40 -0400 Subject: NFS: Remove the BKL from nfs_link() Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index d40e91e..5ae8ee6 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1549,14 +1549,12 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) old_dentry->d_parent->d_name.name, old_dentry->d_name.name, dentry->d_parent->d_name.name, dentry->d_name.name); - lock_kernel(); d_drop(dentry); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { atomic_inc(&inode->i_count); d_add(dentry, inode); } - unlock_kernel(); return error; } -- cgit v0.10.2 From fc0f684c21b5d4b41dc2ec76f7c0897ac98f5b6e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 15:44:20 -0400 Subject: NFS: Remove BKL from NFS lookup code All dentry-related operations are already BKL-safe, since they are protected by the VFS locking. No extra locks should be needed in the NFS code. In the case of nfs_revalidate_inode(), we're only doing an attribute update (protected by the inode->i_lock). In the case of nfs_lookup(), we're instantiating a new dentry, so there should be no contention possible until after we call d_materialise_unique. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5ae8ee6..60da755 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -777,7 +777,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) struct nfs_fattr fattr; parent = dget_parent(dentry); - lock_kernel(); dir = parent->d_inode; nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = dentry->d_inode; @@ -815,7 +814,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_valid: - unlock_kernel(); dput(parent); dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", __func__, dentry->d_parent->d_name.name, @@ -834,7 +832,6 @@ out_zap_parent: shrink_dcache_parent(dentry); } d_drop(dentry); - unlock_kernel(); dput(parent); dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", __func__, dentry->d_parent->d_name.name, @@ -921,8 +918,6 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru res = ERR_PTR(-ENOMEM); dentry->d_op = NFS_PROTO(dir)->dentry_ops; - lock_kernel(); - /* * If we're doing an exclusive create, optimize away the lookup * but don't hash the dentry. @@ -930,7 +925,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru if (nfs_is_exclusive_create(dir, nd)) { d_instantiate(dentry, NULL); res = NULL; - goto out_unlock; + goto out; } parent = dentry->d_parent; @@ -958,8 +953,6 @@ no_entry: nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_unblock_sillyrename: nfs_unblock_sillyrename(parent); -out_unlock: - unlock_kernel(); out: return res; } -- cgit v0.10.2 From bd9bb454b76fb6ca2d00f17313f9f9df5f5c404a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 16:09:59 -0400 Subject: NFS: Remove the BKL from the rename, rmdir and unlink operations Attribute updates are safe, and dentry operations are protected using VFS level locks. Defer removing the BKL from sillyrename until a separate patch. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 60da755..68e0688 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1297,14 +1297,12 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - lock_kernel(); error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); /* Ensure the VFS deletes this inode */ if (error == 0 && dentry->d_inode != NULL) clear_nlink(dentry->d_inode); else if (error == -ENOENT) nfs_dentry_handle_enoent(dentry); - unlock_kernel(); return error; } @@ -1429,7 +1427,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - lock_kernel(); spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (atomic_read(&dentry->d_count) > 1) { @@ -1437,6 +1434,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) spin_unlock(&dcache_lock); /* Start asynchronous writeout of the inode */ write_inode_now(dentry->d_inode, 0); + lock_kernel(); error = nfs_sillyrename(dir, dentry); unlock_kernel(); return error; @@ -1452,7 +1450,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); } else if (need_rehash) d_rehash(dentry); - unlock_kernel(); return error; } @@ -1587,7 +1584,6 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, * To prevent any new references to the target during the rename, * we unhash the dentry and free the inode in advance. */ - lock_kernel(); if (!d_unhashed(new_dentry)) { d_drop(new_dentry); rehash = new_dentry; @@ -1621,7 +1617,9 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; /* silly-rename the existing target ... */ + lock_kernel(); err = nfs_sillyrename(new_dir, new_dentry); + unlock_kernel(); if (!err) { new_dentry = rehash = dentry; new_inode = NULL; @@ -1665,7 +1663,6 @@ out: /* new dentry created? */ if (dentry) dput(dentry); - unlock_kernel(); return error; } -- cgit v0.10.2 From 52e2e8d37e01edf38ccdccc983fb13ec1456d63d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 15:44:21 -0400 Subject: NFS: Remove BKL from the sillydelete operations Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 68e0688..1bdc36b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -884,10 +884,8 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { - lock_kernel(); drop_nlink(inode); nfs_complete_unlink(dentry, inode); - unlock_kernel(); } iput(inode); } @@ -1434,9 +1432,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) spin_unlock(&dcache_lock); /* Start asynchronous writeout of the inode */ write_inode_now(dentry->d_inode, 0); - lock_kernel(); error = nfs_sillyrename(dir, dentry); - unlock_kernel(); return error; } if (!d_unhashed(dentry)) { @@ -1617,9 +1613,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; /* silly-rename the existing target ... */ - lock_kernel(); err = nfs_sillyrename(new_dir, new_dentry); - unlock_kernel(); if (!err) { new_dentry = rehash = dentry; new_inode = NULL; -- cgit v0.10.2 From 76566991f94c206d9c5881edcaf99ba72c9e9d61 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 15:44:22 -0400 Subject: NFS: Remove BKL from the symlink code Page cache accesses are serialised using page locks, whereas attribute updates are serialised using inode->i_lock. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 1bdc36b..e5f9502 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1482,13 +1482,9 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym attr.ia_mode = S_IFLNK | S_IRWXUGO; attr.ia_valid = ATTR_MODE; - lock_kernel(); - page = alloc_page(GFP_HIGHUSER); - if (!page) { - unlock_kernel(); + if (!page) return -ENOMEM; - } kaddr = kmap_atomic(page, KM_USER0); memcpy(kaddr, symname, pathlen); @@ -1503,7 +1499,6 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym dentry->d_name.name, symname, error); d_drop(dentry); __free_page(page); - unlock_kernel(); return error; } @@ -1521,7 +1516,6 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym } else __free_page(page); - unlock_kernel(); return 0; } -- cgit v0.10.2 From c3cc8c019ca09767d7c9b5457d5cf8ac65085f44 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 15:44:23 -0400 Subject: NFS: Remove BKL from the readdir code Page accesses are serialised using the page locks, whereas all attribute updates are serialised using the inode->i_lock. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e5f9502..28a238d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -534,8 +534,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) (long long)filp->f_pos); nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); - lock_kernel(); - /* * filp->f_pos points to the dirent entry number. * *desc->dir_cookie has the cookie for the next entry. We have @@ -593,7 +591,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } out: nfs_unblock_sillyrename(dentry); - unlock_kernel(); if (res > 0) res = 0; dfprintk(FILE, "NFS: readdir(%s/%s) returns %ld\n", -- cgit v0.10.2 From a86dc496b764ebb1431677b38eab45310e5a2ad4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 13:37:09 -0400 Subject: SUNRPC: Remove the BKL from the callback functions Push it into those callback functions that actually need it. Note that all the NFS operations use their own locking, so don't need the BKL. Ditto for the rpcbind client. Signed-off-by: Trond Myklebust diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index fd7d466..1f6dc51 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -224,7 +224,9 @@ void nlm_release_call(struct nlm_rqst *call) static void nlmclnt_rpc_release(void *data) { + lock_kernel(); nlm_release_call(data); + unlock_kernel(); } static int nlm_wait_on_grace(wait_queue_head_t *queue) @@ -710,7 +712,9 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data) die: return; retry_rebind: + lock_kernel(); nlm_rebind_host(req->a_host); + unlock_kernel(); retry_unlock: rpc_restart_call(task); } @@ -788,7 +792,9 @@ retry_cancel: /* Don't ever retry more than 3 times */ if (req->a_retries++ >= NLMCLNT_MAX_RETRIES) goto die; + lock_kernel(); nlm_rebind_host(req->a_host); + unlock_kernel(); rpc_restart_call(task); rpc_delay(task, 30 * HZ); } diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 385437e..2e27176 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -248,7 +248,9 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data) static void nlm4svc_callback_release(void *data) { + lock_kernel(); nlm_release_call(data); + unlock_kernel(); } static const struct rpc_call_ops nlm4svc_callback_ops = { diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 81aca85..56a08ab 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -795,6 +795,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) dprintk("lockd: GRANT_MSG RPC callback\n"); + lock_kernel(); /* if the block is not on a list at this point then it has * been invalidated. Don't try to requeue it. * @@ -804,7 +805,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) * for nlm_blocked? */ if (list_empty(&block->b_list)) - return; + goto out; /* Technically, we should down the file semaphore here. Since we * move the block towards the head of the queue only, no harm @@ -818,13 +819,17 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) } nlmsvc_insert_block(block, timeout); svc_wake_up(block->b_daemon); +out: + unlock_kernel(); } static void nlmsvc_grant_release(void *data) { struct nlm_rqst *call = data; + lock_kernel(); nlmsvc_release_block(call->a_block); + unlock_kernel(); } static const struct rpc_call_ops nlmsvc_grant_ops = { diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 88379cc..ce6952b 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -278,7 +278,9 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data) static void nlmsvc_callback_release(void *data) { + lock_kernel(); nlm_release_call(data); + unlock_kernel(); } static const struct rpc_call_ops nlmsvc_callback_ops = { diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 6288af0..385f427 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -576,9 +576,7 @@ EXPORT_SYMBOL_GPL(rpc_delay); */ static void rpc_prepare_task(struct rpc_task *task) { - lock_kernel(); task->tk_ops->rpc_call_prepare(task, task->tk_calldata); - unlock_kernel(); } /* @@ -588,9 +586,7 @@ void rpc_exit_task(struct rpc_task *task) { task->tk_action = NULL; if (task->tk_ops->rpc_call_done != NULL) { - lock_kernel(); task->tk_ops->rpc_call_done(task, task->tk_calldata); - unlock_kernel(); if (task->tk_action != NULL) { WARN_ON(RPC_ASSASSINATED(task)); /* Always release the RPC slot and buffer memory */ @@ -602,11 +598,8 @@ EXPORT_SYMBOL_GPL(rpc_exit_task); void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) { - if (ops->rpc_release != NULL) { - lock_kernel(); + if (ops->rpc_release != NULL) ops->rpc_release(calldata); - unlock_kernel(); - } } /* -- cgit v0.10.2 From f839c4c1991cc9b580ae38f98f54554938a7f49c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 15:44:26 -0400 Subject: NFSv4: Remove BKL from the nfsv4 state recovery Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 856a893..401ef8b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -940,7 +940,6 @@ static int reclaimer(void *ptr) allow_signal(SIGKILL); /* Ensure exclusive access to NFSv4 state */ - lock_kernel(); down_write(&clp->cl_sem); /* Are there any NFS mounts out there? */ if (list_empty(&clp->cl_superblocks)) @@ -1000,7 +999,6 @@ restart_loop: nfs_delegation_reap_unclaimed(clp); out: up_write(&clp->cl_sem); - unlock_kernel(); if (status == -NFS4ERR_CB_PATH_DOWN) nfs_handle_cb_pathdown(clp); nfs4_clear_recover_bit(clp); -- cgit v0.10.2 From 809d9a8f93bd8504dcc34b16bbfdfd1a8c9bb1ed Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Tue, 15 Jul 2008 11:59:42 -0700 Subject: x86/PCI: ACPI based PCI gap calculation Using ACPI to find free address space allows us to find a gap for the unallocated PCI resources or MMIO resources for hotplug devices within the BIOS allowed PCI regions. It works by evaluating the _CRS object under PCI0 looking for producer resources. Then searches the e820 memory space for a gap within these producer resources. Signed-off-by: Alok N Kataria Cc: Andi Kleen Cc: Len Brown Cc: Ingo Molnar Signed-off-by: Jesse Barnes diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index d95de2f..d1ffb57 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "pci.h" struct pci_root_info { @@ -14,6 +15,11 @@ struct pci_root_info { int busnum; }; +struct gap_info { + unsigned long gapstart; + unsigned long gapsize; +}; + static acpi_status resource_to_addr(struct acpi_resource *resource, struct acpi_resource_address64 *addr) @@ -110,6 +116,78 @@ adjust_transparent_bridge_resources(struct pci_bus *bus) } } +static acpi_status search_gap(struct acpi_resource *resource, void *data) +{ + struct acpi_resource_address64 addr; + acpi_status status; + struct gap_info *gap = data; + unsigned long long start_addr, end_addr; + + status = resource_to_addr(resource, &addr); + if (ACPI_SUCCESS(status) && + addr.resource_type == ACPI_MEMORY_RANGE && + addr.address_length > gap->gapsize) { + start_addr = addr.minimum + addr.translation_offset; + /* + * We want space only in the 32bit address range + */ + if (start_addr < UINT_MAX) { + end_addr = start_addr + addr.address_length; + e820_search_gap(&gap->gapstart, &gap->gapsize, + start_addr, end_addr); + } + } + + return AE_OK; +} + +/* + * Search for a hole in the 32 bit address space for PCI to assign MMIO + * resources, for hotplug or unconfigured resources. + * We query the CRS object of the PCI root device to look for possible producer + * resources in the tree and consider these while calulating the start address + * for this hole. + */ +static void pci_setup_gap(acpi_handle *handle) +{ + struct gap_info gap; + acpi_status status; + + gap.gapstart = 0; + gap.gapsize = 0x400000; + + status = acpi_walk_resources(handle, METHOD_NAME__CRS, + search_gap, &gap); + + if (ACPI_SUCCESS(status)) { + unsigned long round; + + if (!gap.gapstart) { + printk(KERN_ERR "ACPI: Warning: Cannot find a gap " + "in the 32bit address range for PCI\n" + "ACPI: PCI devices may collide with " + "hotpluggable memory address range\n"); + } + /* + * Round the gapstart, uses the same logic as in + * e820_gap_setup + */ + round = 0x100000; + while ((gap.gapsize >> 4) > round) + round += round; + /* Fun with two's complement */ + pci_mem_start = (gap.gapstart + round) & -round; + + printk(KERN_INFO "ACPI: PCI resources should " + "start at %lx (gap: %lx:%lx)\n", + pci_mem_start, gap.gapstart, gap.gapsize); + } else { + printk(KERN_ERR "ACPI: Error while searching for gap in " + "the 32bit address range for PCI\n"); + } +} + + static void get_current_resources(struct acpi_device *device, int busnum, int domain, struct pci_bus *bus) @@ -215,6 +293,8 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do if (bus && (pci_probe & PCI_USE__CRS)) get_current_resources(device, busnum, domain, bus); + + pci_setup_gap(device->handle); return bus; } -- cgit v0.10.2 From d25dc7fd4740decf4c66e2f17dbaa288448fabd5 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 15 Jul 2008 22:54:04 -0700 Subject: Fix 'make clean' and .gitignore for firmware/ directory. Provide a .gitignore file, and fix a typo which prevented some of the generated binary files from being removed on 'make clean'. Signed-off-by: David Woodhouse Reported-and-tested-by: Alexey Dobriyan Signed-off-by: Linus Torvalds diff --git a/firmware/.gitignore b/firmware/.gitignore new file mode 100644 index 0000000..d9c6901 --- /dev/null +++ b/firmware/.gitignore @@ -0,0 +1,6 @@ +*.gen.S +*.fw +*.bin +*.csp +*.dsp +ihex2fw diff --git a/firmware/Makefile b/firmware/Makefile index e4f2fb3..9fe8604 100644 --- a/firmware/Makefile +++ b/firmware/Makefile @@ -56,7 +56,7 @@ fw-shipped-$(CONFIG_USB_SERIAL_KEYSPAN_USA28X) += keyspan/usa28x.fw fw-shipped-$(CONFIG_USB_SERIAL_KEYSPAN_USA49W) += keyspan/usa49w.fw fw-shipped-$(CONFIG_USB_SERIAL_KEYSPAN_USA49WLC) += keyspan/usa49wlc.fw else -fw-shipped- := keyspan/mpr.fw keyspan/usa18x.fw keyspan/usa19.fw \ +fw-shipped- += keyspan/mpr.fw keyspan/usa18x.fw keyspan/usa19.fw \ keyspan/usa19qi.fw keyspan/usa19qw.fw keyspan/usa19w.fw \ keyspan/usa28.fw keyspan/usa28xa.fw keyspan/usa28xb.fw \ keyspan/usa28x.fw keyspan/usa49w.fw keyspan/usa49wlc.fw -- cgit v0.10.2 From 9fce1bc956c21dfe0f46be028f18c4d5057f2bd7 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 16 Jul 2008 22:54:30 +0800 Subject: PCI: remove unnecessary volatile in PCIe hotplug struct controller Proper memory barriers have been added to order accesses to ->cmd_busy, so volatile declaration for cmd_busy can be removed. Signed-off-by: Ming Lei Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index d17233a..e3a1e7e 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -97,7 +97,7 @@ struct controller { u32 slot_cap; u8 cap_base; struct timer_list poll_timer; - volatile int cmd_busy; + int cmd_busy; unsigned int no_cmd_complete:1; }; -- cgit v0.10.2 From e22146e610bb7aed63282148740ab1d1b91e1d90 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Wed, 16 Jul 2008 11:11:59 -0500 Subject: x86: fix kernel_physical_mapping_init() for large x86 systems Fix bug in kernel_physical_mapping_init() that causes kernel page table to be built incorrectly for systems with greater than 512GB of memory. Signed-off-by: Jack Steiner Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 27de243..306049e 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -644,7 +644,7 @@ static unsigned long __init kernel_physical_mapping_init(unsigned long start, unsigned long pud_phys; pud_t *pud; - next = start + PGDIR_SIZE; + next = (start + PGDIR_SIZE) & PGDIR_MASK; if (next > end) next = end; -- cgit v0.10.2 From bd4bc3dbded9cd7b2bdca6bba1aecb4251a8039d Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:05 +0200 Subject: i2c: Clear i2c_adapter.dev on adapter removal Clear i2c_adapter.dev on adapter removal. This makes it possible to re-add the adapter at a later point, which some drivers (i2c-amd756-s4882, i2c-nforce2-s4985) actually do. This fixes a bug reported by John Stultz here: http://lkml.org/lkml/2008/7/15/720 and by Ingo Molar there: http://lkml.org/lkml/2008/7/16/78 Signed-off-by: Jean Delvare Cc: John Stultz Cc: Ingo Molnar diff --git a/drivers/i2c/busses/i2c-amd756-s4882.c b/drivers/i2c/busses/i2c-amd756-s4882.c index 2f150e3..72872d1 100644 --- a/drivers/i2c/busses/i2c-amd756-s4882.c +++ b/drivers/i2c/busses/i2c-amd756-s4882.c @@ -155,6 +155,16 @@ static int __init amd756_s4882_init(void) int i, error; union i2c_smbus_data ioconfig; + /* Configure the PCA9556 multiplexer */ + ioconfig.byte = 0x00; /* All I/O to output mode */ + error = i2c_smbus_xfer(&amd756_smbus, 0x18, 0, I2C_SMBUS_WRITE, 0x03, + I2C_SMBUS_BYTE_DATA, &ioconfig); + if (error) { + dev_err(&amd756_smbus.dev, "PCA9556 configuration failed\n"); + error = -EIO; + goto ERROR0; + } + /* Unregister physical bus */ error = i2c_del_adapter(&amd756_smbus); if (error) { @@ -198,22 +208,11 @@ static int __init amd756_s4882_init(void) s4882_algo[3].smbus_xfer = amd756_access_virt3; s4882_algo[4].smbus_xfer = amd756_access_virt4; - /* Configure the PCA9556 multiplexer */ - ioconfig.byte = 0x00; /* All I/O to output mode */ - error = amd756_smbus.algo->smbus_xfer(&amd756_smbus, 0x18, 0, - I2C_SMBUS_WRITE, 0x03, - I2C_SMBUS_BYTE_DATA, &ioconfig); - if (error) { - dev_err(&amd756_smbus.dev, "PCA9556 configuration failed\n"); - error = -EIO; - goto ERROR3; - } - /* Register virtual adapters */ for (i = 0; i < 5; i++) { error = i2c_add_adapter(s4882_adapter+i); if (error) { - dev_err(&amd756_smbus.dev, + printk(KERN_ERR "i2c-amd756-s4882: " "Virtual adapter %d registration " "failed, module not inserted\n", i); for (i--; i >= 0; i--) @@ -252,8 +251,8 @@ static void __exit amd756_s4882_exit(void) /* Restore physical bus */ if (i2c_add_adapter(&amd756_smbus)) - dev_err(&amd756_smbus.dev, "Physical bus restoration " - "failed\n"); + printk(KERN_ERR "i2c-amd756-s4882: " + "Physical bus restoration failed\n"); } MODULE_AUTHOR("Jean Delvare "); diff --git a/drivers/i2c/busses/i2c-nforce2-s4985.c b/drivers/i2c/busses/i2c-nforce2-s4985.c index 6a8995d..d1a4cbc 100644 --- a/drivers/i2c/busses/i2c-nforce2-s4985.c +++ b/drivers/i2c/busses/i2c-nforce2-s4985.c @@ -150,6 +150,16 @@ static int __init nforce2_s4985_init(void) int i, error; union i2c_smbus_data ioconfig; + /* Configure the PCA9556 multiplexer */ + ioconfig.byte = 0x00; /* All I/O to output mode */ + error = i2c_smbus_xfer(nforce2_smbus, 0x18, 0, I2C_SMBUS_WRITE, 0x03, + I2C_SMBUS_BYTE_DATA, &ioconfig); + if (error) { + dev_err(&nforce2_smbus->dev, "PCA9556 configuration failed\n"); + error = -EIO; + goto ERROR0; + } + /* Unregister physical bus */ if (!nforce2_smbus) return -ENODEV; @@ -191,24 +201,13 @@ static int __init nforce2_s4985_init(void) s4985_algo[3].smbus_xfer = nforce2_access_virt3; s4985_algo[4].smbus_xfer = nforce2_access_virt4; - /* Configure the PCA9556 multiplexer */ - ioconfig.byte = 0x00; /* All I/O to output mode */ - error = nforce2_smbus->algo->smbus_xfer(nforce2_smbus, 0x18, 0, - I2C_SMBUS_WRITE, 0x03, - I2C_SMBUS_BYTE_DATA, &ioconfig); - if (error) { - dev_err(&nforce2_smbus->dev, "PCA9556 configuration failed\n"); - error = -EIO; - goto ERROR3; - } - /* Register virtual adapters */ for (i = 0; i < 5; i++) { error = i2c_add_adapter(s4985_adapter + i); if (error) { - dev_err(&nforce2_smbus->dev, - "Virtual adapter %d registration " - "failed, module not inserted\n", i); + printk(KERN_ERR "i2c-nforce2-s4985: " + "Virtual adapter %d registration " + "failed, module not inserted\n", i); for (i--; i >= 0; i--) i2c_del_adapter(s4985_adapter + i); goto ERROR3; @@ -245,8 +244,8 @@ static void __exit nforce2_s4985_exit(void) /* Restore physical bus */ if (i2c_add_adapter(nforce2_smbus)) - dev_err(&nforce2_smbus->dev, "Physical bus restoration " - "failed\n"); + printk(KERN_ERR "i2c-nforce2-s4985: " + "Physical bus restoration failed\n"); } MODULE_AUTHOR("Jean Delvare "); diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 0a79f76..7608df8 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -654,6 +654,10 @@ int i2c_del_adapter(struct i2c_adapter *adap) dev_dbg(&adap->dev, "adapter [%s] unregistered\n", adap->name); + /* Clear the device structure in case this adapter is ever going to be + added again */ + memset(&adap->dev, 0, sizeof(adap->dev)); + out_unlock: mutex_unlock(&core_lock); return res; -- cgit v0.10.2 From 68be3363740e4ac3b4309faebcdf8fe5bf62ed2f Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:05 +0200 Subject: i2c: Convert the eeprom driver to a new-style i2c driver The new-style eeprom driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare diff --git a/drivers/i2c/chips/eeprom.c b/drivers/i2c/chips/eeprom.c index 373ea8d..2c27193 100644 --- a/drivers/i2c/chips/eeprom.c +++ b/drivers/i2c/chips/eeprom.c @@ -47,7 +47,6 @@ enum eeprom_nature { /* Each client has this additional data */ struct eeprom_data { - struct i2c_client client; struct mutex update_lock; u8 valid; /* bitfield, bit!=0 if slice is valid */ unsigned long last_updated[8]; /* In jiffies, 8 slices */ @@ -56,19 +55,6 @@ struct eeprom_data { }; -static int eeprom_attach_adapter(struct i2c_adapter *adapter); -static int eeprom_detect(struct i2c_adapter *adapter, int address, int kind); -static int eeprom_detach_client(struct i2c_client *client); - -/* This is the driver that will be inserted */ -static struct i2c_driver eeprom_driver = { - .driver = { - .name = "eeprom", - }, - .attach_adapter = eeprom_attach_adapter, - .detach_client = eeprom_detach_client, -}; - static void eeprom_update_client(struct i2c_client *client, u8 slice) { struct eeprom_data *data = i2c_get_clientdata(client); @@ -148,25 +134,17 @@ static struct bin_attribute eeprom_attr = { .read = eeprom_read, }; -static int eeprom_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & (I2C_CLASS_DDC | I2C_CLASS_SPD))) - return 0; - return i2c_probe(adapter, &addr_data, eeprom_detect); -} - -/* This function is called by i2c_probe */ -static int eeprom_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int eeprom_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - struct i2c_client *client; - struct eeprom_data *data; - int err = 0; + struct i2c_adapter *adapter = client->adapter; /* EDID EEPROMs are often 24C00 EEPROMs, which answer to all addresses 0x50-0x57, but we only care about 0x50. So decline attaching to addresses >= 0x51 on DDC buses */ - if (!(adapter->class & I2C_CLASS_SPD) && address >= 0x51) - goto exit; + if (!(adapter->class & I2C_CLASS_SPD) && client->addr >= 0x51) + return -ENODEV; /* There are four ways we can read the EEPROM data: (1) I2C block reads (faster, but unsupported by most adapters) @@ -177,32 +155,33 @@ static int eeprom_detect(struct i2c_adapter *adapter, int address, int kind) because all known adapters support one of the first two. */ if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_WORD_DATA) && !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK)) - goto exit; + return -ENODEV; + + strlcpy(info->type, "eeprom", I2C_NAME_SIZE); + + return 0; +} + +static int eeprom_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = client->adapter; + struct eeprom_data *data; + int err; if (!(data = kzalloc(sizeof(struct eeprom_data), GFP_KERNEL))) { err = -ENOMEM; goto exit; } - client = &data->client; memset(data->data, 0xff, EEPROM_SIZE); i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &eeprom_driver; - - /* Fill in the remaining client fields */ - strlcpy(client->name, "eeprom", I2C_NAME_SIZE); mutex_init(&data->update_lock); data->nature = UNKNOWN; - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto exit_kfree; - /* Detect the Vaio nature of EEPROMs. We use the "PCG-" or "VGN-" prefix as the signature. */ - if (address == 0x57 + if (client->addr == 0x57 && i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA)) { char name[4]; @@ -221,33 +200,42 @@ static int eeprom_detect(struct i2c_adapter *adapter, int address, int kind) /* create the sysfs eeprom file */ err = sysfs_create_bin_file(&client->dev.kobj, &eeprom_attr); if (err) - goto exit_detach; + goto exit_kfree; return 0; -exit_detach: - i2c_detach_client(client); exit_kfree: kfree(data); exit: return err; } -static int eeprom_detach_client(struct i2c_client *client) +static int eeprom_remove(struct i2c_client *client) { - int err; - sysfs_remove_bin_file(&client->dev.kobj, &eeprom_attr); - - err = i2c_detach_client(client); - if (err) - return err; - kfree(i2c_get_clientdata(client)); return 0; } +static const struct i2c_device_id eeprom_id[] = { + { "eeprom", 0 }, + { } +}; + +static struct i2c_driver eeprom_driver = { + .driver = { + .name = "eeprom", + }, + .probe = eeprom_probe, + .remove = eeprom_remove, + .id_table = eeprom_id, + + .class = I2C_CLASS_DDC | I2C_CLASS_SPD, + .detect = eeprom_detect, + .address_data = &addr_data, +}; + static int __init eeprom_init(void) { return i2c_add_driver(&eeprom_driver); -- cgit v0.10.2 From 8b77e6ac4911a79993e583ece719736a9e035b1d Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:06 +0200 Subject: i2c: Convert the pcf8591 driver to a new-style i2c driver The new-style pcf8591 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare diff --git a/drivers/i2c/chips/pcf8591.c b/drivers/i2c/chips/pcf8591.c index db73537..16ce3e1 100644 --- a/drivers/i2c/chips/pcf8591.c +++ b/drivers/i2c/chips/pcf8591.c @@ -72,28 +72,15 @@ MODULE_PARM_DESC(input_mode, #define REG_TO_SIGNED(reg) (((reg) & 0x80)?((reg) - 256):(reg)) struct pcf8591_data { - struct i2c_client client; struct mutex update_lock; u8 control; u8 aout; }; -static int pcf8591_attach_adapter(struct i2c_adapter *adapter); -static int pcf8591_detect(struct i2c_adapter *adapter, int address, int kind); -static int pcf8591_detach_client(struct i2c_client *client); static void pcf8591_init_client(struct i2c_client *client); static int pcf8591_read_channel(struct device *dev, int channel); -/* This is the driver that will be inserted */ -static struct i2c_driver pcf8591_driver = { - .driver = { - .name = "pcf8591", - }, - .attach_adapter = pcf8591_attach_adapter, - .detach_client = pcf8591_detach_client, -}; - /* following are the sysfs callback functions */ #define show_in_channel(channel) \ static ssize_t show_in##channel##_input(struct device *dev, struct device_attribute *attr, char *buf) \ @@ -180,58 +167,46 @@ static const struct attribute_group pcf8591_attr_group_opt = { /* * Real code */ -static int pcf8591_attach_adapter(struct i2c_adapter *adapter) -{ - return i2c_probe(adapter, &addr_data, pcf8591_detect); -} -/* This function is called by i2c_probe */ -static int pcf8591_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int pcf8591_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - struct i2c_client *client; - struct pcf8591_data *data; - int err = 0; + struct i2c_adapter *adapter = client->adapter; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE | I2C_FUNC_SMBUS_WRITE_BYTE_DATA)) - goto exit; + return -ENODEV; + + /* Now, we would do the remaining detection. But the PCF8591 is plainly + impossible to detect! Stupid chip. */ + + strlcpy(info->type, "pcf8591", I2C_NAME_SIZE); + + return 0; +} + +static int pcf8591_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct pcf8591_data *data; + int err; - /* OK. For now, we presume we have a valid client. We now create the - client structure, even though we cannot fill it completely yet. */ if (!(data = kzalloc(sizeof(struct pcf8591_data), GFP_KERNEL))) { err = -ENOMEM; goto exit; } - client = &data->client; i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &pcf8591_driver; - - /* Now, we would do the remaining detection. But the PCF8591 is plainly - impossible to detect! Stupid chip. */ - - /* Determine the chip type - only one kind supported! */ - if (kind <= 0) - kind = pcf8591; - - /* Fill in the remaining client fields and put it into the global - list */ - strlcpy(client->name, "pcf8591", I2C_NAME_SIZE); mutex_init(&data->update_lock); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto exit_kfree; - /* Initialize the PCF8591 chip */ pcf8591_init_client(client); /* Register sysfs hooks */ err = sysfs_create_group(&client->dev.kobj, &pcf8591_attr_group); if (err) - goto exit_detach; + goto exit_kfree; /* Register input2 if not in "two differential inputs" mode */ if (input_mode != 3) { @@ -252,24 +227,16 @@ static int pcf8591_detect(struct i2c_adapter *adapter, int address, int kind) exit_sysfs_remove: sysfs_remove_group(&client->dev.kobj, &pcf8591_attr_group_opt); sysfs_remove_group(&client->dev.kobj, &pcf8591_attr_group); -exit_detach: - i2c_detach_client(client); exit_kfree: kfree(data); exit: return err; } -static int pcf8591_detach_client(struct i2c_client *client) +static int pcf8591_remove(struct i2c_client *client) { - int err; - sysfs_remove_group(&client->dev.kobj, &pcf8591_attr_group_opt); sysfs_remove_group(&client->dev.kobj, &pcf8591_attr_group); - - if ((err = i2c_detach_client(client))) - return err; - kfree(i2c_get_clientdata(client)); return 0; } @@ -316,6 +283,25 @@ static int pcf8591_read_channel(struct device *dev, int channel) return (10 * value); } +static const struct i2c_device_id pcf8591_id[] = { + { "pcf8591", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, pcf8591_id); + +static struct i2c_driver pcf8591_driver = { + .driver = { + .name = "pcf8591", + }, + .probe = pcf8591_probe, + .remove = pcf8591_remove, + .id_table = pcf8591_id, + + .class = I2C_CLASS_HWMON, /* Nearest choice */ + .detect = pcf8591_detect, + .address_data = &addr_data, +}; + static int __init pcf8591_init(void) { if (input_mode < 0 || input_mode > 3) { -- cgit v0.10.2 From 833bedb813689807385ae73175389c73a3f855c1 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:06 +0200 Subject: i2c: Convert the pcf8574 driver to a new-style i2c driver The new-style pcf8574 driver implements the optional detect() callback to cover the use cases of the legacy driver. Warning: users will now have to use the force module parameter to get the driver to attach to their device. That's not a bad thing as these devices can't be detected anyway. Note that this doesn't change the fact that this driver is deprecated in favor of gpio/pcf857x. Signed-off-by: Jean Delvare diff --git a/Documentation/i2c/chips/pcf8574 b/Documentation/i2c/chips/pcf8574 index 5c1ad13..235815c 100644 --- a/Documentation/i2c/chips/pcf8574 +++ b/Documentation/i2c/chips/pcf8574 @@ -4,13 +4,13 @@ Kernel driver pcf8574 Supported chips: * Philips PCF8574 Prefix: 'pcf8574' - Addresses scanned: I2C 0x20 - 0x27 + Addresses scanned: none Datasheet: Publicly available at the Philips Semiconductors website http://www.semiconductors.philips.com/pip/PCF8574P.html * Philips PCF8574A Prefix: 'pcf8574a' - Addresses scanned: I2C 0x38 - 0x3f + Addresses scanned: none Datasheet: Publicly available at the Philips Semiconductors website http://www.semiconductors.philips.com/pip/PCF8574P.html @@ -38,12 +38,10 @@ For more informations see the datasheet. Accessing PCF8574(A) via /sys interface ------------------------------------- -! Be careful ! The PCF8574(A) is plainly impossible to detect ! Stupid chip. -So every chip with address in the interval [20..27] and [38..3f] are -detected as PCF8574(A). If you have other chips in this address -range, the workaround is to load this module after the one -for your others chips. +So, you have to pass the I2C bus and address of the installed PCF857A +and PCF8574A devices explicitly to the driver at load time via the +force=... parameter. On detection (i.e. insmod, modprobe et al.), directories are being created for each detected PCF8574(A): diff --git a/drivers/i2c/chips/pcf8574.c b/drivers/i2c/chips/pcf8574.c index 1b3db2b..6ec3098 100644 --- a/drivers/i2c/chips/pcf8574.c +++ b/drivers/i2c/chips/pcf8574.c @@ -38,37 +38,19 @@ #include #include -/* Addresses to scan */ -static const unsigned short normal_i2c[] = { - 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, - 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, - I2C_CLIENT_END -}; +/* Addresses to scan: none, device can't be detected */ +static const unsigned short normal_i2c[] = { I2C_CLIENT_END }; /* Insmod parameters */ I2C_CLIENT_INSMOD_2(pcf8574, pcf8574a); /* Each client has this additional data */ struct pcf8574_data { - struct i2c_client client; - int write; /* Remember last written value */ }; -static int pcf8574_attach_adapter(struct i2c_adapter *adapter); -static int pcf8574_detect(struct i2c_adapter *adapter, int address, int kind); -static int pcf8574_detach_client(struct i2c_client *client); static void pcf8574_init_client(struct i2c_client *client); -/* This is the driver that will be inserted */ -static struct i2c_driver pcf8574_driver = { - .driver = { - .name = "pcf8574", - }, - .attach_adapter = pcf8574_attach_adapter, - .detach_client = pcf8574_detach_client, -}; - /* following are the sysfs callback functions */ static ssize_t show_read(struct device *dev, struct device_attribute *attr, char *buf) { @@ -119,41 +101,22 @@ static const struct attribute_group pcf8574_attr_group = { * Real code */ -static int pcf8574_attach_adapter(struct i2c_adapter *adapter) -{ - return i2c_probe(adapter, &addr_data, pcf8574_detect); -} - -/* This function is called by i2c_probe */ -static int pcf8574_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int pcf8574_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - struct i2c_client *client; - struct pcf8574_data *data; - int err = 0; - const char *client_name = ""; + struct i2c_adapter *adapter = client->adapter; + const char *client_name; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE)) - goto exit; - - /* OK. For now, we presume we have a valid client. We now create the - client structure, even though we cannot fill it completely yet. */ - if (!(data = kzalloc(sizeof(struct pcf8574_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - client = &data->client; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &pcf8574_driver; + return -ENODEV; /* Now, we would do the remaining detection. But the PCF8574 is plainly impossible to detect! Stupid chip. */ /* Determine the chip type */ if (kind <= 0) { - if (address >= 0x38 && address <= 0x3f) + if (client->addr >= 0x38 && client->addr <= 0x3f) kind = pcf8574a; else kind = pcf8574; @@ -163,40 +126,43 @@ static int pcf8574_detect(struct i2c_adapter *adapter, int address, int kind) client_name = "pcf8574a"; else client_name = "pcf8574"; + strlcpy(info->type, client_name, I2C_NAME_SIZE); - /* Fill in the remaining client fields and put it into the global list */ - strlcpy(client->name, client_name, I2C_NAME_SIZE); + return 0; +} + +static int pcf8574_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct pcf8574_data *data; + int err; + + data = kzalloc(sizeof(struct pcf8574_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(client, data); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto exit_free; - /* Initialize the PCF8574 chip */ pcf8574_init_client(client); /* Register sysfs hooks */ err = sysfs_create_group(&client->dev.kobj, &pcf8574_attr_group); if (err) - goto exit_detach; + goto exit_free; return 0; - exit_detach: - i2c_detach_client(client); exit_free: kfree(data); exit: return err; } -static int pcf8574_detach_client(struct i2c_client *client) +static int pcf8574_remove(struct i2c_client *client) { - int err; - sysfs_remove_group(&client->dev.kobj, &pcf8574_attr_group); - - if ((err = i2c_detach_client(client))) - return err; - kfree(i2c_get_clientdata(client)); return 0; } @@ -208,6 +174,24 @@ static void pcf8574_init_client(struct i2c_client *client) data->write = -EAGAIN; } +static const struct i2c_device_id pcf8574_id[] = { + { "pcf8574", 0 }, + { "pcf8574a", 0 }, + { } +}; + +static struct i2c_driver pcf8574_driver = { + .driver = { + .name = "pcf8574", + }, + .probe = pcf8574_probe, + .remove = pcf8574_remove, + .id_table = pcf8574_id, + + .detect = pcf8574_detect, + .address_data = &addr_data, +}; + static int __init pcf8574_init(void) { return i2c_add_driver(&pcf8574_driver); -- cgit v0.10.2 From 97addff6def3f8e228a634fa017589f45c69de5c Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:06 +0200 Subject: i2c: Convert the pcf8575 driver to a new-style i2c driver The new-style pcf8575 driver implements the optional detect() callback to cover the use cases of the legacy driver. Warning: users will now have to use the force module parameter to get the driver to attach to their device. That's not a bad thing as these devices can't be detected anyway. Note that this doesn't change the fact that this driver is deprecated in favor of gpio/pcf857x. Signed-off-by: Jean Delvare diff --git a/Documentation/i2c/chips/pcf8575 b/Documentation/i2c/chips/pcf8575 index 25f5698..40b268e 100644 --- a/Documentation/i2c/chips/pcf8575 +++ b/Documentation/i2c/chips/pcf8575 @@ -40,12 +40,9 @@ Detection --------- There is no method known to detect whether a chip on a given I2C address is -a PCF8575 or whether it is any other I2C device. So there are two alternatives -to let the driver find the installed PCF8575 devices: -- Load this driver after any other I2C driver for I2C devices with addresses - in the range 0x20 .. 0x27. -- Pass the I2C bus and address of the installed PCF8575 devices explicitly to - the driver at load time via the probe=... or force=... parameters. +a PCF8575 or whether it is any other I2C device, so you have to pass the I2C +bus and address of the installed PCF8575 devices explicitly to the driver at +load time via the force=... parameter. /sys interface -------------- diff --git a/drivers/i2c/chips/pcf8575.c b/drivers/i2c/chips/pcf8575.c index 3ea08ac..07fd7cb 100644 --- a/drivers/i2c/chips/pcf8575.c +++ b/drivers/i2c/chips/pcf8575.c @@ -32,11 +32,8 @@ #include /* kzalloc() */ #include /* sysfs_create_group() */ -/* Addresses to scan */ -static const unsigned short normal_i2c[] = { - 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, - I2C_CLIENT_END -}; +/* Addresses to scan: none, device can't be detected */ +static const unsigned short normal_i2c[] = { I2C_CLIENT_END }; /* Insmod parameters */ I2C_CLIENT_INSMOD; @@ -44,24 +41,9 @@ I2C_CLIENT_INSMOD; /* Each client has this additional data */ struct pcf8575_data { - struct i2c_client client; int write; /* last written value, or error code */ }; -static int pcf8575_attach_adapter(struct i2c_adapter *adapter); -static int pcf8575_detect(struct i2c_adapter *adapter, int address, int kind); -static int pcf8575_detach_client(struct i2c_client *client); - -/* This is the driver that will be inserted */ -static struct i2c_driver pcf8575_driver = { - .driver = { - .owner = THIS_MODULE, - .name = "pcf8575", - }, - .attach_adapter = pcf8575_attach_adapter, - .detach_client = pcf8575_detach_client, -}; - /* following are the sysfs callback functions */ static ssize_t show_read(struct device *dev, struct device_attribute *attr, char *buf) @@ -126,75 +108,77 @@ static const struct attribute_group pcf8575_attr_group = { * Real code */ -static int pcf8575_attach_adapter(struct i2c_adapter *adapter) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int pcf8575_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - return i2c_probe(adapter, &addr_data, pcf8575_detect); + struct i2c_adapter *adapter = client->adapter; + + if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) + return -ENODEV; + + /* This is the place to detect whether the chip at the specified + address really is a PCF8575 chip. However, there is no method known + to detect whether an I2C chip is a PCF8575 or any other I2C chip. */ + + strlcpy(info->type, "pcf8575", I2C_NAME_SIZE); + + return 0; } -/* This function is called by i2c_probe */ -static int pcf8575_detect(struct i2c_adapter *adapter, int address, int kind) +static int pcf8575_probe(struct i2c_client *client, + const struct i2c_device_id *id) { - struct i2c_client *client; struct pcf8575_data *data; - int err = 0; - - if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) - goto exit; + int err; - /* OK. For now, we presume we have a valid client. We now create the - client structure, even though we cannot fill it completely yet. */ data = kzalloc(sizeof(struct pcf8575_data), GFP_KERNEL); if (!data) { err = -ENOMEM; goto exit; } - client = &data->client; i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &pcf8575_driver; - strlcpy(client->name, "pcf8575", I2C_NAME_SIZE); data->write = -EAGAIN; - /* This is the place to detect whether the chip at the specified - address really is a PCF8575 chip. However, there is no method known - to detect whether an I2C chip is a PCF8575 or any other I2C chip. */ - - /* Tell the I2C layer a new client has arrived */ - err = i2c_attach_client(client); - if (err) - goto exit_free; - /* Register sysfs hooks */ err = sysfs_create_group(&client->dev.kobj, &pcf8575_attr_group); if (err) - goto exit_detach; + goto exit_free; return 0; -exit_detach: - i2c_detach_client(client); exit_free: kfree(data); exit: return err; } -static int pcf8575_detach_client(struct i2c_client *client) +static int pcf8575_remove(struct i2c_client *client) { - int err; - sysfs_remove_group(&client->dev.kobj, &pcf8575_attr_group); - - err = i2c_detach_client(client); - if (err) - return err; - kfree(i2c_get_clientdata(client)); return 0; } +static const struct i2c_device_id pcf8575_id[] = { + { "pcf8575", 0 }, + { } +}; + +static struct i2c_driver pcf8575_driver = { + .driver = { + .owner = THIS_MODULE, + .name = "pcf8575", + }, + .probe = pcf8575_probe, + .remove = pcf8575_remove, + .id_table = pcf8575_id, + + .detect = pcf8575_detect, + .address_data = &addr_data, +}; + static int __init pcf8575_init(void) { return i2c_add_driver(&pcf8575_driver); -- cgit v0.10.2 From 3d63430a26b91fe3daee0dd933f899c225e66daa Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:07 +0200 Subject: i2c: Convert the pca9539 driver to a new-style i2c driver The new-style pca9539 driver implements the optional detect() callback to cover the use cases of the legacy driver. Warning: users will now have to use the force module parameter to get the driver to attach to their device. That's not a bad thing as these devices can't be detected anyway. Note that this doesn't change the fact that this driver is deprecated in favor of gpio/pca953x. Signed-off-by: Jean Delvare diff --git a/Documentation/i2c/chips/pca9539 b/Documentation/i2c/chips/pca9539 index 1d81c53..6aff890 100644 --- a/Documentation/i2c/chips/pca9539 +++ b/Documentation/i2c/chips/pca9539 @@ -7,7 +7,7 @@ drivers/gpio/pca9539.c instead. Supported chips: * Philips PCA9539 Prefix: 'pca9539' - Addresses scanned: 0x74 - 0x77 + Addresses scanned: none Datasheet: http://www.semiconductors.philips.com/acrobat/datasheets/PCA9539_2.pdf @@ -23,6 +23,14 @@ The input sense can also be inverted. The 16 lines are split between two bytes. +Detection +--------- + +The PCA9539 is difficult to detect and not commonly found in PC machines, +so you have to pass the I2C bus and address of the installed PCA9539 +devices explicitly to the driver at load time via the force=... parameter. + + Sysfs entries ------------- diff --git a/drivers/i2c/chips/pca9539.c b/drivers/i2c/chips/pca9539.c index 58ab7f2..270de4e 100644 --- a/drivers/i2c/chips/pca9539.c +++ b/drivers/i2c/chips/pca9539.c @@ -14,8 +14,8 @@ #include #include -/* Addresses to scan */ -static unsigned short normal_i2c[] = {0x74, 0x75, 0x76, 0x77, I2C_CLIENT_END}; +/* Addresses to scan: none, device is not autodetected */ +static const unsigned short normal_i2c[] = { I2C_CLIENT_END }; /* Insmod parameters */ I2C_CLIENT_INSMOD_1(pca9539); @@ -32,23 +32,6 @@ enum pca9539_cmd PCA9539_DIRECTION_1 = 7, }; -static int pca9539_attach_adapter(struct i2c_adapter *adapter); -static int pca9539_detect(struct i2c_adapter *adapter, int address, int kind); -static int pca9539_detach_client(struct i2c_client *client); - -/* This is the driver that will be inserted */ -static struct i2c_driver pca9539_driver = { - .driver = { - .name = "pca9539", - }, - .attach_adapter = pca9539_attach_adapter, - .detach_client = pca9539_detach_client, -}; - -struct pca9539_data { - struct i2c_client client; -}; - /* following are the sysfs callback functions */ static ssize_t pca9539_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -105,77 +88,51 @@ static struct attribute_group pca9539_defattr_group = { .attrs = pca9539_attributes, }; -static int pca9539_attach_adapter(struct i2c_adapter *adapter) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int pca9539_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - return i2c_probe(adapter, &addr_data, pca9539_detect); -} - -/* This function is called by i2c_probe */ -static int pca9539_detect(struct i2c_adapter *adapter, int address, int kind) -{ - struct i2c_client *client; - struct pca9539_data *data; - int err = 0; + struct i2c_adapter *adapter = client->adapter; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - goto exit; - - /* OK. For now, we presume we have a valid client. We now create the - client structure, even though we cannot fill it completely yet. */ - if (!(data = kzalloc(sizeof(struct pca9539_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - client = &data->client; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &pca9539_driver; - - if (kind < 0) { - /* Detection: the pca9539 only has 8 registers (0-7). - A read of 7 should succeed, but a read of 8 should fail. */ - if ((i2c_smbus_read_byte_data(client, 7) < 0) || - (i2c_smbus_read_byte_data(client, 8) >= 0)) - goto exit_kfree; - } - - strlcpy(client->name, "pca9539", I2C_NAME_SIZE); - - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto exit_kfree; + return -ENODEV; - /* Register sysfs hooks */ - err = sysfs_create_group(&client->dev.kobj, - &pca9539_defattr_group); - if (err) - goto exit_detach; + strlcpy(info->type, "pca9539", I2C_NAME_SIZE); return 0; - -exit_detach: - i2c_detach_client(client); -exit_kfree: - kfree(data); -exit: - return err; } -static int pca9539_detach_client(struct i2c_client *client) +static int pca9539_probe(struct i2c_client *client, + const struct i2c_device_id *id) { - int err; + /* Register sysfs hooks */ + return sysfs_create_group(&client->dev.kobj, + &pca9539_defattr_group); +} +static int pca9539_remove(struct i2c_client *client) +{ sysfs_remove_group(&client->dev.kobj, &pca9539_defattr_group); - - if ((err = i2c_detach_client(client))) - return err; - - kfree(i2c_get_clientdata(client)); return 0; } +static const struct i2c_device_id pca9539_id[] = { + { "pca9539", 0 }, + { } +}; + +static struct i2c_driver pca9539_driver = { + .driver = { + .name = "pca9539", + }, + .probe = pca9539_probe, + .remove = pca9539_remove, + .id_table = pca9539_id, + + .detect = pca9539_detect, + .address_data = &addr_data, +}; + static int __init pca9539_init(void) { return i2c_add_driver(&pca9539_driver); -- cgit v0.10.2 From bd8d421f7ca9f8da3d820d28379d796500f69529 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:07 +0200 Subject: i2c: Convert the max6875 driver to a new-style i2c driver The new-style max6875 driver implements the optional detect() callback to cover the use cases of the legacy driver. I'm curious if anyone really needs this though, so it might be removed in the feature. Signed-off-by: Jean Delvare diff --git a/Documentation/i2c/chips/max6875 b/Documentation/i2c/chips/max6875 index a0cd8af..10ca43c 100644 --- a/Documentation/i2c/chips/max6875 +++ b/Documentation/i2c/chips/max6875 @@ -49,7 +49,7 @@ $ modprobe max6875 force=0,0x50 The MAX6874/MAX6875 ignores address bit 0, so this driver attaches to multiple addresses. For example, for address 0x50, it also reserves 0x51. -The even-address instance is called 'max6875', the odd one is 'max6875 subclient'. +The even-address instance is called 'max6875', the odd one is 'dummy'. Programming the chip using i2c-dev diff --git a/drivers/i2c/chips/max6875.c b/drivers/i2c/chips/max6875.c index 5a0285d..033d9d8 100644 --- a/drivers/i2c/chips/max6875.c +++ b/drivers/i2c/chips/max6875.c @@ -53,7 +53,7 @@ I2C_CLIENT_INSMOD_1(max6875); /* Each client has this additional data */ struct max6875_data { - struct i2c_client client; + struct i2c_client *fake_client; struct mutex update_lock; u32 valid; @@ -61,19 +61,6 @@ struct max6875_data { unsigned long last_updated[USER_EEPROM_SLICES]; }; -static int max6875_attach_adapter(struct i2c_adapter *adapter); -static int max6875_detect(struct i2c_adapter *adapter, int address, int kind); -static int max6875_detach_client(struct i2c_client *client); - -/* This is the driver that will be inserted */ -static struct i2c_driver max6875_driver = { - .driver = { - .name = "max6875", - }, - .attach_adapter = max6875_attach_adapter, - .detach_client = max6875_detach_client, -}; - static void max6875_update_slice(struct i2c_client *client, int slice) { struct max6875_data *data = i2c_get_clientdata(client); @@ -159,96 +146,87 @@ static struct bin_attribute user_eeprom_attr = { .read = max6875_read, }; -static int max6875_attach_adapter(struct i2c_adapter *adapter) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int max6875_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - return i2c_probe(adapter, &addr_data, max6875_detect); -} - -/* This function is called by i2c_probe */ -static int max6875_detect(struct i2c_adapter *adapter, int address, int kind) -{ - struct i2c_client *real_client; - struct i2c_client *fake_client; - struct max6875_data *data; - int err; + struct i2c_adapter *adapter = client->adapter; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WRITE_BYTE_DATA | I2C_FUNC_SMBUS_READ_BYTE)) - return 0; + return -ENODEV; /* Only check even addresses */ - if (address & 1) - return 0; + if (client->addr & 1) + return -ENODEV; + + strlcpy(info->type, "max6875", I2C_NAME_SIZE); + + return 0; +} + +static int max6875_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct max6875_data *data; + int err; if (!(data = kzalloc(sizeof(struct max6875_data), GFP_KERNEL))) return -ENOMEM; /* A fake client is created on the odd address */ - if (!(fake_client = kzalloc(sizeof(struct i2c_client), GFP_KERNEL))) { + data->fake_client = i2c_new_dummy(client->adapter, client->addr + 1); + if (!data->fake_client) { err = -ENOMEM; - goto exit_kfree1; + goto exit_kfree; } /* Init real i2c_client */ - real_client = &data->client; - i2c_set_clientdata(real_client, data); - real_client->addr = address; - real_client->adapter = adapter; - real_client->driver = &max6875_driver; - strlcpy(real_client->name, "max6875", I2C_NAME_SIZE); + i2c_set_clientdata(client, data); mutex_init(&data->update_lock); - /* Init fake client data */ - i2c_set_clientdata(fake_client, NULL); - fake_client->addr = address | 1; - fake_client->adapter = adapter; - fake_client->driver = &max6875_driver; - strlcpy(fake_client->name, "max6875 subclient", I2C_NAME_SIZE); - - if ((err = i2c_attach_client(real_client)) != 0) - goto exit_kfree2; - - if ((err = i2c_attach_client(fake_client)) != 0) - goto exit_detach1; - - err = sysfs_create_bin_file(&real_client->dev.kobj, &user_eeprom_attr); + err = sysfs_create_bin_file(&client->dev.kobj, &user_eeprom_attr); if (err) - goto exit_detach2; + goto exit_remove_fake; return 0; -exit_detach2: - i2c_detach_client(fake_client); -exit_detach1: - i2c_detach_client(real_client); -exit_kfree2: - kfree(fake_client); -exit_kfree1: +exit_remove_fake: + i2c_unregister_device(data->fake_client); +exit_kfree: kfree(data); return err; } -/* Will be called for both the real client and the fake client */ -static int max6875_detach_client(struct i2c_client *client) +static int max6875_remove(struct i2c_client *client) { - int err; struct max6875_data *data = i2c_get_clientdata(client); - /* data is NULL for the fake client */ - if (data) - sysfs_remove_bin_file(&client->dev.kobj, &user_eeprom_attr); + i2c_unregister_device(data->fake_client); - err = i2c_detach_client(client); - if (err) - return err; + sysfs_remove_bin_file(&client->dev.kobj, &user_eeprom_attr); + kfree(data); - if (data) /* real client */ - kfree(data); - else /* fake client */ - kfree(client); return 0; } +static const struct i2c_device_id max6875_id[] = { + { "max6875", 0 }, + { } +}; + +static struct i2c_driver max6875_driver = { + .driver = { + .name = "max6875", + }, + .probe = max6875_probe, + .remove = max6875_remove, + .id_table = max6875_id, + + .detect = max6875_detect, + .address_data = &addr_data, +}; + static int __init max6875_init(void) { return i2c_add_driver(&max6875_driver); -- cgit v0.10.2 From 61c91f7ded640bb2b340cc89d9ca3a3ca0229c74 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:07 +0200 Subject: w1/ds2482: Convert to a new-style driver The new-style ds2482 driver implements the optional detect() callback to cover the use cases of the legacy driver. I'm curious if anyone really needs this though, so it might be removed in the feature. Signed-off-by: Jean Delvare diff --git a/drivers/w1/masters/ds2482.c b/drivers/w1/masters/ds2482.c index 0fd5820..df52cb3 100644 --- a/drivers/w1/masters/ds2482.c +++ b/drivers/w1/masters/ds2482.c @@ -94,21 +94,31 @@ static const u8 ds2482_chan_rd[8] = #define DS2482_REG_STS_1WB 0x01 -static int ds2482_attach_adapter(struct i2c_adapter *adapter); -static int ds2482_detect(struct i2c_adapter *adapter, int address, int kind); -static int ds2482_detach_client(struct i2c_client *client); +static int ds2482_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int ds2482_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); +static int ds2482_remove(struct i2c_client *client); /** * Driver data (common to all clients) */ +static const struct i2c_device_id ds2482_id[] = { + { "ds2482", 0 }, + { } +}; + static struct i2c_driver ds2482_driver = { .driver = { .owner = THIS_MODULE, .name = "ds2482", }, - .attach_adapter = ds2482_attach_adapter, - .detach_client = ds2482_detach_client, + .probe = ds2482_probe, + .remove = ds2482_remove, + .id_table = ds2482_id, + .detect = ds2482_detect, + .address_data = &addr_data, }; /* @@ -124,7 +134,7 @@ struct ds2482_w1_chan { }; struct ds2482_data { - struct i2c_client client; + struct i2c_client *client; struct mutex access_lock; /* 1-wire interface(s) */ @@ -147,7 +157,7 @@ struct ds2482_data { static inline int ds2482_select_register(struct ds2482_data *pdev, u8 read_ptr) { if (pdev->read_prt != read_ptr) { - if (i2c_smbus_write_byte_data(&pdev->client, + if (i2c_smbus_write_byte_data(pdev->client, DS2482_CMD_SET_READ_PTR, read_ptr) < 0) return -1; @@ -167,7 +177,7 @@ static inline int ds2482_select_register(struct ds2482_data *pdev, u8 read_ptr) */ static inline int ds2482_send_cmd(struct ds2482_data *pdev, u8 cmd) { - if (i2c_smbus_write_byte(&pdev->client, cmd) < 0) + if (i2c_smbus_write_byte(pdev->client, cmd) < 0) return -1; pdev->read_prt = DS2482_PTR_CODE_STATUS; @@ -187,7 +197,7 @@ static inline int ds2482_send_cmd(struct ds2482_data *pdev, u8 cmd) static inline int ds2482_send_cmd_data(struct ds2482_data *pdev, u8 cmd, u8 byte) { - if (i2c_smbus_write_byte_data(&pdev->client, cmd, byte) < 0) + if (i2c_smbus_write_byte_data(pdev->client, cmd, byte) < 0) return -1; /* all cmds leave in STATUS, except CONFIG */ @@ -216,7 +226,7 @@ static int ds2482_wait_1wire_idle(struct ds2482_data *pdev) if (!ds2482_select_register(pdev, DS2482_PTR_CODE_STATUS)) { do { - temp = i2c_smbus_read_byte(&pdev->client); + temp = i2c_smbus_read_byte(pdev->client); } while ((temp >= 0) && (temp & DS2482_REG_STS_1WB) && (++retries < DS2482_WAIT_IDLE_TIMEOUT)); } @@ -238,13 +248,13 @@ static int ds2482_wait_1wire_idle(struct ds2482_data *pdev) */ static int ds2482_set_channel(struct ds2482_data *pdev, u8 channel) { - if (i2c_smbus_write_byte_data(&pdev->client, DS2482_CMD_CHANNEL_SELECT, + if (i2c_smbus_write_byte_data(pdev->client, DS2482_CMD_CHANNEL_SELECT, ds2482_chan_wr[channel]) < 0) return -1; pdev->read_prt = DS2482_PTR_CODE_CHANNEL; pdev->channel = -1; - if (i2c_smbus_read_byte(&pdev->client) == ds2482_chan_rd[channel]) { + if (i2c_smbus_read_byte(pdev->client) == ds2482_chan_rd[channel]) { pdev->channel = channel; return 0; } @@ -368,7 +378,7 @@ static u8 ds2482_w1_read_byte(void *data) ds2482_select_register(pdev, DS2482_PTR_CODE_DATA); /* Read the data byte */ - result = i2c_smbus_read_byte(&pdev->client); + result = i2c_smbus_read_byte(pdev->client); mutex_unlock(&pdev->access_lock); @@ -415,47 +425,38 @@ static u8 ds2482_w1_reset_bus(void *data) } -/** - * Called to see if the device exists on an i2c bus. - */ -static int ds2482_attach_adapter(struct i2c_adapter *adapter) +static int ds2482_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - return i2c_probe(adapter, &addr_data, ds2482_detect); -} + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_WRITE_BYTE_DATA | + I2C_FUNC_SMBUS_BYTE)) + return -ENODEV; + strlcpy(info->type, "ds2482", I2C_NAME_SIZE); -/* - * The following function does more than just detection. If detection - * succeeds, it also registers the new chip. - */ -static int ds2482_detect(struct i2c_adapter *adapter, int address, int kind) + return 0; +} + +static int ds2482_probe(struct i2c_client *client, + const struct i2c_device_id *id) { struct ds2482_data *data; - struct i2c_client *new_client; - int err = 0; + int err = -ENODEV; int temp1; int idx; - if (!i2c_check_functionality(adapter, - I2C_FUNC_SMBUS_WRITE_BYTE_DATA | - I2C_FUNC_SMBUS_BYTE)) - goto exit; - if (!(data = kzalloc(sizeof(struct ds2482_data), GFP_KERNEL))) { err = -ENOMEM; goto exit; } - new_client = &data->client; - i2c_set_clientdata(new_client, data); - new_client->addr = address; - new_client->driver = &ds2482_driver; - new_client->adapter = adapter; + data->client = client; + i2c_set_clientdata(client, data); /* Reset the device (sets the read_ptr to status) */ if (ds2482_send_cmd(data, DS2482_CMD_RESET) < 0) { - dev_dbg(&adapter->dev, "DS2482 reset failed at 0x%02x.\n", - address); + dev_warn(&client->dev, "DS2482 reset failed.\n"); goto exit_free; } @@ -463,10 +464,10 @@ static int ds2482_detect(struct i2c_adapter *adapter, int address, int kind) ndelay(525); /* Read the status byte - only reset bit and line should be set */ - temp1 = i2c_smbus_read_byte(new_client); + temp1 = i2c_smbus_read_byte(client); if (temp1 != (DS2482_REG_STS_LL | DS2482_REG_STS_RST)) { - dev_dbg(&adapter->dev, "DS2482 (0x%02x) reset status " - "0x%02X - not a DS2482\n", address, temp1); + dev_warn(&client->dev, "DS2482 reset status " + "0x%02X - not a DS2482\n", temp1); goto exit_free; } @@ -478,16 +479,8 @@ static int ds2482_detect(struct i2c_adapter *adapter, int address, int kind) /* Set all config items to 0 (off) */ ds2482_send_cmd_data(data, DS2482_CMD_WRITE_CONFIG, 0xF0); - /* We can fill in the remaining client fields */ - snprintf(new_client->name, sizeof(new_client->name), "ds2482-%d00", - data->w1_count); - mutex_init(&data->access_lock); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(new_client))) - goto exit_free; - /* Register 1-wire interface(s) */ for (idx = 0; idx < data->w1_count; idx++) { data->w1_ch[idx].pdev = data; @@ -511,8 +504,6 @@ static int ds2482_detect(struct i2c_adapter *adapter, int address, int kind) return 0; exit_w1_remove: - i2c_detach_client(new_client); - for (idx = 0; idx < data->w1_count; idx++) { if (data->w1_ch[idx].pdev != NULL) w1_remove_master_device(&data->w1_ch[idx].w1_bm); @@ -523,10 +514,10 @@ exit: return err; } -static int ds2482_detach_client(struct i2c_client *client) +static int ds2482_remove(struct i2c_client *client) { struct ds2482_data *data = i2c_get_clientdata(client); - int err, idx; + int idx; /* Unregister the 1-wire bridge(s) */ for (idx = 0; idx < data->w1_count; idx++) { @@ -534,13 +525,6 @@ static int ds2482_detach_client(struct i2c_client *client) w1_remove_master_device(&data->w1_ch[idx].w1_bm); } - /* Detach the i2c device */ - if ((err = i2c_detach_client(client))) { - dev_err(&client->dev, - "Deregistration failed, client not detached.\n"); - return err; - } - /* Free the memory */ kfree(data); return 0; -- cgit v0.10.2 From 369932f6f840aedfbc717dd156bba7668a11d916 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:08 +0200 Subject: hwmon: (ad7418) Convert to a new-style i2c driver The ad7418 driver is only used on embedded platforms where i2c devices can easily be declared in platform code. Thus a new-style i2c driver makes perfect sense. This lets us get rid of quirky detection code (these chips have no identification registers) and shrinks the binary driver size by 38%. Signed-off-by: Jean Delvare Cc: Alessandro Zummo diff --git a/drivers/hwmon/ad7418.c b/drivers/hwmon/ad7418.c index 466b9ee..f97b5b3 100644 --- a/drivers/hwmon/ad7418.c +++ b/drivers/hwmon/ad7418.c @@ -23,12 +23,9 @@ #include "lm75.h" -#define DRV_VERSION "0.3" +#define DRV_VERSION "0.4" -/* Addresses to scan */ -static const unsigned short normal_i2c[] = { 0x28, I2C_CLIENT_END }; -/* Insmod parameters */ -I2C_CLIENT_INSMOD_3(ad7416, ad7417, ad7418); +enum chips { ad7416, ad7417, ad7418 }; /* AD7418 registers */ #define AD7418_REG_TEMP_IN 0x00 @@ -46,7 +43,6 @@ static const u8 AD7418_REG_TEMP[] = { AD7418_REG_TEMP_IN, AD7418_REG_TEMP_OS }; struct ad7418_data { - struct i2c_client client; struct device *hwmon_dev; struct attribute_group attrs; enum chips type; @@ -58,16 +54,25 @@ struct ad7418_data { u16 in[4]; }; -static int ad7418_attach_adapter(struct i2c_adapter *adapter); -static int ad7418_detect(struct i2c_adapter *adapter, int address, int kind); -static int ad7418_detach_client(struct i2c_client *client); +static int ad7418_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int ad7418_remove(struct i2c_client *client); + +static const struct i2c_device_id ad7418_id[] = { + { "ad7416", ad7416 }, + { "ad7417", ad7417 }, + { "ad7418", ad7418 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, ad7418_id); static struct i2c_driver ad7418_driver = { .driver = { .name = "ad7418", }, - .attach_adapter = ad7418_attach_adapter, - .detach_client = ad7418_detach_client, + .probe = ad7418_probe, + .remove = ad7418_remove, + .id_table = ad7418_id, }; /* All registers are word-sized, except for the configuration registers. @@ -192,13 +197,6 @@ static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, show_adc, NULL, 1); static SENSOR_DEVICE_ATTR(in3_input, S_IRUGO, show_adc, NULL, 2); static SENSOR_DEVICE_ATTR(in4_input, S_IRUGO, show_adc, NULL, 3); -static int ad7418_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, ad7418_detect); -} - static struct attribute *ad7416_attributes[] = { &sensor_dev_attr_temp1_max.dev_attr.attr, &sensor_dev_attr_temp1_max_hyst.dev_attr.attr, @@ -225,98 +223,46 @@ static struct attribute *ad7418_attributes[] = { NULL }; -static int ad7418_detect(struct i2c_adapter *adapter, int address, int kind) +static int ad7418_probe(struct i2c_client *client, + const struct i2c_device_id *id) { - struct i2c_client *client; + struct i2c_adapter *adapter = client->adapter; struct ad7418_data *data; - int err = 0; + int err; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA | - I2C_FUNC_SMBUS_WORD_DATA)) + I2C_FUNC_SMBUS_WORD_DATA)) { + err = -EOPNOTSUPP; goto exit; + } if (!(data = kzalloc(sizeof(struct ad7418_data), GFP_KERNEL))) { err = -ENOMEM; goto exit; } - client = &data->client; - client->addr = address; - client->adapter = adapter; - client->driver = &ad7418_driver; - i2c_set_clientdata(client, data); mutex_init(&data->lock); - - /* AD7418 has a curious behaviour on registers 6 and 7. They - * both always read 0xC071 and are not documented on the datasheet. - * We use them to detect the chip. - */ - if (kind <= 0) { - int reg, reg6, reg7; - - /* the AD7416 lies within this address range, but I have - * no means to check. - */ - if (address >= 0x48 && address <= 0x4f) { - /* XXX add tests for AD7416 here */ - /* data->type = ad7416; */ - } - /* here we might have AD7417 or AD7418 */ - else if (address >= 0x28 && address <= 0x2f) { - reg6 = i2c_smbus_read_word_data(client, 0x06); - reg7 = i2c_smbus_read_word_data(client, 0x07); - - if (address == 0x28 && reg6 == 0xC071 && reg7 == 0xC071) - data->type = ad7418; - - /* XXX add tests for AD7417 here */ - - - /* both AD7417 and AD7418 have bits 0-5 of - * the CONF2 register at 0 - */ - reg = i2c_smbus_read_byte_data(client, - AD7418_REG_CONF2); - if (reg & 0x3F) - data->type = any_chip; /* detection failed */ - } - } else { - dev_dbg(&adapter->dev, "detection forced\n"); - } - - if (kind > 0) - data->type = kind; - else if (kind < 0 && data->type == any_chip) { - err = -ENODEV; - goto exit_free; - } + data->type = id->driver_data; switch (data->type) { - case any_chip: case ad7416: data->adc_max = 0; data->attrs.attrs = ad7416_attributes; - strlcpy(client->name, "ad7416", I2C_NAME_SIZE); break; case ad7417: data->adc_max = 4; data->attrs.attrs = ad7417_attributes; - strlcpy(client->name, "ad7417", I2C_NAME_SIZE); break; case ad7418: data->adc_max = 1; data->attrs.attrs = ad7418_attributes; - strlcpy(client->name, "ad7418", I2C_NAME_SIZE); break; } - if ((err = i2c_attach_client(client))) - goto exit_free; - dev_info(&client->dev, "%s chip found\n", client->name); /* Initialize the AD7418 chip */ @@ -324,7 +270,7 @@ static int ad7418_detect(struct i2c_adapter *adapter, int address, int kind) /* Register sysfs hooks */ if ((err = sysfs_create_group(&client->dev.kobj, &data->attrs))) - goto exit_detach; + goto exit_free; data->hwmon_dev = hwmon_device_register(&client->dev); if (IS_ERR(data->hwmon_dev)) { @@ -336,20 +282,17 @@ static int ad7418_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove: sysfs_remove_group(&client->dev.kobj, &data->attrs); -exit_detach: - i2c_detach_client(client); exit_free: kfree(data); exit: return err; } -static int ad7418_detach_client(struct i2c_client *client) +static int ad7418_remove(struct i2c_client *client) { struct ad7418_data *data = i2c_get_clientdata(client); hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &data->attrs); - i2c_detach_client(client); kfree(data); return 0; } -- cgit v0.10.2 From 65817ed8d1376afff21019b5c0e0109e5a7d9cc0 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:08 +0200 Subject: hwmon: (adm1021) Convert to a new-style i2c driver The new-style adm1021 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare diff --git a/drivers/hwmon/adm1021.c b/drivers/hwmon/adm1021.c index ecbf694..b11e06f 100644 --- a/drivers/hwmon/adm1021.c +++ b/drivers/hwmon/adm1021.c @@ -78,7 +78,6 @@ clearing it. Weird, ey? --Phil */ /* Each client has this additional data */ struct adm1021_data { - struct i2c_client client; struct device *hwmon_dev; enum chips type; @@ -98,23 +97,42 @@ struct adm1021_data { u8 remote_temp_offset_prec; }; -static int adm1021_attach_adapter(struct i2c_adapter *adapter); -static int adm1021_detect(struct i2c_adapter *adapter, int address, int kind); +static int adm1021_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int adm1021_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); static void adm1021_init_client(struct i2c_client *client); -static int adm1021_detach_client(struct i2c_client *client); +static int adm1021_remove(struct i2c_client *client); static struct adm1021_data *adm1021_update_device(struct device *dev); /* (amalysh) read only mode, otherwise any limit's writing confuse BIOS */ static int read_only; +static const struct i2c_device_id adm1021_id[] = { + { "adm1021", adm1021 }, + { "adm1023", adm1023 }, + { "max1617", max1617 }, + { "max1617a", max1617a }, + { "thmc10", thmc10 }, + { "lm84", lm84 }, + { "gl523sm", gl523sm }, + { "mc1066", mc1066 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, adm1021_id); + /* This is the driver that will be inserted */ static struct i2c_driver adm1021_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "adm1021", }, - .attach_adapter = adm1021_attach_adapter, - .detach_client = adm1021_detach_client, + .probe = adm1021_probe, + .remove = adm1021_remove, + .id_table = adm1021_id, + .detect = adm1021_detect, + .address_data = &addr_data, }; static ssize_t show_temp(struct device *dev, @@ -216,13 +234,6 @@ static SENSOR_DEVICE_ATTR(temp2_fault, S_IRUGO, show_alarm, NULL, 2); static DEVICE_ATTR(alarms, S_IRUGO, show_alarms, NULL); -static int adm1021_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, adm1021_detect); -} - static struct attribute *adm1021_attributes[] = { &sensor_dev_attr_temp1_max.dev_attr.attr, &sensor_dev_attr_temp1_min.dev_attr.attr, @@ -243,36 +254,21 @@ static const struct attribute_group adm1021_group = { .attrs = adm1021_attributes, }; -static int adm1021_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int adm1021_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { + struct i2c_adapter *adapter = client->adapter; int i; - struct i2c_client *client; - struct adm1021_data *data; - int err = 0; const char *type_name = ""; int conv_rate, status, config; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) { pr_debug("adm1021: detect failed, " "smbus byte data not supported!\n"); - goto error0; - } - - /* OK. For now, we presume we have a valid client. We now create the - client structure, even though we cannot fill it completely yet. - But it allows us to access adm1021 register values. */ - - if (!(data = kzalloc(sizeof(struct adm1021_data), GFP_KERNEL))) { - pr_debug("adm1021: detect failed, kzalloc failed!\n"); - err = -ENOMEM; - goto error0; + return -ENODEV; } - client = &data->client; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &adm1021_driver; status = i2c_smbus_read_byte_data(client, ADM1021_REG_STATUS); conv_rate = i2c_smbus_read_byte_data(client, ADM1021_REG_CONV_RATE_R); @@ -284,8 +280,7 @@ static int adm1021_detect(struct i2c_adapter *adapter, int address, int kind) || (conv_rate & 0xF8) != 0x00) { pr_debug("adm1021: detect failed, " "chip not detected!\n"); - err = -ENODEV; - goto error1; + return -ENODEV; } } @@ -336,24 +331,36 @@ static int adm1021_detect(struct i2c_adapter *adapter, int address, int kind) type_name = "mc1066"; } pr_debug("adm1021: Detected chip %s at adapter %d, address 0x%02x.\n", - type_name, i2c_adapter_id(adapter), address); + type_name, i2c_adapter_id(adapter), client->addr); + strlcpy(info->type, type_name, I2C_NAME_SIZE); - /* Fill in the remaining client fields */ - strlcpy(client->name, type_name, I2C_NAME_SIZE); - data->type = kind; - mutex_init(&data->update_lock); + return 0; +} - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto error1; +static int adm1021_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct adm1021_data *data; + int err; + + data = kzalloc(sizeof(struct adm1021_data), GFP_KERNEL); + if (!data) { + pr_debug("adm1021: detect failed, kzalloc failed!\n"); + err = -ENOMEM; + goto error0; + } + + i2c_set_clientdata(client, data); + data->type = id->driver_data; + mutex_init(&data->update_lock); /* Initialize the ADM1021 chip */ - if (kind != lm84 && !read_only) + if (data->type != lm84 && !read_only) adm1021_init_client(client); /* Register sysfs hooks */ if ((err = sysfs_create_group(&client->dev.kobj, &adm1021_group))) - goto error2; + goto error1; data->hwmon_dev = hwmon_device_register(&client->dev); if (IS_ERR(data->hwmon_dev)) { @@ -365,8 +372,6 @@ static int adm1021_detect(struct i2c_adapter *adapter, int address, int kind) error3: sysfs_remove_group(&client->dev.kobj, &adm1021_group); -error2: - i2c_detach_client(client); error1: kfree(data); error0: @@ -382,17 +387,13 @@ static void adm1021_init_client(struct i2c_client *client) i2c_smbus_write_byte_data(client, ADM1021_REG_CONV_RATE_W, 0x04); } -static int adm1021_detach_client(struct i2c_client *client) +static int adm1021_remove(struct i2c_client *client) { struct adm1021_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &adm1021_group); - if ((err = i2c_detach_client(client))) - return err; - kfree(data); return 0; } -- cgit v0.10.2 From 7dbafe021ba360bf25674a7e290d3e4a5c953981 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:08 +0200 Subject: hwmon: (adm1025) Convert to a new-style i2c driver The new-style adm1025 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare diff --git a/drivers/hwmon/adm1025.c b/drivers/hwmon/adm1025.c index 1d76de7..4db04d6 100644 --- a/drivers/hwmon/adm1025.c +++ b/drivers/hwmon/adm1025.c @@ -2,7 +2,7 @@ * adm1025.c * * Copyright (C) 2000 Chen-Yuan Wu - * Copyright (C) 2003-2004 Jean Delvare + * Copyright (C) 2003-2008 Jean Delvare * * The ADM1025 is a sensor chip made by Analog Devices. It reports up to 6 * voltages (including its own power source) and up to two temperatures @@ -109,22 +109,35 @@ static const int in_scale[6] = { 2500, 2250, 3300, 5000, 12000, 3300 }; * Functions declaration */ -static int adm1025_attach_adapter(struct i2c_adapter *adapter); -static int adm1025_detect(struct i2c_adapter *adapter, int address, int kind); +static int adm1025_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int adm1025_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); static void adm1025_init_client(struct i2c_client *client); -static int adm1025_detach_client(struct i2c_client *client); +static int adm1025_remove(struct i2c_client *client); static struct adm1025_data *adm1025_update_device(struct device *dev); /* * Driver data (common to all clients) */ +static const struct i2c_device_id adm1025_id[] = { + { "adm1025", adm1025 }, + { "ne1619", ne1619 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, adm1025_id); + static struct i2c_driver adm1025_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "adm1025", }, - .attach_adapter = adm1025_attach_adapter, - .detach_client = adm1025_detach_client, + .probe = adm1025_probe, + .remove = adm1025_remove, + .id_table = adm1025_id, + .detect = adm1025_detect, + .address_data = &addr_data, }; /* @@ -132,7 +145,6 @@ static struct i2c_driver adm1025_driver = { */ struct adm1025_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; char valid; /* zero until following fields are valid */ @@ -344,13 +356,6 @@ static DEVICE_ATTR(vrm, S_IRUGO | S_IWUSR, show_vrm, set_vrm); * Real code */ -static int adm1025_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, adm1025_detect); -} - static struct attribute *adm1025_attributes[] = { &sensor_dev_attr_in0_input.dev_attr.attr, &sensor_dev_attr_in1_input.dev_attr.attr, @@ -403,31 +408,16 @@ static const struct attribute_group adm1025_group_in4 = { .attrs = adm1025_attributes_in4, }; -/* - * The following function does more than just detection. If detection - * succeeds, it also registers the new chip. - */ -static int adm1025_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int adm1025_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - struct i2c_client *client; - struct adm1025_data *data; - int err = 0; + struct i2c_adapter *adapter = client->adapter; const char *name = ""; u8 config; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - goto exit; - - if (!(data = kzalloc(sizeof(struct adm1025_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - client = &data->client; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &adm1025_driver; + return -ENODEV; /* * Now we do the remaining detection. A negative kind means that @@ -448,8 +438,8 @@ static int adm1025_detect(struct i2c_adapter *adapter, int address, int kind) ADM1025_REG_STATUS2) & 0xBC) != 0x00) { dev_dbg(&adapter->dev, "ADM1025 detection failed at 0x%02x.\n", - address); - goto exit_free; + client->addr); + return -ENODEV; } } @@ -465,7 +455,7 @@ static int adm1025_detect(struct i2c_adapter *adapter, int address, int kind) } } else if (man_id == 0xA1) { /* Philips */ - if (address != 0x2E + if (client->addr != 0x2E && (chip_id & 0xF0) == 0x20) { /* NE1619 */ kind = ne1619; } @@ -475,7 +465,7 @@ static int adm1025_detect(struct i2c_adapter *adapter, int address, int kind) dev_info(&adapter->dev, "Unsupported chip (man_id=0x%02X, " "chip_id=0x%02X).\n", man_id, chip_id); - goto exit_free; + return -ENODEV; } } @@ -484,23 +474,36 @@ static int adm1025_detect(struct i2c_adapter *adapter, int address, int kind) } else if (kind == ne1619) { name = "ne1619"; } + strlcpy(info->type, name, I2C_NAME_SIZE); - /* We can fill in the remaining client fields */ - strlcpy(client->name, name, I2C_NAME_SIZE); - mutex_init(&data->update_lock); + return 0; +} - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto exit_free; +static int adm1025_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct adm1025_data *data; + int err; + u8 config; + + data = kzalloc(sizeof(struct adm1025_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); /* Initialize the ADM1025 chip */ adm1025_init_client(client); /* Register sysfs hooks */ if ((err = sysfs_create_group(&client->dev.kobj, &adm1025_group))) - goto exit_detach; + goto exit_free; /* Pin 11 is either in4 (+12V) or VID4 */ + config = i2c_smbus_read_byte_data(client, ADM1025_REG_CONFIG); if (!(config & 0x20)) { if ((err = sysfs_create_group(&client->dev.kobj, &adm1025_group_in4))) @@ -518,8 +521,6 @@ static int adm1025_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove: sysfs_remove_group(&client->dev.kobj, &adm1025_group); sysfs_remove_group(&client->dev.kobj, &adm1025_group_in4); -exit_detach: - i2c_detach_client(client); exit_free: kfree(data); exit: @@ -568,18 +569,14 @@ static void adm1025_init_client(struct i2c_client *client) (reg&0x7E)|0x01); } -static int adm1025_detach_client(struct i2c_client *client) +static int adm1025_remove(struct i2c_client *client) { struct adm1025_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &adm1025_group); sysfs_remove_group(&client->dev.kobj, &adm1025_group_in4); - if ((err = i2c_detach_client(client))) - return err; - kfree(data); return 0; } -- cgit v0.10.2 From 57f7eb0bcb2316dc264cd26f38b33dd2cf3151c1 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:08 +0200 Subject: hwmon: (adm1026) Convert to a new-style i2c driver The new-style adm1026 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare diff --git a/drivers/hwmon/adm1026.c b/drivers/hwmon/adm1026.c index 904c6ce..7fe2441 100644 --- a/drivers/hwmon/adm1026.c +++ b/drivers/hwmon/adm1026.c @@ -259,7 +259,6 @@ struct pwm_data { }; struct adm1026_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; @@ -293,10 +292,11 @@ struct adm1026_data { u8 config3; /* Register value */ }; -static int adm1026_attach_adapter(struct i2c_adapter *adapter); -static int adm1026_detect(struct i2c_adapter *adapter, int address, - int kind); -static int adm1026_detach_client(struct i2c_client *client); +static int adm1026_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int adm1026_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); +static int adm1026_remove(struct i2c_client *client); static int adm1026_read_value(struct i2c_client *client, u8 reg); static int adm1026_write_value(struct i2c_client *client, u8 reg, int value); static void adm1026_print_gpio(struct i2c_client *client); @@ -305,22 +305,24 @@ static struct adm1026_data *adm1026_update_device(struct device *dev); static void adm1026_init_client(struct i2c_client *client); +static const struct i2c_device_id adm1026_id[] = { + { "adm1026", adm1026 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, adm1026_id); + static struct i2c_driver adm1026_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "adm1026", }, - .attach_adapter = adm1026_attach_adapter, - .detach_client = adm1026_detach_client, + .probe = adm1026_probe, + .remove = adm1026_remove, + .id_table = adm1026_id, + .detect = adm1026_detect, + .address_data = &addr_data, }; -static int adm1026_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) { - return 0; - } - return i2c_probe(adapter, &addr_data, adm1026_detect); -} - static int adm1026_read_value(struct i2c_client *client, u8 reg) { int res; @@ -1647,48 +1649,32 @@ static const struct attribute_group adm1026_group_in8_9 = { .attrs = adm1026_attributes_in8_9, }; -static int adm1026_detect(struct i2c_adapter *adapter, int address, - int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int adm1026_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { + struct i2c_adapter *adapter = client->adapter; + int address = client->addr; int company, verstep; - struct i2c_client *client; - struct adm1026_data *data; - int err = 0; - const char *type_name = ""; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) { /* We need to be able to do byte I/O */ - goto exit; + return -ENODEV; }; - /* OK. For now, we presume we have a valid client. We now create the - client structure, even though we cannot fill it completely yet. - But it allows us to access adm1026_{read,write}_value. */ - - if (!(data = kzalloc(sizeof(struct adm1026_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - client = &data->client; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &adm1026_driver; - /* Now, we do the remaining detection. */ company = adm1026_read_value(client, ADM1026_REG_COMPANY); verstep = adm1026_read_value(client, ADM1026_REG_VERSTEP); - dev_dbg(&client->dev, "Detecting device at %d,0x%02x with" + dev_dbg(&adapter->dev, "Detecting device at %d,0x%02x with" " COMPANY: 0x%02x and VERSTEP: 0x%02x\n", i2c_adapter_id(client->adapter), client->addr, company, verstep); /* If auto-detecting, Determine the chip type. */ if (kind <= 0) { - dev_dbg(&client->dev, "Autodetecting device at %d,0x%02x " + dev_dbg(&adapter->dev, "Autodetecting device at %d,0x%02x " "...\n", i2c_adapter_id(adapter), address); if (company == ADM1026_COMPANY_ANALOG_DEV && verstep == ADM1026_VERSTEP_ADM1026) { @@ -1704,7 +1690,7 @@ static int adm1026_detect(struct i2c_adapter *adapter, int address, verstep); kind = any_chip; } else { - dev_dbg(&client->dev, ": Autodetection " + dev_dbg(&adapter->dev, ": Autodetection " "failed\n"); /* Not an ADM1026 ... */ if (kind == 0) { /* User used force=x,y */ @@ -1713,33 +1699,29 @@ static int adm1026_detect(struct i2c_adapter *adapter, int address, "force_adm1026.\n", i2c_adapter_id(adapter), address); } - goto exitfree; + return -ENODEV; } } + strlcpy(info->type, "adm1026", I2C_NAME_SIZE); - /* Fill in the chip specific driver values */ - switch (kind) { - case any_chip : - type_name = "adm1026"; - break; - case adm1026 : - type_name = "adm1026"; - break; - default : - dev_err(&adapter->dev, ": Internal error, invalid " - "kind (%d)!\n", kind); - err = -EFAULT; - goto exitfree; + return 0; +} + +static int adm1026_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct adm1026_data *data; + int err; + + data = kzalloc(sizeof(struct adm1026_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; } - strlcpy(client->name, type_name, I2C_NAME_SIZE); - /* Fill in the remaining client fields */ + i2c_set_clientdata(client, data); mutex_init(&data->update_lock); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto exitfree; - /* Set the VRM version */ data->vrm = vid_which_vrm(); @@ -1748,7 +1730,7 @@ static int adm1026_detect(struct i2c_adapter *adapter, int address, /* Register sysfs hooks */ if ((err = sysfs_create_group(&client->dev.kobj, &adm1026_group))) - goto exitdetach; + goto exitfree; if (data->config1 & CFG1_AIN8_9) err = sysfs_create_group(&client->dev.kobj, &adm1026_group_in8_9); @@ -1773,15 +1755,13 @@ exitremove: sysfs_remove_group(&client->dev.kobj, &adm1026_group_in8_9); else sysfs_remove_group(&client->dev.kobj, &adm1026_group_temp3); -exitdetach: - i2c_detach_client(client); exitfree: kfree(data); exit: return err; } -static int adm1026_detach_client(struct i2c_client *client) +static int adm1026_remove(struct i2c_client *client) { struct adm1026_data *data = i2c_get_clientdata(client); hwmon_device_unregister(data->hwmon_dev); @@ -1790,7 +1770,6 @@ static int adm1026_detach_client(struct i2c_client *client) sysfs_remove_group(&client->dev.kobj, &adm1026_group_in8_9); else sysfs_remove_group(&client->dev.kobj, &adm1026_group_temp3); - i2c_detach_client(client); kfree(data); return 0; } -- cgit v0.10.2 From 9c97fb4d255fbf6713ee2d77dd8ed8ae770a7e49 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:09 +0200 Subject: hwmon: (adm1029) Convert to a new-style i2c driver The new-style adm1029 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare Cc: Corentin Labbe diff --git a/drivers/hwmon/adm1029.c b/drivers/hwmon/adm1029.c index 2c6608d..ba84ca5 100644 --- a/drivers/hwmon/adm1029.c +++ b/drivers/hwmon/adm1029.c @@ -115,9 +115,11 @@ static const u8 ADM1029_REG_FAN_DIV[] = { * Functions declaration */ -static int adm1029_attach_adapter(struct i2c_adapter *adapter); -static int adm1029_detect(struct i2c_adapter *adapter, int address, int kind); -static int adm1029_detach_client(struct i2c_client *client); +static int adm1029_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int adm1029_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); +static int adm1029_remove(struct i2c_client *client); static struct adm1029_data *adm1029_update_device(struct device *dev); static int adm1029_init_client(struct i2c_client *client); @@ -125,12 +127,22 @@ static int adm1029_init_client(struct i2c_client *client); * Driver data (common to all clients) */ +static const struct i2c_device_id adm1029_id[] = { + { "adm1029", adm1029 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, adm1029_id); + static struct i2c_driver adm1029_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "adm1029", }, - .attach_adapter = adm1029_attach_adapter, - .detach_client = adm1029_detach_client, + .probe = adm1029_probe, + .remove = adm1029_remove, + .id_table = adm1029_id, + .detect = adm1029_detect, + .address_data = &addr_data, }; /* @@ -138,7 +150,6 @@ static struct i2c_driver adm1029_driver = { */ struct adm1029_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; char valid; /* zero until following fields are valid */ @@ -284,37 +295,14 @@ static const struct attribute_group adm1029_group = { * Real code */ -static int adm1029_attach_adapter(struct i2c_adapter *adapter) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int adm1029_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, adm1029_detect); -} + struct i2c_adapter *adapter = client->adapter; -/* - * The following function does more than just detection. If detection - * succeeds, it also registers the new chip. - */ - -static int adm1029_detect(struct i2c_adapter *adapter, int address, int kind) -{ - struct i2c_client *client; - struct adm1029_data *data; - int err = 0; - const char *name = ""; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - goto exit; - - if (!(data = kzalloc(sizeof(struct adm1029_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - client = &data->client; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &adm1029_driver; + return -ENODEV; /* Now we do the detection and identification. A negative kind * means that the driver was loaded with no force parameter @@ -362,32 +350,41 @@ static int adm1029_detect(struct i2c_adapter *adapter, int address, int kind) if (kind <= 0) { /* identification failed */ pr_debug("adm1029: Unsupported chip (man_id=0x%02X, " "chip_id=0x%02X)\n", man_id, chip_id); - goto exit_free; + return -ENODEV; } } + strlcpy(info->type, "adm1029", I2C_NAME_SIZE); - if (kind == adm1029) { - name = "adm1029"; + return 0; +} + +static int adm1029_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct adm1029_data *data; + int err; + + data = kzalloc(sizeof(struct adm1029_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; } - /* We can fill in the remaining client fields */ - strlcpy(client->name, name, I2C_NAME_SIZE); + i2c_set_clientdata(client, data); mutex_init(&data->update_lock); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto exit_free; - /* * Initialize the ADM1029 chip * Check config register */ - if (adm1029_init_client(client) == 0) - goto exit_detach; + if (adm1029_init_client(client) == 0) { + err = -ENODEV; + goto exit_free; + } /* Register sysfs hooks */ if ((err = sysfs_create_group(&client->dev.kobj, &adm1029_group))) - goto exit_detach; + goto exit_free; data->hwmon_dev = hwmon_device_register(&client->dev); if (IS_ERR(data->hwmon_dev)) { @@ -399,8 +396,6 @@ static int adm1029_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove_files: sysfs_remove_group(&client->dev.kobj, &adm1029_group); - exit_detach: - i2c_detach_client(client); exit_free: kfree(data); exit: @@ -424,17 +419,13 @@ static int adm1029_init_client(struct i2c_client *client) return 1; } -static int adm1029_detach_client(struct i2c_client *client) +static int adm1029_remove(struct i2c_client *client) { struct adm1029_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &adm1029_group); - if ((err = i2c_detach_client(client))) - return err; - kfree(data); return 0; } -- cgit v0.10.2 From af200f881d2cbf3ba2f4c505fa1ae5cfef36f46a Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:09 +0200 Subject: hwmon: (adm1031) Convert to a new-style i2c driver The new-style adm1031 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare Cc: Alexandre d'Alton diff --git a/drivers/hwmon/adm1031.c b/drivers/hwmon/adm1031.c index 2bffcab..7894418 100644 --- a/drivers/hwmon/adm1031.c +++ b/drivers/hwmon/adm1031.c @@ -70,7 +70,6 @@ typedef u8 auto_chan_table_t[8][2]; /* Each client has this additional data */ struct adm1031_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; int chip_type; @@ -99,19 +98,32 @@ struct adm1031_data { s8 temp_crit[3]; }; -static int adm1031_attach_adapter(struct i2c_adapter *adapter); -static int adm1031_detect(struct i2c_adapter *adapter, int address, int kind); +static int adm1031_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int adm1031_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); static void adm1031_init_client(struct i2c_client *client); -static int adm1031_detach_client(struct i2c_client *client); +static int adm1031_remove(struct i2c_client *client); static struct adm1031_data *adm1031_update_device(struct device *dev); +static const struct i2c_device_id adm1031_id[] = { + { "adm1030", adm1030 }, + { "adm1031", adm1031 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, adm1031_id); + /* This is the driver that will be inserted */ static struct i2c_driver adm1031_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "adm1031", }, - .attach_adapter = adm1031_attach_adapter, - .detach_client = adm1031_detach_client, + .probe = adm1031_probe, + .remove = adm1031_remove, + .id_table = adm1031_id, + .detect = adm1031_detect, + .address_data = &addr_data, }; static inline u8 adm1031_read_value(struct i2c_client *client, u8 reg) @@ -693,13 +705,6 @@ static SENSOR_DEVICE_ATTR(temp3_crit_alarm, S_IRUGO, show_alarm, NULL, 12); static SENSOR_DEVICE_ATTR(temp3_fault, S_IRUGO, show_alarm, NULL, 13); static SENSOR_DEVICE_ATTR(temp1_crit_alarm, S_IRUGO, show_alarm, NULL, 14); -static int adm1031_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, adm1031_detect); -} - static struct attribute *adm1031_attributes[] = { &sensor_dev_attr_fan1_input.dev_attr.attr, &sensor_dev_attr_fan1_div.dev_attr.attr, @@ -770,27 +775,15 @@ static const struct attribute_group adm1031_group_opt = { .attrs = adm1031_attributes_opt, }; -/* This function is called by i2c_probe */ -static int adm1031_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int adm1031_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - struct i2c_client *client; - struct adm1031_data *data; - int err = 0; + struct i2c_adapter *adapter = client->adapter; const char *name = ""; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - goto exit; - - if (!(data = kzalloc(sizeof(struct adm1031_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - client = &data->client; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &adm1031_driver; + return -ENODEV; if (kind < 0) { int id, co; @@ -798,7 +791,7 @@ static int adm1031_detect(struct i2c_adapter *adapter, int address, int kind) co = i2c_smbus_read_byte_data(client, 0x3e); if (!((id == 0x31 || id == 0x30) && co == 0x41)) - goto exit_free; + return -ENODEV; kind = (id == 0x30) ? adm1030 : adm1031; } @@ -809,28 +802,43 @@ static int adm1031_detect(struct i2c_adapter *adapter, int address, int kind) * auto fan control helper table. */ if (kind == adm1030) { name = "adm1030"; - data->chan_select_table = &auto_channel_select_table_adm1030; } else if (kind == adm1031) { name = "adm1031"; - data->chan_select_table = &auto_channel_select_table_adm1031; } - data->chip_type = kind; + strlcpy(info->type, name, I2C_NAME_SIZE); - strlcpy(client->name, name, I2C_NAME_SIZE); + return 0; +} + +static int adm1031_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct adm1031_data *data; + int err; + + data = kzalloc(sizeof(struct adm1031_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(client, data); + data->chip_type = id->driver_data; mutex_init(&data->update_lock); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto exit_free; + if (data->chip_type == adm1030) + data->chan_select_table = &auto_channel_select_table_adm1030; + else + data->chan_select_table = &auto_channel_select_table_adm1031; /* Initialize the ADM1031 chip */ adm1031_init_client(client); /* Register sysfs hooks */ if ((err = sysfs_create_group(&client->dev.kobj, &adm1031_group))) - goto exit_detach; + goto exit_free; - if (kind == adm1031) { + if (data->chip_type == adm1031) { if ((err = sysfs_create_group(&client->dev.kobj, &adm1031_group_opt))) goto exit_remove; @@ -847,25 +855,19 @@ static int adm1031_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove: sysfs_remove_group(&client->dev.kobj, &adm1031_group); sysfs_remove_group(&client->dev.kobj, &adm1031_group_opt); -exit_detach: - i2c_detach_client(client); exit_free: kfree(data); exit: return err; } -static int adm1031_detach_client(struct i2c_client *client) +static int adm1031_remove(struct i2c_client *client) { struct adm1031_data *data = i2c_get_clientdata(client); - int ret; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &adm1031_group); sysfs_remove_group(&client->dev.kobj, &adm1031_group_opt); - if ((ret = i2c_detach_client(client)) != 0) { - return ret; - } kfree(data); return 0; } -- cgit v0.10.2 From 7fae8283109e155467bc1c622178d3a475cdbddf Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:09 +0200 Subject: hwmon: (adm9240) Convert to a new-style i2c driver The new-style adm9240 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare Cc: Grant Coady diff --git a/drivers/hwmon/adm9240.c b/drivers/hwmon/adm9240.c index 149ef25..2444b15 100644 --- a/drivers/hwmon/adm9240.c +++ b/drivers/hwmon/adm9240.c @@ -130,25 +130,37 @@ static inline unsigned int AOUT_FROM_REG(u8 reg) return SCALE(reg, 1250, 255); } -static int adm9240_attach_adapter(struct i2c_adapter *adapter); -static int adm9240_detect(struct i2c_adapter *adapter, int address, int kind); +static int adm9240_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int adm9240_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); static void adm9240_init_client(struct i2c_client *client); -static int adm9240_detach_client(struct i2c_client *client); +static int adm9240_remove(struct i2c_client *client); static struct adm9240_data *adm9240_update_device(struct device *dev); /* driver data */ +static const struct i2c_device_id adm9240_id[] = { + { "adm9240", adm9240 }, + { "ds1780", ds1780 }, + { "lm81", lm81 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, adm9240_id); + static struct i2c_driver adm9240_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "adm9240", }, - .attach_adapter = adm9240_attach_adapter, - .detach_client = adm9240_detach_client, + .probe = adm9240_probe, + .remove = adm9240_remove, + .id_table = adm9240_id, + .detect = adm9240_detect, + .address_data = &addr_data, }; /* per client data */ struct adm9240_data { - enum chips type; - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; char valid; @@ -532,28 +544,17 @@ static const struct attribute_group adm9240_group = { /*** sensor chip detect and driver install ***/ -static int adm9240_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int adm9240_detect(struct i2c_client *new_client, int kind, + struct i2c_board_info *info) { - struct i2c_client *new_client; - struct adm9240_data *data; - int err = 0; + struct i2c_adapter *adapter = new_client->adapter; const char *name = ""; + int address = new_client->addr; u8 man_id, die_rev; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - goto exit; - - if (!(data = kzalloc(sizeof(*data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - new_client = &data->client; - i2c_set_clientdata(new_client, data); - new_client->addr = address; - new_client->adapter = adapter; - new_client->driver = &adm9240_driver; - new_client->flags = 0; + return -ENODEV; if (kind == 0) { kind = adm9240; @@ -566,7 +567,7 @@ static int adm9240_detect(struct i2c_adapter *adapter, int address, int kind) != address) { dev_err(&adapter->dev, "detect fail: address match, " "0x%02x\n", address); - goto exit_free; + return -ENODEV; } /* check known chip manufacturer */ @@ -581,7 +582,7 @@ static int adm9240_detect(struct i2c_adapter *adapter, int address, int kind) } else { dev_err(&adapter->dev, "detect fail: unknown manuf, " "0x%02x\n", man_id); - goto exit_free; + return -ENODEV; } /* successful detect, print chip info */ @@ -600,20 +601,31 @@ static int adm9240_detect(struct i2c_adapter *adapter, int address, int kind) } else if (kind == lm81) { name = "lm81"; } + strlcpy(info->type, name, I2C_NAME_SIZE); - /* fill in the remaining client fields and attach */ - strlcpy(new_client->name, name, I2C_NAME_SIZE); - data->type = kind; - mutex_init(&data->update_lock); + return 0; +} - if ((err = i2c_attach_client(new_client))) - goto exit_free; +static int adm9240_probe(struct i2c_client *new_client, + const struct i2c_device_id *id) +{ + struct adm9240_data *data; + int err; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(new_client, data); + mutex_init(&data->update_lock); adm9240_init_client(new_client); /* populate sysfs filesystem */ if ((err = sysfs_create_group(&new_client->dev.kobj, &adm9240_group))) - goto exit_detach; + goto exit_free; data->hwmon_dev = hwmon_device_register(&new_client->dev); if (IS_ERR(data->hwmon_dev)) { @@ -625,32 +637,19 @@ static int adm9240_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove: sysfs_remove_group(&new_client->dev.kobj, &adm9240_group); -exit_detach: - i2c_detach_client(new_client); exit_free: kfree(data); exit: return err; } -static int adm9240_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, adm9240_detect); -} - -static int adm9240_detach_client(struct i2c_client *client) +static int adm9240_remove(struct i2c_client *client) { struct adm9240_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &adm9240_group); - if ((err = i2c_detach_client(client))) - return err; - kfree(data); return 0; } -- cgit v0.10.2 From 7347cb388e5aecffc4920bd5ea6a61e4a4690bae Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:10 +0200 Subject: hwmon: (ads7828) Convert to a new-style i2c driver The new-style ads7828 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare diff --git a/drivers/hwmon/ads7828.c b/drivers/hwmon/ads7828.c index 5c8b6e0..5c39b4a 100644 --- a/drivers/hwmon/ads7828.c +++ b/drivers/hwmon/ads7828.c @@ -64,7 +64,6 @@ static unsigned int ads7828_lsb_resol; /* resolution of the ADC sample lsb */ /* Each client has this additional data */ struct ads7828_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; /* mutex protect updates */ char valid; /* !=0 if following fields are valid */ @@ -73,7 +72,10 @@ struct ads7828_data { }; /* Function declaration - necessary due to function dependencies */ -static int ads7828_detect(struct i2c_adapter *adapter, int address, int kind); +static int ads7828_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); +static int ads7828_probe(struct i2c_client *client, + const struct i2c_device_id *id); /* The ADS7828 returns the 12-bit sample in two bytes, these are read as a word then byte-swapped */ @@ -156,58 +158,43 @@ static const struct attribute_group ads7828_group = { .attrs = ads7828_attributes, }; -static int ads7828_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, ads7828_detect); -} - -static int ads7828_detach_client(struct i2c_client *client) +static int ads7828_remove(struct i2c_client *client) { struct ads7828_data *data = i2c_get_clientdata(client); hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &ads7828_group); - i2c_detach_client(client); kfree(i2c_get_clientdata(client)); return 0; } +static const struct i2c_device_id ads7828_id[] = { + { "ads7828", ads7828 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, ads7828_id); + /* This is the driver that will be inserted */ static struct i2c_driver ads7828_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "ads7828", }, - .attach_adapter = ads7828_attach_adapter, - .detach_client = ads7828_detach_client, + .probe = ads7828_probe, + .remove = ads7828_remove, + .id_table = ads7828_id, + .detect = ads7828_detect, + .address_data = &addr_data, }; -/* This function is called by i2c_probe */ -static int ads7828_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int ads7828_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - struct i2c_client *client; - struct ads7828_data *data; - int err = 0; - const char *name = ""; + struct i2c_adapter *adapter = client->adapter; /* Check we have a valid client */ if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_WORD_DATA)) - goto exit; - - /* OK. For now, we presume we have a valid client. We now create the - client structure, even though we cannot fill it completely yet. - But it allows us to access ads7828_read_value. */ - data = kzalloc(sizeof(struct ads7828_data), GFP_KERNEL); - if (!data) { - err = -ENOMEM; - goto exit; - } - - client = &data->client; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &ads7828_driver; + return -ENODEV; /* Now, we do the remaining detection. There is no identification dedicated register so attempt to sanity check using knowledge of @@ -225,32 +212,34 @@ static int ads7828_detect(struct i2c_adapter *adapter, int address, int kind) printk(KERN_DEBUG "%s : Doesn't look like an ads7828 device\n", __func__); - goto exit_free; + return -ENODEV; } } } + strlcpy(info->type, "ads7828", I2C_NAME_SIZE); - /* Determine the chip type - only one kind supported! */ - if (kind <= 0) - kind = ads7828; + return 0; +} - if (kind == ads7828) - name = "ads7828"; +static int ads7828_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct ads7828_data *data; + int err; - /* Fill in the remaining client fields, put it into the global list */ - strlcpy(client->name, name, I2C_NAME_SIZE); + data = kzalloc(sizeof(struct ads7828_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + i2c_set_clientdata(client, data); mutex_init(&data->update_lock); - /* Tell the I2C layer a new client has arrived */ - err = i2c_attach_client(client); - if (err) - goto exit_free; - /* Register sysfs hooks */ err = sysfs_create_group(&client->dev.kobj, &ads7828_group); if (err) - goto exit_detach; + goto exit_free; data->hwmon_dev = hwmon_device_register(&client->dev); if (IS_ERR(data->hwmon_dev)) { @@ -262,8 +251,6 @@ static int ads7828_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove: sysfs_remove_group(&client->dev.kobj, &ads7828_group); -exit_detach: - i2c_detach_client(client); exit_free: kfree(data); exit: -- cgit v0.10.2 From 008f1ca51e4a25ee3e34b8faa901dbeefaf0081a Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:10 +0200 Subject: hwmon: (adt7470) Convert to a new-style i2c driver The new-style adt7470 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare Cc: Darrick J. Wong diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c index 6b5325f..d368d8f 100644 --- a/drivers/hwmon/adt7470.c +++ b/drivers/hwmon/adt7470.c @@ -138,7 +138,6 @@ I2C_CLIENT_INSMOD_1(adt7470); #define FAN_DATA_VALID(x) ((x) && (x) != FAN_PERIOD_INVALID) struct adt7470_data { - struct i2c_client client; struct device *hwmon_dev; struct attribute_group attrs; struct mutex lock; @@ -164,16 +163,28 @@ struct adt7470_data { u8 pwm_auto_temp[ADT7470_PWM_COUNT]; }; -static int adt7470_attach_adapter(struct i2c_adapter *adapter); -static int adt7470_detect(struct i2c_adapter *adapter, int address, int kind); -static int adt7470_detach_client(struct i2c_client *client); +static int adt7470_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int adt7470_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); +static int adt7470_remove(struct i2c_client *client); + +static const struct i2c_device_id adt7470_id[] = { + { "adt7470", adt7470 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, adt7470_id); static struct i2c_driver adt7470_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "adt7470", }, - .attach_adapter = adt7470_attach_adapter, - .detach_client = adt7470_detach_client, + .probe = adt7470_probe, + .remove = adt7470_remove, + .id_table = adt7470_id, + .detect = adt7470_detect, + .address_data = &addr_data, }; /* @@ -1004,64 +1015,52 @@ static struct attribute *adt7470_attr[] = NULL }; -static int adt7470_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, adt7470_detect); -} - -static int adt7470_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int adt7470_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - struct i2c_client *client; - struct adt7470_data *data; - int err = 0; + struct i2c_adapter *adapter = client->adapter; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - goto exit; - - if (!(data = kzalloc(sizeof(struct adt7470_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - client = &data->client; - client->addr = address; - client->adapter = adapter; - client->driver = &adt7470_driver; - - i2c_set_clientdata(client, data); - - mutex_init(&data->lock); + return -ENODEV; if (kind <= 0) { int vendor, device, revision; vendor = i2c_smbus_read_byte_data(client, ADT7470_REG_VENDOR); - if (vendor != ADT7470_VENDOR) { - err = -ENODEV; - goto exit_free; - } + if (vendor != ADT7470_VENDOR) + return -ENODEV; device = i2c_smbus_read_byte_data(client, ADT7470_REG_DEVICE); - if (device != ADT7470_DEVICE) { - err = -ENODEV; - goto exit_free; - } + if (device != ADT7470_DEVICE) + return -ENODEV; revision = i2c_smbus_read_byte_data(client, ADT7470_REG_REVISION); - if (revision != ADT7470_REVISION) { - err = -ENODEV; - goto exit_free; - } + if (revision != ADT7470_REVISION) + return -ENODEV; } else dev_dbg(&adapter->dev, "detection forced\n"); - strlcpy(client->name, "adt7470", I2C_NAME_SIZE); + strlcpy(info->type, "adt7470", I2C_NAME_SIZE); - if ((err = i2c_attach_client(client))) - goto exit_free; + return 0; +} + +static int adt7470_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct adt7470_data *data; + int err; + + data = kzalloc(sizeof(struct adt7470_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(client, data); + mutex_init(&data->lock); dev_info(&client->dev, "%s chip found\n", client->name); @@ -1071,7 +1070,7 @@ static int adt7470_detect(struct i2c_adapter *adapter, int address, int kind) /* Register sysfs hooks */ data->attrs.attrs = adt7470_attr; if ((err = sysfs_create_group(&client->dev.kobj, &data->attrs))) - goto exit_detach; + goto exit_free; data->hwmon_dev = hwmon_device_register(&client->dev); if (IS_ERR(data->hwmon_dev)) { @@ -1083,21 +1082,18 @@ static int adt7470_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove: sysfs_remove_group(&client->dev.kobj, &data->attrs); -exit_detach: - i2c_detach_client(client); exit_free: kfree(data); exit: return err; } -static int adt7470_detach_client(struct i2c_client *client) +static int adt7470_remove(struct i2c_client *client) { struct adt7470_data *data = i2c_get_clientdata(client); hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &data->attrs); - i2c_detach_client(client); kfree(data); return 0; } -- cgit v0.10.2 From eea54766c6e3f9850affa91061164aeb6bba44b6 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:10 +0200 Subject: hwmon: (adt7473) Convert to a new-style i2c driver The new-style adt7473 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare Cc: Darrick J. Wong diff --git a/drivers/hwmon/adt7473.c b/drivers/hwmon/adt7473.c index 93dbf5e..ce4a7cb 100644 --- a/drivers/hwmon/adt7473.c +++ b/drivers/hwmon/adt7473.c @@ -143,7 +143,6 @@ I2C_CLIENT_INSMOD_1(adt7473); #define FAN_DATA_VALID(x) ((x) && (x) != FAN_PERIOD_INVALID) struct adt7473_data { - struct i2c_client client; struct device *hwmon_dev; struct attribute_group attrs; struct mutex lock; @@ -178,16 +177,28 @@ struct adt7473_data { u8 max_duty_at_overheat; }; -static int adt7473_attach_adapter(struct i2c_adapter *adapter); -static int adt7473_detect(struct i2c_adapter *adapter, int address, int kind); -static int adt7473_detach_client(struct i2c_client *client); +static int adt7473_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int adt7473_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); +static int adt7473_remove(struct i2c_client *client); + +static const struct i2c_device_id adt7473_id[] = { + { "adt7473", adt7473 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, adt7473_id); static struct i2c_driver adt7473_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "adt7473", }, - .attach_adapter = adt7473_attach_adapter, - .detach_client = adt7473_detach_client, + .probe = adt7473_probe, + .remove = adt7473_remove, + .id_table = adt7473_id, + .detect = adt7473_detect, + .address_data = &addr_data, }; /* @@ -1042,66 +1053,52 @@ static struct attribute *adt7473_attr[] = NULL }; -static int adt7473_attach_adapter(struct i2c_adapter *adapter) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int adt7473_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, adt7473_detect); -} - -static int adt7473_detect(struct i2c_adapter *adapter, int address, int kind) -{ - struct i2c_client *client; - struct adt7473_data *data; - int err = 0; + struct i2c_adapter *adapter = client->adapter; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - goto exit; - - data = kzalloc(sizeof(struct adt7473_data), GFP_KERNEL); - if (!data) { - err = -ENOMEM; - goto exit; - } - - client = &data->client; - client->addr = address; - client->adapter = adapter; - client->driver = &adt7473_driver; - - i2c_set_clientdata(client, data); - - mutex_init(&data->lock); + return -ENODEV; if (kind <= 0) { int vendor, device, revision; vendor = i2c_smbus_read_byte_data(client, ADT7473_REG_VENDOR); - if (vendor != ADT7473_VENDOR) { - err = -ENODEV; - goto exit_free; - } + if (vendor != ADT7473_VENDOR) + return -ENODEV; device = i2c_smbus_read_byte_data(client, ADT7473_REG_DEVICE); - if (device != ADT7473_DEVICE) { - err = -ENODEV; - goto exit_free; - } + if (device != ADT7473_DEVICE) + return -ENODEV; revision = i2c_smbus_read_byte_data(client, ADT7473_REG_REVISION); - if (revision != ADT7473_REV_68 && revision != ADT7473_REV_69) { - err = -ENODEV; - goto exit_free; - } + if (revision != ADT7473_REV_68 && revision != ADT7473_REV_69) + return -ENODEV; } else dev_dbg(&adapter->dev, "detection forced\n"); - strlcpy(client->name, "adt7473", I2C_NAME_SIZE); + strlcpy(info->type, "adt7473", I2C_NAME_SIZE); - err = i2c_attach_client(client); - if (err) - goto exit_free; + return 0; +} + +static int adt7473_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct adt7473_data *data; + int err; + + data = kzalloc(sizeof(struct adt7473_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(client, data); + mutex_init(&data->lock); dev_info(&client->dev, "%s chip found\n", client->name); @@ -1112,7 +1109,7 @@ static int adt7473_detect(struct i2c_adapter *adapter, int address, int kind) data->attrs.attrs = adt7473_attr; err = sysfs_create_group(&client->dev.kobj, &data->attrs); if (err) - goto exit_detach; + goto exit_free; data->hwmon_dev = hwmon_device_register(&client->dev); if (IS_ERR(data->hwmon_dev)) { @@ -1124,21 +1121,18 @@ static int adt7473_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove: sysfs_remove_group(&client->dev.kobj, &data->attrs); -exit_detach: - i2c_detach_client(client); exit_free: kfree(data); exit: return err; } -static int adt7473_detach_client(struct i2c_client *client) +static int adt7473_remove(struct i2c_client *client) { struct adt7473_data *data = i2c_get_clientdata(client); hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &data->attrs); - i2c_detach_client(client); kfree(data); return 0; } -- cgit v0.10.2 From 063675b15608dfbb8404b3a19546d579bd039d02 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:11 +0200 Subject: hwmon: (asb100) Convert to a new-style i2c driver The new-style asb100 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare diff --git a/drivers/hwmon/asb100.c b/drivers/hwmon/asb100.c index fe2eea4..8a45a2e 100644 --- a/drivers/hwmon/asb100.c +++ b/drivers/hwmon/asb100.c @@ -176,10 +176,8 @@ static u8 DIV_TO_REG(long val) data is pointed to by client->data. The structure itself is dynamically allocated, at the same time the client itself is allocated. */ struct asb100_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex lock; - enum chips type; struct mutex update_lock; unsigned long last_updated; /* In jiffies */ @@ -206,18 +204,30 @@ struct asb100_data { static int asb100_read_value(struct i2c_client *client, u16 reg); static void asb100_write_value(struct i2c_client *client, u16 reg, u16 val); -static int asb100_attach_adapter(struct i2c_adapter *adapter); -static int asb100_detect(struct i2c_adapter *adapter, int address, int kind); -static int asb100_detach_client(struct i2c_client *client); +static int asb100_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int asb100_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); +static int asb100_remove(struct i2c_client *client); static struct asb100_data *asb100_update_device(struct device *dev); static void asb100_init_client(struct i2c_client *client); +static const struct i2c_device_id asb100_id[] = { + { "asb100", asb100 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, asb100_id); + static struct i2c_driver asb100_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "asb100", }, - .attach_adapter = asb100_attach_adapter, - .detach_client = asb100_detach_client, + .probe = asb100_probe, + .remove = asb100_remove, + .id_table = asb100_id, + .detect = asb100_detect, + .address_data = &addr_data, }; /* 7 Voltages */ @@ -619,35 +629,13 @@ static const struct attribute_group asb100_group = { .attrs = asb100_attributes, }; -/* This function is called when: - asb100_driver is inserted (when this module is loaded), for each - available adapter - when a new adapter is inserted (and asb100_driver is still present) - */ -static int asb100_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, asb100_detect); -} - -static int asb100_detect_subclients(struct i2c_adapter *adapter, int address, - int kind, struct i2c_client *client) +static int asb100_detect_subclients(struct i2c_client *client) { int i, id, err; + int address = client->addr; + unsigned short sc_addr[2]; struct asb100_data *data = i2c_get_clientdata(client); - - data->lm75[0] = kzalloc(sizeof(struct i2c_client), GFP_KERNEL); - if (!(data->lm75[0])) { - err = -ENOMEM; - goto ERROR_SC_0; - } - - data->lm75[1] = kzalloc(sizeof(struct i2c_client), GFP_KERNEL); - if (!(data->lm75[1])) { - err = -ENOMEM; - goto ERROR_SC_1; - } + struct i2c_adapter *adapter = client->adapter; id = i2c_adapter_id(adapter); @@ -665,37 +653,34 @@ static int asb100_detect_subclients(struct i2c_adapter *adapter, int address, asb100_write_value(client, ASB100_REG_I2C_SUBADDR, (force_subclients[2] & 0x07) | ((force_subclients[3] & 0x07) << 4)); - data->lm75[0]->addr = force_subclients[2]; - data->lm75[1]->addr = force_subclients[3]; + sc_addr[0] = force_subclients[2]; + sc_addr[1] = force_subclients[3]; } else { int val = asb100_read_value(client, ASB100_REG_I2C_SUBADDR); - data->lm75[0]->addr = 0x48 + (val & 0x07); - data->lm75[1]->addr = 0x48 + ((val >> 4) & 0x07); + sc_addr[0] = 0x48 + (val & 0x07); + sc_addr[1] = 0x48 + ((val >> 4) & 0x07); } - if (data->lm75[0]->addr == data->lm75[1]->addr) { + if (sc_addr[0] == sc_addr[1]) { dev_err(&client->dev, "duplicate addresses 0x%x " - "for subclients\n", data->lm75[0]->addr); + "for subclients\n", sc_addr[0]); err = -ENODEV; goto ERROR_SC_2; } - for (i = 0; i <= 1; i++) { - i2c_set_clientdata(data->lm75[i], NULL); - data->lm75[i]->adapter = adapter; - data->lm75[i]->driver = &asb100_driver; - strlcpy(data->lm75[i]->name, "asb100 subclient", I2C_NAME_SIZE); - } - - if ((err = i2c_attach_client(data->lm75[0]))) { + data->lm75[0] = i2c_new_dummy(adapter, sc_addr[0]); + if (!data->lm75[0]) { dev_err(&client->dev, "subclient %d registration " - "at address 0x%x failed.\n", i, data->lm75[0]->addr); + "at address 0x%x failed.\n", 1, sc_addr[0]); + err = -ENOMEM; goto ERROR_SC_2; } - if ((err = i2c_attach_client(data->lm75[1]))) { + data->lm75[1] = i2c_new_dummy(adapter, sc_addr[1]); + if (!data->lm75[1]) { dev_err(&client->dev, "subclient %d registration " - "at address 0x%x failed.\n", i, data->lm75[1]->addr); + "at address 0x%x failed.\n", 2, sc_addr[1]); + err = -ENOMEM; goto ERROR_SC_3; } @@ -703,55 +688,31 @@ static int asb100_detect_subclients(struct i2c_adapter *adapter, int address, /* Undo inits in case of errors */ ERROR_SC_3: - i2c_detach_client(data->lm75[0]); + i2c_unregister_device(data->lm75[0]); ERROR_SC_2: - kfree(data->lm75[1]); -ERROR_SC_1: - kfree(data->lm75[0]); -ERROR_SC_0: return err; } -static int asb100_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int asb100_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - int err; - struct i2c_client *client; - struct asb100_data *data; + struct i2c_adapter *adapter = client->adapter; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) { pr_debug("asb100.o: detect failed, " "smbus byte data not supported!\n"); - err = -ENODEV; - goto ERROR0; + return -ENODEV; } - /* OK. For now, we presume we have a valid client. We now create the - client structure, even though we cannot fill it completely yet. - But it allows us to access asb100_{read,write}_value. */ - - if (!(data = kzalloc(sizeof(struct asb100_data), GFP_KERNEL))) { - pr_debug("asb100.o: detect failed, kzalloc failed!\n"); - err = -ENOMEM; - goto ERROR0; - } - - client = &data->client; - mutex_init(&data->lock); - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &asb100_driver; - - /* Now, we do the remaining detection. */ - /* The chip may be stuck in some other bank than bank 0. This may make reading other information impossible. Specify a force=... or force_*=... parameter, and the chip will be reset to the right bank. */ if (kind < 0) { - int val1 = asb100_read_value(client, ASB100_REG_BANK); - int val2 = asb100_read_value(client, ASB100_REG_CHIPMAN); + int val1 = i2c_smbus_read_byte_data(client, ASB100_REG_BANK); + int val2 = i2c_smbus_read_byte_data(client, ASB100_REG_CHIPMAN); /* If we're in bank 0 */ if ((!(val1 & 0x07)) && @@ -761,48 +722,60 @@ static int asb100_detect(struct i2c_adapter *adapter, int address, int kind) ((val1 & 0x80) && (val2 != 0x06)))) { pr_debug("asb100.o: detect failed, " "bad chip id 0x%02x!\n", val2); - err = -ENODEV; - goto ERROR1; + return -ENODEV; } } /* kind < 0 */ /* We have either had a force parameter, or we have already detected Winbond. Put it now into bank 0 and Vendor ID High Byte */ - asb100_write_value(client, ASB100_REG_BANK, - (asb100_read_value(client, ASB100_REG_BANK) & 0x78) | 0x80); + i2c_smbus_write_byte_data(client, ASB100_REG_BANK, + (i2c_smbus_read_byte_data(client, ASB100_REG_BANK) & 0x78) + | 0x80); /* Determine the chip type. */ if (kind <= 0) { - int val1 = asb100_read_value(client, ASB100_REG_WCHIPID); - int val2 = asb100_read_value(client, ASB100_REG_CHIPMAN); + int val1 = i2c_smbus_read_byte_data(client, ASB100_REG_WCHIPID); + int val2 = i2c_smbus_read_byte_data(client, ASB100_REG_CHIPMAN); if ((val1 == 0x31) && (val2 == 0x06)) kind = asb100; else { if (kind == 0) - dev_warn(&client->dev, "ignoring " + dev_warn(&adapter->dev, "ignoring " "'force' parameter for unknown chip " "at adapter %d, address 0x%02x.\n", - i2c_adapter_id(adapter), address); - err = -ENODEV; - goto ERROR1; + i2c_adapter_id(adapter), client->addr); + return -ENODEV; } } - /* Fill in remaining client fields and put it into the global list */ - strlcpy(client->name, "asb100", I2C_NAME_SIZE); - data->type = kind; - mutex_init(&data->update_lock); + strlcpy(info->type, "asb100", I2C_NAME_SIZE); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto ERROR1; + return 0; +} + +static int asb100_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int err; + struct asb100_data *data; + + data = kzalloc(sizeof(struct asb100_data), GFP_KERNEL); + if (!data) { + pr_debug("asb100.o: probe failed, kzalloc failed!\n"); + err = -ENOMEM; + goto ERROR0; + } + + i2c_set_clientdata(client, data); + mutex_init(&data->lock); + mutex_init(&data->update_lock); /* Attach secondary lm75 clients */ - if ((err = asb100_detect_subclients(adapter, address, kind, - client))) - goto ERROR2; + err = asb100_detect_subclients(client); + if (err) + goto ERROR1; /* Initialize the chip */ asb100_init_client(client); @@ -827,39 +800,25 @@ static int asb100_detect(struct i2c_adapter *adapter, int address, int kind) ERROR4: sysfs_remove_group(&client->dev.kobj, &asb100_group); ERROR3: - i2c_detach_client(data->lm75[1]); - i2c_detach_client(data->lm75[0]); - kfree(data->lm75[1]); - kfree(data->lm75[0]); -ERROR2: - i2c_detach_client(client); + i2c_unregister_device(data->lm75[1]); + i2c_unregister_device(data->lm75[0]); ERROR1: kfree(data); ERROR0: return err; } -static int asb100_detach_client(struct i2c_client *client) +static int asb100_remove(struct i2c_client *client) { struct asb100_data *data = i2c_get_clientdata(client); - int err; - - /* main client */ - if (data) { - hwmon_device_unregister(data->hwmon_dev); - sysfs_remove_group(&client->dev.kobj, &asb100_group); - } - if ((err = i2c_detach_client(client))) - return err; + hwmon_device_unregister(data->hwmon_dev); + sysfs_remove_group(&client->dev.kobj, &asb100_group); - /* main client */ - if (data) - kfree(data); + i2c_unregister_device(data->lm75[1]); + i2c_unregister_device(data->lm75[0]); - /* subclient */ - else - kfree(client); + kfree(data); return 0; } -- cgit v0.10.2 From 71163c7c36fa76bb5a72feca7fb685677444070a Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:11 +0200 Subject: hwmon: (atxp1) Convert to a new-style i2c driver The new-style atxp1 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare diff --git a/drivers/hwmon/atxp1.c b/drivers/hwmon/atxp1.c index 01c17e3..d191118 100644 --- a/drivers/hwmon/atxp1.c +++ b/drivers/hwmon/atxp1.c @@ -46,21 +46,32 @@ static const unsigned short normal_i2c[] = { 0x37, 0x4e, I2C_CLIENT_END }; I2C_CLIENT_INSMOD_1(atxp1); -static int atxp1_attach_adapter(struct i2c_adapter * adapter); -static int atxp1_detach_client(struct i2c_client * client); +static int atxp1_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int atxp1_remove(struct i2c_client *client); static struct atxp1_data * atxp1_update_device(struct device *dev); -static int atxp1_detect(struct i2c_adapter *adapter, int address, int kind); +static int atxp1_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); + +static const struct i2c_device_id atxp1_id[] = { + { "atxp1", atxp1 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, atxp1_id); static struct i2c_driver atxp1_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "atxp1", }, - .attach_adapter = atxp1_attach_adapter, - .detach_client = atxp1_detach_client, + .probe = atxp1_probe, + .remove = atxp1_remove, + .id_table = atxp1_id, + .detect = atxp1_detect, + .address_data = &addr_data, }; struct atxp1_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; unsigned long last_updated; @@ -263,35 +274,16 @@ static const struct attribute_group atxp1_group = { }; -static int atxp1_attach_adapter(struct i2c_adapter *adapter) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int atxp1_detect(struct i2c_client *new_client, int kind, + struct i2c_board_info *info) { - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, &atxp1_detect); -}; + struct i2c_adapter *adapter = new_client->adapter; -static int atxp1_detect(struct i2c_adapter *adapter, int address, int kind) -{ - struct i2c_client * new_client; - struct atxp1_data * data; - int err = 0; u8 temp; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - goto exit; - - if (!(data = kzalloc(sizeof(struct atxp1_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - new_client = &data->client; - i2c_set_clientdata(new_client, data); - - new_client->addr = address; - new_client->adapter = adapter; - new_client->driver = &atxp1_driver; - new_client->flags = 0; + return -ENODEV; /* Detect ATXP1, checking if vendor ID registers are all zero */ if (!((i2c_smbus_read_byte_data(new_client, 0x3e) == 0) && @@ -305,35 +297,46 @@ static int atxp1_detect(struct i2c_adapter *adapter, int address, int kind) if (!((i2c_smbus_read_byte_data(new_client, 0x10) == temp) && (i2c_smbus_read_byte_data(new_client, 0x11) == temp) )) - goto exit_free; + return -ENODEV; } /* Get VRM */ - data->vrm = vid_which_vrm(); + temp = vid_which_vrm(); - if ((data->vrm != 90) && (data->vrm != 91)) { - dev_err(&new_client->dev, "Not supporting VRM %d.%d\n", - data->vrm / 10, data->vrm % 10); - goto exit_free; + if ((temp != 90) && (temp != 91)) { + dev_err(&adapter->dev, "atxp1: Not supporting VRM %d.%d\n", + temp / 10, temp % 10); + return -ENODEV; } - strncpy(new_client->name, "atxp1", I2C_NAME_SIZE); - - data->valid = 0; + strlcpy(info->type, "atxp1", I2C_NAME_SIZE); - mutex_init(&data->update_lock); + return 0; +} - err = i2c_attach_client(new_client); +static int atxp1_probe(struct i2c_client *new_client, + const struct i2c_device_id *id) +{ + struct atxp1_data *data; + int err; - if (err) - { - dev_err(&new_client->dev, "Attach client error.\n"); - goto exit_free; + data = kzalloc(sizeof(struct atxp1_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; } + /* Get VRM */ + data->vrm = vid_which_vrm(); + + i2c_set_clientdata(new_client, data); + data->valid = 0; + + mutex_init(&data->update_lock); + /* Register sysfs hooks */ if ((err = sysfs_create_group(&new_client->dev.kobj, &atxp1_group))) - goto exit_detach; + goto exit_free; data->hwmon_dev = hwmon_device_register(&new_client->dev); if (IS_ERR(data->hwmon_dev)) { @@ -348,30 +351,22 @@ static int atxp1_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove_files: sysfs_remove_group(&new_client->dev.kobj, &atxp1_group); -exit_detach: - i2c_detach_client(new_client); exit_free: kfree(data); exit: return err; }; -static int atxp1_detach_client(struct i2c_client * client) +static int atxp1_remove(struct i2c_client *client) { struct atxp1_data * data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &atxp1_group); - err = i2c_detach_client(client); - - if (err) - dev_err(&client->dev, "Failed to detach client.\n"); - else - kfree(data); + kfree(data); - return err; + return 0; }; static int __init atxp1_init(void) -- cgit v0.10.2 From 70313eabfc63ce6aa89b9fa3074129e5c521568a Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:11 +0200 Subject: hwmon: (ds1621) Convert to a new-style i2c driver The new-style ds1621 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare diff --git a/drivers/hwmon/ds1621.c b/drivers/hwmon/ds1621.c index 5f300ff..7415381 100644 --- a/drivers/hwmon/ds1621.c +++ b/drivers/hwmon/ds1621.c @@ -72,7 +72,6 @@ static const u8 DS1621_REG_TEMP[3] = { /* Each client has this additional data */ struct ds1621_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; char valid; /* !=0 if following fields are valid */ @@ -82,20 +81,32 @@ struct ds1621_data { u8 conf; /* Register encoding, combined */ }; -static int ds1621_attach_adapter(struct i2c_adapter *adapter); -static int ds1621_detect(struct i2c_adapter *adapter, int address, - int kind); +static int ds1621_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int ds1621_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); static void ds1621_init_client(struct i2c_client *client); -static int ds1621_detach_client(struct i2c_client *client); +static int ds1621_remove(struct i2c_client *client); static struct ds1621_data *ds1621_update_client(struct device *dev); +static const struct i2c_device_id ds1621_id[] = { + { "ds1621", ds1621 }, + { "ds1625", ds1621 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, ds1621_id); + /* This is the driver that will be inserted */ static struct i2c_driver ds1621_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "ds1621", }, - .attach_adapter = ds1621_attach_adapter, - .detach_client = ds1621_detach_client, + .probe = ds1621_probe, + .remove = ds1621_remove, + .id_table = ds1621_id, + .detect = ds1621_detect, + .address_data = &addr_data, }; /* All registers are word-sized, except for the configuration register. @@ -199,40 +210,18 @@ static const struct attribute_group ds1621_group = { }; -static int ds1621_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, ds1621_detect); -} - -/* This function is called by i2c_probe */ -static int ds1621_detect(struct i2c_adapter *adapter, int address, - int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int ds1621_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { + struct i2c_adapter *adapter = client->adapter; int conf, temp; - struct i2c_client *client; - struct ds1621_data *data; - int i, err = 0; + int i; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA | I2C_FUNC_SMBUS_WRITE_BYTE)) - goto exit; - - /* OK. For now, we presume we have a valid client. We now create the - client structure, even though we cannot fill it completely yet. - But it allows us to access ds1621_{read,write}_value. */ - if (!(data = kzalloc(sizeof(struct ds1621_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - client = &data->client; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &ds1621_driver; + return -ENODEV; /* Now, we do the remaining detection. It is lousy. */ if (kind < 0) { @@ -241,29 +230,41 @@ static int ds1621_detect(struct i2c_adapter *adapter, int address, improbable in our case. */ conf = ds1621_read_value(client, DS1621_REG_CONF); if (conf & DS1621_REG_CONFIG_NVB) - goto exit_free; + return -ENODEV; /* The 7 lowest bits of a temperature should always be 0. */ - for (i = 0; i < ARRAY_SIZE(data->temp); i++) { + for (i = 0; i < ARRAY_SIZE(DS1621_REG_TEMP); i++) { temp = ds1621_read_value(client, DS1621_REG_TEMP[i]); if (temp & 0x007f) - goto exit_free; + return -ENODEV; } } - /* Fill in remaining client fields and put it into the global list */ - strlcpy(client->name, "ds1621", I2C_NAME_SIZE); - mutex_init(&data->update_lock); + strlcpy(info->type, "ds1621", I2C_NAME_SIZE); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto exit_free; + return 0; +} + +static int ds1621_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct ds1621_data *data; + int err; + + data = kzalloc(sizeof(struct ds1621_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); /* Initialize the DS1621 chip */ ds1621_init_client(client); /* Register sysfs hooks */ if ((err = sysfs_create_group(&client->dev.kobj, &ds1621_group))) - goto exit_detach; + goto exit_free; data->hwmon_dev = hwmon_device_register(&client->dev); if (IS_ERR(data->hwmon_dev)) { @@ -275,25 +276,19 @@ static int ds1621_detect(struct i2c_adapter *adapter, int address, exit_remove_files: sysfs_remove_group(&client->dev.kobj, &ds1621_group); - exit_detach: - i2c_detach_client(client); exit_free: kfree(data); exit: return err; } -static int ds1621_detach_client(struct i2c_client *client) +static int ds1621_remove(struct i2c_client *client) { struct ds1621_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &ds1621_group); - if ((err = i2c_detach_client(client))) - return err; - kfree(data); return 0; -- cgit v0.10.2 From 935ada8c4481f9591caa1b88c83127c5931a8855 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:11 +0200 Subject: hwmon: (f75375s) Drop legacy i2c driver Drop the legacy f75375s i2c driver, and add a detect callback to the new-style i2c driver to achieve the same functionality. Signed-off-by: Jean Delvare Acked-by: Riku Voipio diff --git a/drivers/hwmon/f75375s.c b/drivers/hwmon/f75375s.c index dc1f30e..1692de3 100644 --- a/drivers/hwmon/f75375s.c +++ b/drivers/hwmon/f75375s.c @@ -87,7 +87,6 @@ I2C_CLIENT_INSMOD_2(f75373, f75375); struct f75375_data { unsigned short addr; - struct i2c_client *client; struct device *hwmon_dev; const char *name; @@ -114,21 +113,12 @@ struct f75375_data { s8 temp_max_hyst[2]; }; -static int f75375_attach_adapter(struct i2c_adapter *adapter); -static int f75375_detect(struct i2c_adapter *adapter, int address, int kind); -static int f75375_detach_client(struct i2c_client *client); +static int f75375_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); static int f75375_probe(struct i2c_client *client, const struct i2c_device_id *id); static int f75375_remove(struct i2c_client *client); -static struct i2c_driver f75375_legacy_driver = { - .driver = { - .name = "f75375_legacy", - }, - .attach_adapter = f75375_attach_adapter, - .detach_client = f75375_detach_client, -}; - static const struct i2c_device_id f75375_id[] = { { "f75373", f75373 }, { "f75375", f75375 }, @@ -137,12 +127,15 @@ static const struct i2c_device_id f75375_id[] = { MODULE_DEVICE_TABLE(i2c, f75375_id); static struct i2c_driver f75375_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "f75375", }, .probe = f75375_probe, .remove = f75375_remove, .id_table = f75375_id, + .detect = f75375_detect, + .address_data = &addr_data, }; static inline int f75375_read8(struct i2c_client *client, u8 reg) @@ -607,22 +600,6 @@ static const struct attribute_group f75375_group = { .attrs = f75375_attributes, }; -static int f75375_detach_client(struct i2c_client *client) -{ - int err; - - f75375_remove(client); - err = i2c_detach_client(client); - if (err) { - dev_err(&client->dev, - "Client deregistration failed, " - "client not detached.\n"); - return err; - } - kfree(client); - return 0; -} - static void f75375_init(struct i2c_client *client, struct f75375_data *data, struct f75375s_platform_data *f75375s_pdata) { @@ -651,7 +628,6 @@ static int f75375_probe(struct i2c_client *client, return -ENOMEM; i2c_set_clientdata(client, data); - data->client = client; mutex_init(&data->update_lock); data->kind = id->driver_data; @@ -700,29 +676,13 @@ static int f75375_remove(struct i2c_client *client) return 0; } -static int f75375_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, f75375_detect); -} - -/* This function is called by i2c_probe */ -static int f75375_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int f75375_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - struct i2c_client *client; + struct i2c_adapter *adapter = client->adapter; u8 version = 0; - int err = 0; const char *name = ""; - struct i2c_device_id id; - - if (!(client = kzalloc(sizeof(*client), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - client->addr = address; - client->adapter = adapter; - client->driver = &f75375_legacy_driver; if (kind < 0) { u16 vendid = f75375_read16(client, F75375_REG_VENDOR); @@ -736,7 +696,7 @@ static int f75375_detect(struct i2c_adapter *adapter, int address, int kind) dev_err(&adapter->dev, "failed,%02X,%02X,%02X\n", chipid, version, vendid); - goto exit_free; + return -ENODEV; } } @@ -746,43 +706,18 @@ static int f75375_detect(struct i2c_adapter *adapter, int address, int kind) name = "f75373"; } dev_info(&adapter->dev, "found %s version: %02X\n", name, version); - strlcpy(client->name, name, I2C_NAME_SIZE); - - if ((err = i2c_attach_client(client))) - goto exit_free; - - strlcpy(id.name, name, I2C_NAME_SIZE); - id.driver_data = kind; - if ((err = f75375_probe(client, &id)) < 0) - goto exit_detach; + strlcpy(info->type, name, I2C_NAME_SIZE); return 0; - -exit_detach: - i2c_detach_client(client); -exit_free: - kfree(client); -exit: - return err; } static int __init sensors_f75375_init(void) { - int status; - status = i2c_add_driver(&f75375_driver); - if (status) - return status; - - status = i2c_add_driver(&f75375_legacy_driver); - if (status) - i2c_del_driver(&f75375_driver); - - return status; + return i2c_add_driver(&f75375_driver); } static void __exit sensors_f75375_exit(void) { - i2c_del_driver(&f75375_legacy_driver); i2c_del_driver(&f75375_driver); } -- cgit v0.10.2 From c2df1591df3ea83b4a5890a1131dd821ca07e761 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:12 +0200 Subject: hwmon: (fscher) Convert to a new-style i2c driver The new-style fscher driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare Cc: Reinhard Nissl diff --git a/drivers/hwmon/fscher.c b/drivers/hwmon/fscher.c index ed26b66..12c70e4 100644 --- a/drivers/hwmon/fscher.c +++ b/drivers/hwmon/fscher.c @@ -106,9 +106,11 @@ I2C_CLIENT_INSMOD_1(fscher); * Functions declaration */ -static int fscher_attach_adapter(struct i2c_adapter *adapter); -static int fscher_detect(struct i2c_adapter *adapter, int address, int kind); -static int fscher_detach_client(struct i2c_client *client); +static int fscher_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int fscher_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); +static int fscher_remove(struct i2c_client *client); static struct fscher_data *fscher_update_device(struct device *dev); static void fscher_init_client(struct i2c_client *client); @@ -119,12 +121,21 @@ static int fscher_write_value(struct i2c_client *client, u8 reg, u8 value); * Driver data (common to all clients) */ +static const struct i2c_device_id fscher_id[] = { + { "fscher", fscher }, + { } +}; + static struct i2c_driver fscher_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "fscher", }, - .attach_adapter = fscher_attach_adapter, - .detach_client = fscher_detach_client, + .probe = fscher_probe, + .remove = fscher_remove, + .id_table = fscher_id, + .detect = fscher_detect, + .address_data = &addr_data, }; /* @@ -132,7 +143,6 @@ static struct i2c_driver fscher_driver = { */ struct fscher_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; char valid; /* zero until following fields are valid */ @@ -283,38 +293,14 @@ static const struct attribute_group fscher_group = { * Real code */ -static int fscher_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, fscher_detect); -} - -static int fscher_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int fscher_detect(struct i2c_client *new_client, int kind, + struct i2c_board_info *info) { - struct i2c_client *new_client; - struct fscher_data *data; - int err = 0; + struct i2c_adapter *adapter = new_client->adapter; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - goto exit; - - /* OK. For now, we presume we have a valid client. We now create the - * client structure, even though we cannot fill it completely yet. - * But it allows us to access i2c_smbus_read_byte_data. */ - if (!(data = kzalloc(sizeof(struct fscher_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - /* The common I2C client data is placed right before the - * Hermes-specific data. */ - new_client = &data->client; - i2c_set_clientdata(new_client, data); - new_client->addr = address; - new_client->adapter = adapter; - new_client->driver = &fscher_driver; - new_client->flags = 0; + return -ENODEV; /* Do the remaining detection unless force or force_fscher parameter */ if (kind < 0) { @@ -324,24 +310,35 @@ static int fscher_detect(struct i2c_adapter *adapter, int address, int kind) FSCHER_REG_IDENT_1) != 0x45) /* 'E' */ || (i2c_smbus_read_byte_data(new_client, FSCHER_REG_IDENT_2) != 0x52)) /* 'R' */ - goto exit_free; + return -ENODEV; + } + + strlcpy(info->type, "fscher", I2C_NAME_SIZE); + + return 0; +} + +static int fscher_probe(struct i2c_client *new_client, + const struct i2c_device_id *id) +{ + struct fscher_data *data; + int err; + + data = kzalloc(sizeof(struct fscher_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; } - /* Fill in the remaining client fields and put it into the - * global list */ - strlcpy(new_client->name, "fscher", I2C_NAME_SIZE); + i2c_set_clientdata(new_client, data); data->valid = 0; mutex_init(&data->update_lock); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(new_client))) - goto exit_free; - fscher_init_client(new_client); /* Register sysfs hooks */ if ((err = sysfs_create_group(&new_client->dev.kobj, &fscher_group))) - goto exit_detach; + goto exit_free; data->hwmon_dev = hwmon_device_register(&new_client->dev); if (IS_ERR(data->hwmon_dev)) { @@ -353,25 +350,19 @@ static int fscher_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove_files: sysfs_remove_group(&new_client->dev.kobj, &fscher_group); -exit_detach: - i2c_detach_client(new_client); exit_free: kfree(data); exit: return err; } -static int fscher_detach_client(struct i2c_client *client) +static int fscher_remove(struct i2c_client *client) { struct fscher_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &fscher_group); - if ((err = i2c_detach_client(client))) - return err; - kfree(data); return 0; } -- cgit v0.10.2 From 40ac1994bf988dac05434f856c2a4fa24e22b2ea Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:12 +0200 Subject: hwmon: (fschmd) Convert to a new-style i2c driver The new-style fschmd driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare Cc: Hans de Goede diff --git a/drivers/hwmon/fschmd.c b/drivers/hwmon/fschmd.c index bd89d27..9671703 100644 --- a/drivers/hwmon/fschmd.c +++ b/drivers/hwmon/fschmd.c @@ -171,20 +171,37 @@ static const int FSCHMD_NO_TEMP_SENSORS[5] = { 3, 3, 4, 3, 5 }; * Functions declarations */ -static int fschmd_attach_adapter(struct i2c_adapter *adapter); -static int fschmd_detach_client(struct i2c_client *client); +static int fschmd_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int fschmd_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); +static int fschmd_remove(struct i2c_client *client); static struct fschmd_data *fschmd_update_device(struct device *dev); /* * Driver data (common to all clients) */ +static const struct i2c_device_id fschmd_id[] = { + { "fscpos", fscpos }, + { "fscher", fscher }, + { "fscscy", fscscy }, + { "fschrc", fschrc }, + { "fschmd", fschmd }, + { } +}; +MODULE_DEVICE_TABLE(i2c, fschmd_id); + static struct i2c_driver fschmd_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = FSCHMD_NAME, }, - .attach_adapter = fschmd_attach_adapter, - .detach_client = fschmd_detach_client, + .probe = fschmd_probe, + .remove = fschmd_remove, + .id_table = fschmd_id, + .detect = fschmd_detect, + .address_data = &addr_data, }; /* @@ -192,7 +209,6 @@ static struct i2c_driver fschmd_driver = { */ struct fschmd_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; int kind; @@ -269,7 +285,7 @@ static ssize_t store_temp_max(struct device *dev, struct device_attribute v = SENSORS_LIMIT(v, -128, 127) + 128; mutex_lock(&data->update_lock); - i2c_smbus_write_byte_data(&data->client, + i2c_smbus_write_byte_data(to_i2c_client(dev), FSCHMD_REG_TEMP_LIMIT[data->kind][index], v); data->temp_max[index] = v; mutex_unlock(&data->update_lock); @@ -346,14 +362,14 @@ static ssize_t store_fan_div(struct device *dev, struct device_attribute mutex_lock(&data->update_lock); - reg = i2c_smbus_read_byte_data(&data->client, + reg = i2c_smbus_read_byte_data(to_i2c_client(dev), FSCHMD_REG_FAN_RIPPLE[data->kind][index]); /* bits 2..7 reserved => mask with 0x03 */ reg &= ~0x03; reg |= v; - i2c_smbus_write_byte_data(&data->client, + i2c_smbus_write_byte_data(to_i2c_client(dev), FSCHMD_REG_FAN_RIPPLE[data->kind][index], reg); data->fan_ripple[index] = reg; @@ -416,7 +432,7 @@ static ssize_t store_pwm_auto_point1_pwm(struct device *dev, mutex_lock(&data->update_lock); - i2c_smbus_write_byte_data(&data->client, + i2c_smbus_write_byte_data(to_i2c_client(dev), FSCHMD_REG_FAN_MIN[data->kind][index], v); data->fan_min[index] = v; @@ -448,14 +464,14 @@ static ssize_t store_alert_led(struct device *dev, mutex_lock(&data->update_lock); - reg = i2c_smbus_read_byte_data(&data->client, FSCHMD_REG_CONTROL); + reg = i2c_smbus_read_byte_data(to_i2c_client(dev), FSCHMD_REG_CONTROL); if (v) reg |= FSCHMD_CONTROL_ALERT_LED_MASK; else reg &= ~FSCHMD_CONTROL_ALERT_LED_MASK; - i2c_smbus_write_byte_data(&data->client, FSCHMD_REG_CONTROL, reg); + i2c_smbus_write_byte_data(to_i2c_client(dev), FSCHMD_REG_CONTROL, reg); data->global_control = reg; @@ -600,32 +616,15 @@ static void fschmd_dmi_decode(const struct dmi_header *header) } } -static int fschmd_detect(struct i2c_adapter *adapter, int address, int kind) +static int fschmd_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - struct i2c_client *client; - struct fschmd_data *data; - u8 revision; - const char * const names[5] = { "Poseidon", "Hermes", "Scylla", - "Heracles", "Heimdall" }; + struct i2c_adapter *adapter = client->adapter; const char * const client_names[5] = { "fscpos", "fscher", "fscscy", "fschrc", "fschmd" }; - int i, err = 0; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - return 0; - - /* OK. For now, we presume we have a valid client. We now create the - * client structure, even though we cannot fill it completely yet. - * But it allows us to access i2c_smbus_read_byte_data. */ - if (!(data = kzalloc(sizeof(struct fschmd_data), GFP_KERNEL))) - return -ENOMEM; - - client = &data->client; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &fschmd_driver; - mutex_init(&data->update_lock); + return -ENODEV; /* Detect & Identify the chip */ if (kind <= 0) { @@ -650,9 +649,31 @@ static int fschmd_detect(struct i2c_adapter *adapter, int address, int kind) else if (!strcmp(id, "HMD")) kind = fschmd; else - goto exit_free; + return -ENODEV; } + strlcpy(info->type, client_names[kind - 1], I2C_NAME_SIZE); + + return 0; +} + +static int fschmd_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct fschmd_data *data; + u8 revision; + const char * const names[5] = { "Poseidon", "Hermes", "Scylla", + "Heracles", "Heimdall" }; + int i, err; + enum chips kind = id->driver_data; + + data = kzalloc(sizeof(struct fschmd_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); + if (kind == fscpos) { /* The Poseidon has hardwired temp limits, fill these in for the alarm resetting code */ @@ -674,11 +695,6 @@ static int fschmd_detect(struct i2c_adapter *adapter, int address, int kind) /* i2c kind goes from 1-5, we want from 0-4 to address arrays */ data->kind = kind - 1; - strlcpy(client->name, client_names[data->kind], I2C_NAME_SIZE); - - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto exit_free; for (i = 0; i < ARRAY_SIZE(fschmd_attr); i++) { err = device_create_file(&client->dev, @@ -726,25 +742,14 @@ static int fschmd_detect(struct i2c_adapter *adapter, int address, int kind) return 0; exit_detach: - fschmd_detach_client(client); /* will also free data for us */ - return err; - -exit_free: - kfree(data); + fschmd_remove(client); /* will also free data for us */ return err; } -static int fschmd_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, fschmd_detect); -} - -static int fschmd_detach_client(struct i2c_client *client) +static int fschmd_remove(struct i2c_client *client) { struct fschmd_data *data = i2c_get_clientdata(client); - int i, err; + int i; /* Check if registered in case we're called from fschmd_detect to cleanup after an error */ @@ -760,9 +765,6 @@ static int fschmd_detach_client(struct i2c_client *client) device_remove_file(&client->dev, &fschmd_fan_attr[i].dev_attr); - if ((err = i2c_detach_client(client))) - return err; - kfree(data); return 0; } -- cgit v0.10.2 From b9e39b1b1b79d3e074ca79b63f548ac48c85ca98 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:12 +0200 Subject: hwmon: (fscpos) Convert to a new-style i2c driver The new-style fscpos driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare diff --git a/drivers/hwmon/fscpos.c b/drivers/hwmon/fscpos.c index 00f4848..8a7bcf5 100644 --- a/drivers/hwmon/fscpos.c +++ b/drivers/hwmon/fscpos.c @@ -87,9 +87,11 @@ static u8 FSCPOS_REG_TEMP_STATE[] = { 0x71, 0x81, 0x91 }; /* * Functions declaration */ -static int fscpos_attach_adapter(struct i2c_adapter *adapter); -static int fscpos_detect(struct i2c_adapter *adapter, int address, int kind); -static int fscpos_detach_client(struct i2c_client *client); +static int fscpos_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int fscpos_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); +static int fscpos_remove(struct i2c_client *client); static int fscpos_read_value(struct i2c_client *client, u8 reg); static int fscpos_write_value(struct i2c_client *client, u8 reg, u8 value); @@ -101,19 +103,27 @@ static void reset_fan_alarm(struct i2c_client *client, int nr); /* * Driver data (common to all clients) */ +static const struct i2c_device_id fscpos_id[] = { + { "fscpos", fscpos }, + { } +}; + static struct i2c_driver fscpos_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "fscpos", }, - .attach_adapter = fscpos_attach_adapter, - .detach_client = fscpos_detach_client, + .probe = fscpos_probe, + .remove = fscpos_remove, + .id_table = fscpos_id, + .detect = fscpos_detect, + .address_data = &addr_data, }; /* * Client data (each client gets its own) */ struct fscpos_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; char valid; /* 0 until following fields are valid */ @@ -470,39 +480,14 @@ static const struct attribute_group fscpos_group = { .attrs = fscpos_attributes, }; -static int fscpos_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, fscpos_detect); -} - -static int fscpos_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int fscpos_detect(struct i2c_client *new_client, int kind, + struct i2c_board_info *info) { - struct i2c_client *new_client; - struct fscpos_data *data; - int err = 0; + struct i2c_adapter *adapter = new_client->adapter; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - goto exit; - - /* - * OK. For now, we presume we have a valid client. We now create the - * client structure, even though we cannot fill it completely yet. - * But it allows us to access fscpos_{read,write}_value. - */ - - if (!(data = kzalloc(sizeof(struct fscpos_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - new_client = &data->client; - i2c_set_clientdata(new_client, data); - new_client->addr = address; - new_client->adapter = adapter; - new_client->driver = &fscpos_driver; - new_client->flags = 0; + return -ENODEV; /* Do the remaining detection unless force or force_fscpos parameter */ if (kind < 0) { @@ -512,22 +497,30 @@ static int fscpos_detect(struct i2c_adapter *adapter, int address, int kind) != 0x45) /* 'E' */ || (fscpos_read_value(new_client, FSCPOS_REG_IDENT_2) != 0x47))/* 'G' */ - { - dev_dbg(&new_client->dev, "fscpos detection failed\n"); - goto exit_free; - } + return -ENODEV; } - /* Fill in the remaining client fields and put it in the global list */ - strlcpy(new_client->name, "fscpos", I2C_NAME_SIZE); + strlcpy(info->type, "fscpos", I2C_NAME_SIZE); + return 0; +} + +static int fscpos_probe(struct i2c_client *new_client, + const struct i2c_device_id *id) +{ + struct fscpos_data *data; + int err; + + data = kzalloc(sizeof(struct fscpos_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(new_client, data); data->valid = 0; mutex_init(&data->update_lock); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(new_client))) - goto exit_free; - /* Inizialize the fscpos chip */ fscpos_init_client(new_client); @@ -536,7 +529,7 @@ static int fscpos_detect(struct i2c_adapter *adapter, int address, int kind) /* Register sysfs hooks */ if ((err = sysfs_create_group(&new_client->dev.kobj, &fscpos_group))) - goto exit_detach; + goto exit_free; data->hwmon_dev = hwmon_device_register(&new_client->dev); if (IS_ERR(data->hwmon_dev)) { @@ -548,24 +541,19 @@ static int fscpos_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove_files: sysfs_remove_group(&new_client->dev.kobj, &fscpos_group); -exit_detach: - i2c_detach_client(new_client); exit_free: kfree(data); exit: return err; } -static int fscpos_detach_client(struct i2c_client *client) +static int fscpos_remove(struct i2c_client *client) { struct fscpos_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &fscpos_group); - if ((err = i2c_detach_client(client))) - return err; kfree(data); return 0; } -- cgit v0.10.2 From 95d80e7c834d8742da38e2b9d2f36dec36da03a6 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:13 +0200 Subject: hwmon: (gl518sm) Convert to a new-style i2c driver The new-style gl518sm driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare diff --git a/drivers/hwmon/gl518sm.c b/drivers/hwmon/gl518sm.c index 33e9e8a..7820df4 100644 --- a/drivers/hwmon/gl518sm.c +++ b/drivers/hwmon/gl518sm.c @@ -114,7 +114,6 @@ static inline u8 FAN_TO_REG(long rpm, int div) /* Each client has this additional data */ struct gl518_data { - struct i2c_client client; struct device *hwmon_dev; enum chips type; @@ -138,21 +137,33 @@ struct gl518_data { u8 beep_enable; /* Boolean */ }; -static int gl518_attach_adapter(struct i2c_adapter *adapter); -static int gl518_detect(struct i2c_adapter *adapter, int address, int kind); +static int gl518_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int gl518_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); static void gl518_init_client(struct i2c_client *client); -static int gl518_detach_client(struct i2c_client *client); +static int gl518_remove(struct i2c_client *client); static int gl518_read_value(struct i2c_client *client, u8 reg); static int gl518_write_value(struct i2c_client *client, u8 reg, u16 value); static struct gl518_data *gl518_update_device(struct device *dev); +static const struct i2c_device_id gl518_id[] = { + { "gl518sm", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, gl518_id); + /* This is the driver that will be inserted */ static struct i2c_driver gl518_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "gl518sm", }, - .attach_adapter = gl518_attach_adapter, - .detach_client = gl518_detach_client, + .probe = gl518_probe, + .remove = gl518_remove, + .id_table = gl518_id, + .detect = gl518_detect, + .address_data = &addr_data, }; /* @@ -472,46 +483,23 @@ static const struct attribute_group gl518_group_r80 = { * Real code */ -static int gl518_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, gl518_detect); -} - -static int gl518_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int gl518_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { + struct i2c_adapter *adapter = client->adapter; int i; - struct i2c_client *client; - struct gl518_data *data; - int err = 0; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA)) - goto exit; - - /* OK. For now, we presume we have a valid client. We now create the - client structure, even though we cannot fill it completely yet. - But it allows us to access gl518_{read,write}_value. */ - - if (!(data = kzalloc(sizeof(struct gl518_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - client = &data->client; - i2c_set_clientdata(client, data); - - client->addr = address; - client->adapter = adapter; - client->driver = &gl518_driver; + return -ENODEV; /* Now, we do the remaining detection. */ if (kind < 0) { if ((gl518_read_value(client, GL518_REG_CHIP_ID) != 0x80) || (gl518_read_value(client, GL518_REG_CONF) & 0x80)) - goto exit_free; + return -ENODEV; } /* Determine the chip type. */ @@ -526,19 +514,32 @@ static int gl518_detect(struct i2c_adapter *adapter, int address, int kind) dev_info(&adapter->dev, "Ignoring 'force' parameter for unknown " "chip at adapter %d, address 0x%02x\n", - i2c_adapter_id(adapter), address); - goto exit_free; + i2c_adapter_id(adapter), client->addr); + return -ENODEV; } } - /* Fill in the remaining client fields */ - strlcpy(client->name, "gl518sm", I2C_NAME_SIZE); - data->type = kind; - mutex_init(&data->update_lock); + strlcpy(info->type, "gl518sm", I2C_NAME_SIZE); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto exit_free; + return 0; +} + +static int gl518_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct gl518_data *data; + int err, revision; + + data = kzalloc(sizeof(struct gl518_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(client, data); + revision = gl518_read_value(client, GL518_REG_REVISION); + data->type = revision == 0x80 ? gl518sm_r80 : gl518sm_r00; + mutex_init(&data->update_lock); /* Initialize the GL518SM chip */ data->alarm_mask = 0xff; @@ -546,7 +547,7 @@ static int gl518_detect(struct i2c_adapter *adapter, int address, int kind) /* Register sysfs hooks */ if ((err = sysfs_create_group(&client->dev.kobj, &gl518_group))) - goto exit_detach; + goto exit_free; if (data->type == gl518sm_r80) if ((err = sysfs_create_group(&client->dev.kobj, &gl518_group_r80))) @@ -564,8 +565,6 @@ exit_remove_files: sysfs_remove_group(&client->dev.kobj, &gl518_group); if (data->type == gl518sm_r80) sysfs_remove_group(&client->dev.kobj, &gl518_group_r80); -exit_detach: - i2c_detach_client(client); exit_free: kfree(data); exit: @@ -591,19 +590,15 @@ static void gl518_init_client(struct i2c_client *client) gl518_write_value(client, GL518_REG_CONF, 0x40 | regvalue); } -static int gl518_detach_client(struct i2c_client *client) +static int gl518_remove(struct i2c_client *client) { struct gl518_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &gl518_group); if (data->type == gl518sm_r80) sysfs_remove_group(&client->dev.kobj, &gl518_group_r80); - if ((err = i2c_detach_client(client))) - return err; - kfree(data); return 0; } -- cgit v0.10.2 From a23a9fe1d4d725e1624635f49e9790ec32deffd0 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:13 +0200 Subject: hwmon: (gl520sm) Convert to a new-style i2c driver The new-style gl520sm driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare Cc: Maarten Deprez diff --git a/drivers/hwmon/gl520sm.c b/drivers/hwmon/gl520sm.c index 8984ef1..19616f2 100644 --- a/drivers/hwmon/gl520sm.c +++ b/drivers/hwmon/gl520sm.c @@ -79,26 +79,37 @@ static const u8 GL520_REG_TEMP_MAX_HYST[] = { 0x06, 0x18 }; * Function declarations */ -static int gl520_attach_adapter(struct i2c_adapter *adapter); -static int gl520_detect(struct i2c_adapter *adapter, int address, int kind); +static int gl520_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int gl520_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); static void gl520_init_client(struct i2c_client *client); -static int gl520_detach_client(struct i2c_client *client); +static int gl520_remove(struct i2c_client *client); static int gl520_read_value(struct i2c_client *client, u8 reg); static int gl520_write_value(struct i2c_client *client, u8 reg, u16 value); static struct gl520_data *gl520_update_device(struct device *dev); /* Driver data */ +static const struct i2c_device_id gl520_id[] = { + { "gl520sm", gl520sm }, + { } +}; +MODULE_DEVICE_TABLE(i2c, gl520_id); + static struct i2c_driver gl520_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "gl520sm", }, - .attach_adapter = gl520_attach_adapter, - .detach_client = gl520_detach_client, + .probe = gl520_probe, + .remove = gl520_remove, + .id_table = gl520_id, + .detect = gl520_detect, + .address_data = &addr_data, }; /* Client data */ struct gl520_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; char valid; /* zero until the following fields are valid */ @@ -669,37 +680,15 @@ static const struct attribute_group gl520_group_opt = { * Real code */ -static int gl520_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, gl520_detect); -} - -static int gl520_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int gl520_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - struct i2c_client *client; - struct gl520_data *data; - int err = 0; + struct i2c_adapter *adapter = client->adapter; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA)) - goto exit; - - /* OK. For now, we presume we have a valid client. We now create the - client structure, even though we cannot fill it completely yet. - But it allows us to access gl520_{read,write}_value. */ - - if (!(data = kzalloc(sizeof(struct gl520_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - client = &data->client; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &gl520_driver; + return -ENODEV; /* Determine the chip type. */ if (kind < 0) { @@ -707,24 +696,36 @@ static int gl520_detect(struct i2c_adapter *adapter, int address, int kind) ((gl520_read_value(client, GL520_REG_REVISION) & 0x7f) != 0x00) || ((gl520_read_value(client, GL520_REG_CONF) & 0x80) != 0x00)) { dev_dbg(&client->dev, "Unknown chip type, skipping\n"); - goto exit_free; + return -ENODEV; } } - /* Fill in the remaining client fields */ - strlcpy(client->name, "gl520sm", I2C_NAME_SIZE); - mutex_init(&data->update_lock); + strlcpy(info->type, "gl520sm", I2C_NAME_SIZE); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto exit_free; + return 0; +} + +static int gl520_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct gl520_data *data; + int err; + + data = kzalloc(sizeof(struct gl520_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); /* Initialize the GL520SM chip */ gl520_init_client(client); /* Register sysfs hooks */ if ((err = sysfs_create_group(&client->dev.kobj, &gl520_group))) - goto exit_detach; + goto exit_free; if (data->two_temps) { if ((err = device_create_file(&client->dev, @@ -764,8 +765,6 @@ static int gl520_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove_files: sysfs_remove_group(&client->dev.kobj, &gl520_group); sysfs_remove_group(&client->dev.kobj, &gl520_group_opt); -exit_detach: - i2c_detach_client(client); exit_free: kfree(data); exit: @@ -811,18 +810,14 @@ static void gl520_init_client(struct i2c_client *client) gl520_write_value(client, GL520_REG_BEEP_MASK, data->beep_mask); } -static int gl520_detach_client(struct i2c_client *client) +static int gl520_remove(struct i2c_client *client) { struct gl520_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &gl520_group); sysfs_remove_group(&client->dev.kobj, &gl520_group_opt); - if ((err = i2c_detach_client(client))) - return err; - kfree(data); return 0; } -- cgit v0.10.2 From d5957be2f1535b1a6c77eabba0781ec7245c5dea Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:13 +0200 Subject: hwmon: (lm63) Convert to a new-style i2c driver The new-style lm63 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare diff --git a/drivers/hwmon/lm63.c b/drivers/hwmon/lm63.c index 1162870..3195a26 100644 --- a/drivers/hwmon/lm63.c +++ b/drivers/hwmon/lm63.c @@ -1,7 +1,7 @@ /* * lm63.c - driver for the National Semiconductor LM63 temperature sensor * with integrated fan control - * Copyright (C) 2004-2006 Jean Delvare + * Copyright (C) 2004-2008 Jean Delvare * Based on the lm90 driver. * * The LM63 is a sensor chip made by National Semiconductor. It measures @@ -128,24 +128,36 @@ I2C_CLIENT_INSMOD_1(lm63); * Functions declaration */ -static int lm63_attach_adapter(struct i2c_adapter *adapter); -static int lm63_detach_client(struct i2c_client *client); +static int lm63_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int lm63_remove(struct i2c_client *client); static struct lm63_data *lm63_update_device(struct device *dev); -static int lm63_detect(struct i2c_adapter *adapter, int address, int kind); +static int lm63_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); static void lm63_init_client(struct i2c_client *client); /* * Driver data (common to all clients) */ +static const struct i2c_device_id lm63_id[] = { + { "lm63", lm63 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, lm63_id); + static struct i2c_driver lm63_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "lm63", }, - .attach_adapter = lm63_attach_adapter, - .detach_client = lm63_detach_client, + .probe = lm63_probe, + .remove = lm63_remove, + .id_table = lm63_id, + .detect = lm63_detect, + .address_data = &addr_data, }; /* @@ -153,7 +165,6 @@ static struct i2c_driver lm63_driver = { */ struct lm63_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; char valid; /* zero until following fields are valid */ @@ -411,43 +422,14 @@ static const struct attribute_group lm63_group_fan1 = { * Real code */ -static int lm63_attach_adapter(struct i2c_adapter *adapter) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int lm63_detect(struct i2c_client *new_client, int kind, + struct i2c_board_info *info) { - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, lm63_detect); -} - -/* - * The following function does more than just detection. If detection - * succeeds, it also registers the new chip. - */ -static int lm63_detect(struct i2c_adapter *adapter, int address, int kind) -{ - struct i2c_client *new_client; - struct lm63_data *data; - int err = 0; + struct i2c_adapter *adapter = new_client->adapter; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - goto exit; - - if (!(data = kzalloc(sizeof(struct lm63_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - /* The common I2C client data is placed right before the - LM63-specific data. */ - new_client = &data->client; - i2c_set_clientdata(new_client, data); - new_client->addr = address; - new_client->adapter = adapter; - new_client->driver = &lm63_driver; - new_client->flags = 0; - - /* Default to an LM63 if forced */ - if (kind == 0) - kind = lm63; + return -ENODEV; if (kind < 0) { /* must identify */ u8 man_id, chip_id, reg_config1, reg_config2; @@ -477,25 +459,38 @@ static int lm63_detect(struct i2c_adapter *adapter, int address, int kind) dev_dbg(&adapter->dev, "Unsupported chip " "(man_id=0x%02X, chip_id=0x%02X).\n", man_id, chip_id); - goto exit_free; + return -ENODEV; } } - strlcpy(new_client->name, "lm63", I2C_NAME_SIZE); + strlcpy(info->type, "lm63", I2C_NAME_SIZE); + + return 0; +} + +static int lm63_probe(struct i2c_client *new_client, + const struct i2c_device_id *id) +{ + struct lm63_data *data; + int err; + + data = kzalloc(sizeof(struct lm63_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(new_client, data); data->valid = 0; mutex_init(&data->update_lock); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(new_client))) - goto exit_free; - /* Initialize the LM63 chip */ lm63_init_client(new_client); /* Register sysfs hooks */ if ((err = sysfs_create_group(&new_client->dev.kobj, &lm63_group))) - goto exit_detach; + goto exit_free; if (data->config & 0x04) { /* tachometer enabled */ if ((err = sysfs_create_group(&new_client->dev.kobj, &lm63_group_fan1))) @@ -513,8 +508,6 @@ static int lm63_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove_files: sysfs_remove_group(&new_client->dev.kobj, &lm63_group); sysfs_remove_group(&new_client->dev.kobj, &lm63_group_fan1); -exit_detach: - i2c_detach_client(new_client); exit_free: kfree(data); exit: @@ -556,18 +549,14 @@ static void lm63_init_client(struct i2c_client *client) (data->config_fan & 0x20) ? "manual" : "auto"); } -static int lm63_detach_client(struct i2c_client *client) +static int lm63_remove(struct i2c_client *client) { struct lm63_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &lm63_group); sysfs_remove_group(&client->dev.kobj, &lm63_group_fan1); - if ((err = i2c_detach_client(client))) - return err; - kfree(data); return 0; } -- cgit v0.10.2 From a189dd62d328db7bf8ba68de6a948fdbc93dca25 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:13 +0200 Subject: hwmon: (lm77) Convert to a new-style i2c driver The new-style lm77 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare Cc: Andras Bali diff --git a/drivers/hwmon/lm77.c b/drivers/hwmon/lm77.c index 36d5a8c..866b401 100644 --- a/drivers/hwmon/lm77.c +++ b/drivers/hwmon/lm77.c @@ -52,7 +52,6 @@ I2C_CLIENT_INSMOD_1(lm77); /* Each client has this additional data */ struct lm77_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; char valid; @@ -65,23 +64,35 @@ struct lm77_data { u8 alarms; }; -static int lm77_attach_adapter(struct i2c_adapter *adapter); -static int lm77_detect(struct i2c_adapter *adapter, int address, int kind); +static int lm77_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int lm77_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); static void lm77_init_client(struct i2c_client *client); -static int lm77_detach_client(struct i2c_client *client); +static int lm77_remove(struct i2c_client *client); static u16 lm77_read_value(struct i2c_client *client, u8 reg); static int lm77_write_value(struct i2c_client *client, u8 reg, u16 value); static struct lm77_data *lm77_update_device(struct device *dev); +static const struct i2c_device_id lm77_id[] = { + { "lm77", lm77 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, lm77_id); + /* This is the driver that will be inserted */ static struct i2c_driver lm77_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "lm77", }, - .attach_adapter = lm77_attach_adapter, - .detach_client = lm77_detach_client, + .probe = lm77_probe, + .remove = lm77_remove, + .id_table = lm77_id, + .detect = lm77_detect, + .address_data = &addr_data, }; /* straight from the datasheet */ @@ -215,13 +226,6 @@ static SENSOR_DEVICE_ATTR(temp1_crit_alarm, S_IRUGO, show_alarm, NULL, 2); static SENSOR_DEVICE_ATTR(temp1_min_alarm, S_IRUGO, show_alarm, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_max_alarm, S_IRUGO, show_alarm, NULL, 1); -static int lm77_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, lm77_detect); -} - static struct attribute *lm77_attributes[] = { &dev_attr_temp1_input.attr, &dev_attr_temp1_crit.attr, @@ -240,32 +244,15 @@ static const struct attribute_group lm77_group = { .attrs = lm77_attributes, }; -/* This function is called by i2c_probe */ -static int lm77_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int lm77_detect(struct i2c_client *new_client, int kind, + struct i2c_board_info *info) { - struct i2c_client *new_client; - struct lm77_data *data; - int err = 0; - const char *name = ""; + struct i2c_adapter *adapter = new_client->adapter; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA)) - goto exit; - - /* OK. For now, we presume we have a valid client. We now create the - client structure, even though we cannot fill it completely yet. - But it allows us to access lm77_{read,write}_value. */ - if (!(data = kzalloc(sizeof(struct lm77_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - new_client = &data->client; - i2c_set_clientdata(new_client, data); - new_client->addr = address; - new_client->adapter = adapter; - new_client->driver = &lm77_driver; - new_client->flags = 0; + return -ENODEV; /* Here comes the remaining detection. Since the LM77 has no register dedicated to identification, we have to rely on the @@ -294,7 +281,7 @@ static int lm77_detect(struct i2c_adapter *adapter, int address, int kind) || i2c_smbus_read_word_data(new_client, i + 3) != crit || i2c_smbus_read_word_data(new_client, i + 4) != min || i2c_smbus_read_word_data(new_client, i + 5) != max) - goto exit_free; + return -ENODEV; /* sign bits */ if (((cur & 0x00f0) != 0xf0 && (cur & 0x00f0) != 0x0) @@ -302,51 +289,55 @@ static int lm77_detect(struct i2c_adapter *adapter, int address, int kind) || ((crit & 0x00f0) != 0xf0 && (crit & 0x00f0) != 0x0) || ((min & 0x00f0) != 0xf0 && (min & 0x00f0) != 0x0) || ((max & 0x00f0) != 0xf0 && (max & 0x00f0) != 0x0)) - goto exit_free; + return -ENODEV; /* unused bits */ if (conf & 0xe0) - goto exit_free; + return -ENODEV; /* 0x06 and 0x07 return the last read value */ cur = i2c_smbus_read_word_data(new_client, 0); if (i2c_smbus_read_word_data(new_client, 6) != cur || i2c_smbus_read_word_data(new_client, 7) != cur) - goto exit_free; + return -ENODEV; hyst = i2c_smbus_read_word_data(new_client, 2); if (i2c_smbus_read_word_data(new_client, 6) != hyst || i2c_smbus_read_word_data(new_client, 7) != hyst) - goto exit_free; + return -ENODEV; min = i2c_smbus_read_word_data(new_client, 4); if (i2c_smbus_read_word_data(new_client, 6) != min || i2c_smbus_read_word_data(new_client, 7) != min) - goto exit_free; + return -ENODEV; } - /* Determine the chip type - only one kind supported! */ - if (kind <= 0) - kind = lm77; + strlcpy(info->type, "lm77", I2C_NAME_SIZE); - if (kind == lm77) { - name = "lm77"; + return 0; +} + +static int lm77_probe(struct i2c_client *new_client, + const struct i2c_device_id *id) +{ + struct lm77_data *data; + int err; + + data = kzalloc(sizeof(struct lm77_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; } - /* Fill in the remaining client fields and put it into the global list */ - strlcpy(new_client->name, name, I2C_NAME_SIZE); + i2c_set_clientdata(new_client, data); data->valid = 0; mutex_init(&data->update_lock); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(new_client))) - goto exit_free; - /* Initialize the LM77 chip */ lm77_init_client(new_client); /* Register sysfs hooks */ if ((err = sysfs_create_group(&new_client->dev.kobj, &lm77_group))) - goto exit_detach; + goto exit_free; data->hwmon_dev = hwmon_device_register(&new_client->dev); if (IS_ERR(data->hwmon_dev)) { @@ -358,20 +349,17 @@ static int lm77_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove: sysfs_remove_group(&new_client->dev.kobj, &lm77_group); -exit_detach: - i2c_detach_client(new_client); exit_free: kfree(data); exit: return err; } -static int lm77_detach_client(struct i2c_client *client) +static int lm77_remove(struct i2c_client *client) { struct lm77_data *data = i2c_get_clientdata(client); hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &lm77_group); - i2c_detach_client(client); kfree(data); return 0; } -- cgit v0.10.2 From 8c8bacc883610b672d3f08dc6ebf1f17e495f5b9 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:14 +0200 Subject: hwmon: (lm80) Convert to a new-style i2c driver The new-style lm80 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare diff --git a/drivers/hwmon/lm80.c b/drivers/hwmon/lm80.c index 26c91c9..bcffc18 100644 --- a/drivers/hwmon/lm80.c +++ b/drivers/hwmon/lm80.c @@ -108,7 +108,6 @@ static inline long TEMP_FROM_REG(u16 temp) */ struct lm80_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; char valid; /* !=0 if following fields are valid */ @@ -132,10 +131,12 @@ struct lm80_data { * Functions declaration */ -static int lm80_attach_adapter(struct i2c_adapter *adapter); -static int lm80_detect(struct i2c_adapter *adapter, int address, int kind); +static int lm80_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int lm80_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); static void lm80_init_client(struct i2c_client *client); -static int lm80_detach_client(struct i2c_client *client); +static int lm80_remove(struct i2c_client *client); static struct lm80_data *lm80_update_device(struct device *dev); static int lm80_read_value(struct i2c_client *client, u8 reg); static int lm80_write_value(struct i2c_client *client, u8 reg, u8 value); @@ -144,12 +145,22 @@ static int lm80_write_value(struct i2c_client *client, u8 reg, u8 value); * Driver data (common to all clients) */ +static const struct i2c_device_id lm80_id[] = { + { "lm80", lm80 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, lm80_id); + static struct i2c_driver lm80_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "lm80", }, - .attach_adapter = lm80_attach_adapter, - .detach_client = lm80_detach_client, + .probe = lm80_probe, + .remove = lm80_remove, + .id_table = lm80_id, + .detect = lm80_detect, + .address_data = &addr_data, }; /* @@ -383,13 +394,6 @@ static SENSOR_DEVICE_ATTR(temp1_crit_alarm, S_IRUGO, show_alarm, NULL, 13); * Real code */ -static int lm80_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, lm80_detect); -} - static struct attribute *lm80_attributes[] = { &sensor_dev_attr_in0_min.dev_attr.attr, &sensor_dev_attr_in1_min.dev_attr.attr, @@ -442,53 +446,46 @@ static const struct attribute_group lm80_group = { .attrs = lm80_attributes, }; -static int lm80_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int lm80_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { + struct i2c_adapter *adapter = client->adapter; int i, cur; - struct i2c_client *client; - struct lm80_data *data; - int err = 0; - const char *name; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - goto exit; - - /* OK. For now, we presume we have a valid client. We now create the - client structure, even though we cannot fill it completely yet. - But it allows us to access lm80_{read,write}_value. */ - if (!(data = kzalloc(sizeof(struct lm80_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - client = &data->client; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &lm80_driver; + return -ENODEV; /* Now, we do the remaining detection. It is lousy. */ if (lm80_read_value(client, LM80_REG_ALARM2) & 0xc0) - goto error_free; + return -ENODEV; for (i = 0x2a; i <= 0x3d; i++) { cur = i2c_smbus_read_byte_data(client, i); if ((i2c_smbus_read_byte_data(client, i + 0x40) != cur) || (i2c_smbus_read_byte_data(client, i + 0x80) != cur) || (i2c_smbus_read_byte_data(client, i + 0xc0) != cur)) - goto error_free; + return -ENODEV; } - /* Determine the chip type - only one kind supported! */ - kind = lm80; - name = "lm80"; + strlcpy(info->type, "lm80", I2C_NAME_SIZE); - /* Fill in the remaining client fields */ - strlcpy(client->name, name, I2C_NAME_SIZE); - mutex_init(&data->update_lock); + return 0; +} - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto error_free; +static int lm80_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct lm80_data *data; + int err; + + data = kzalloc(sizeof(struct lm80_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); /* Initialize the LM80 chip */ lm80_init_client(client); @@ -499,7 +496,7 @@ static int lm80_detect(struct i2c_adapter *adapter, int address, int kind) /* Register sysfs hooks */ if ((err = sysfs_create_group(&client->dev.kobj, &lm80_group))) - goto error_detach; + goto error_free; data->hwmon_dev = hwmon_device_register(&client->dev); if (IS_ERR(data->hwmon_dev)) { @@ -511,23 +508,18 @@ static int lm80_detect(struct i2c_adapter *adapter, int address, int kind) error_remove: sysfs_remove_group(&client->dev.kobj, &lm80_group); -error_detach: - i2c_detach_client(client); error_free: kfree(data); exit: return err; } -static int lm80_detach_client(struct i2c_client *client) +static int lm80_remove(struct i2c_client *client) { struct lm80_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &lm80_group); - if ((err = i2c_detach_client(client))) - return err; kfree(data); return 0; -- cgit v0.10.2 From b6aacdcefac8ed60e77930b6e74129da6478e20e Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:14 +0200 Subject: hwmon: (lm83) Convert to a new-style i2c driver The new-style lm83 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c index 6a8642f..e59e2d1 100644 --- a/drivers/hwmon/lm83.c +++ b/drivers/hwmon/lm83.c @@ -1,7 +1,7 @@ /* * lm83.c - Part of lm_sensors, Linux kernel modules for hardware * monitoring - * Copyright (C) 2003-2006 Jean Delvare + * Copyright (C) 2003-2008 Jean Delvare * * Heavily inspired from the lm78, lm75 and adm1021 drivers. The LM83 is * a sensor chip made by National Semiconductor. It reports up to four @@ -118,21 +118,34 @@ static const u8 LM83_REG_W_HIGH[] = { * Functions declaration */ -static int lm83_attach_adapter(struct i2c_adapter *adapter); -static int lm83_detect(struct i2c_adapter *adapter, int address, int kind); -static int lm83_detach_client(struct i2c_client *client); +static int lm83_detect(struct i2c_client *new_client, int kind, + struct i2c_board_info *info); +static int lm83_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int lm83_remove(struct i2c_client *client); static struct lm83_data *lm83_update_device(struct device *dev); /* * Driver data (common to all clients) */ +static const struct i2c_device_id lm83_id[] = { + { "lm83", lm83 }, + { "lm82", lm82 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, lm83_id); + static struct i2c_driver lm83_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "lm83", }, - .attach_adapter = lm83_attach_adapter, - .detach_client = lm83_detach_client, + .probe = lm83_probe, + .remove = lm83_remove, + .id_table = lm83_id, + .detect = lm83_detect, + .address_data = &addr_data, }; /* @@ -140,7 +153,6 @@ static struct i2c_driver lm83_driver = { */ struct lm83_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; char valid; /* zero until following fields are valid */ @@ -278,40 +290,15 @@ static const struct attribute_group lm83_group_opt = { * Real code */ -static int lm83_attach_adapter(struct i2c_adapter *adapter) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int lm83_detect(struct i2c_client *new_client, int kind, + struct i2c_board_info *info) { - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, lm83_detect); -} - -/* - * The following function does more than just detection. If detection - * succeeds, it also registers the new chip. - */ -static int lm83_detect(struct i2c_adapter *adapter, int address, int kind) -{ - struct i2c_client *new_client; - struct lm83_data *data; - int err = 0; + struct i2c_adapter *adapter = new_client->adapter; const char *name = ""; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - goto exit; - - if (!(data = kzalloc(sizeof(struct lm83_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - /* The common I2C client data is placed right after the - * LM83-specific data. */ - new_client = &data->client; - i2c_set_clientdata(new_client, data); - new_client->addr = address; - new_client->adapter = adapter; - new_client->driver = &lm83_driver; - new_client->flags = 0; + return -ENODEV; /* Now we do the detection and identification. A negative kind * means that the driver was loaded with no force parameter @@ -335,8 +322,9 @@ static int lm83_detect(struct i2c_adapter *adapter, int address, int kind) ((i2c_smbus_read_byte_data(new_client, LM83_REG_R_CONFIG) & 0x41) != 0x00)) { dev_dbg(&adapter->dev, - "LM83 detection failed at 0x%02x.\n", address); - goto exit_free; + "LM83 detection failed at 0x%02x.\n", + new_client->addr); + return -ENODEV; } } @@ -361,7 +349,7 @@ static int lm83_detect(struct i2c_adapter *adapter, int address, int kind) dev_info(&adapter->dev, "Unsupported chip (man_id=0x%02X, " "chip_id=0x%02X).\n", man_id, chip_id); - goto exit_free; + return -ENODEV; } } @@ -372,15 +360,27 @@ static int lm83_detect(struct i2c_adapter *adapter, int address, int kind) name = "lm82"; } - /* We can fill in the remaining client fields */ - strlcpy(new_client->name, name, I2C_NAME_SIZE); + strlcpy(info->type, name, I2C_NAME_SIZE); + + return 0; +} + +static int lm83_probe(struct i2c_client *new_client, + const struct i2c_device_id *id) +{ + struct lm83_data *data; + int err; + + data = kzalloc(sizeof(struct lm83_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(new_client, data); data->valid = 0; mutex_init(&data->update_lock); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(new_client))) - goto exit_free; - /* * Register sysfs hooks * The LM82 can only monitor one external diode which is @@ -389,9 +389,9 @@ static int lm83_detect(struct i2c_adapter *adapter, int address, int kind) */ if ((err = sysfs_create_group(&new_client->dev.kobj, &lm83_group))) - goto exit_detach; + goto exit_free; - if (kind == lm83) { + if (id->driver_data == lm83) { if ((err = sysfs_create_group(&new_client->dev.kobj, &lm83_group_opt))) goto exit_remove_files; @@ -408,26 +408,20 @@ static int lm83_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove_files: sysfs_remove_group(&new_client->dev.kobj, &lm83_group); sysfs_remove_group(&new_client->dev.kobj, &lm83_group_opt); -exit_detach: - i2c_detach_client(new_client); exit_free: kfree(data); exit: return err; } -static int lm83_detach_client(struct i2c_client *client) +static int lm83_remove(struct i2c_client *client) { struct lm83_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &lm83_group); sysfs_remove_group(&client->dev.kobj, &lm83_group_opt); - if ((err = i2c_detach_client(client))) - return err; - kfree(data); return 0; } -- cgit v0.10.2 From a888420af07b9a26b6d518328baa913fb704e950 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:14 +0200 Subject: hwmon: (lm87) Convert to a new-style i2c driver The new-style lm87 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare Cc: Ben Hutchings diff --git a/drivers/hwmon/lm87.c b/drivers/hwmon/lm87.c index e1c183f..21970f0 100644 --- a/drivers/hwmon/lm87.c +++ b/drivers/hwmon/lm87.c @@ -5,7 +5,7 @@ * Philip Edelbrock * Stephen Rousset * Dan Eaton - * Copyright (C) 2004,2007 Jean Delvare + * Copyright (C) 2004-2008 Jean Delvare * * Original port to Linux 2.6 by Jeff Oliver. * @@ -157,22 +157,35 @@ static u8 LM87_REG_TEMP_LOW[3] = { 0x3A, 0x38, 0x2C }; * Functions declaration */ -static int lm87_attach_adapter(struct i2c_adapter *adapter); -static int lm87_detect(struct i2c_adapter *adapter, int address, int kind); +static int lm87_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int lm87_detect(struct i2c_client *new_client, int kind, + struct i2c_board_info *info); static void lm87_init_client(struct i2c_client *client); -static int lm87_detach_client(struct i2c_client *client); +static int lm87_remove(struct i2c_client *client); static struct lm87_data *lm87_update_device(struct device *dev); /* * Driver data (common to all clients) */ +static const struct i2c_device_id lm87_id[] = { + { "lm87", lm87 }, + { "adm1024", adm1024 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, lm87_id); + static struct i2c_driver lm87_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "lm87", }, - .attach_adapter = lm87_attach_adapter, - .detach_client = lm87_detach_client, + .probe = lm87_probe, + .remove = lm87_remove, + .id_table = lm87_id, + .detect = lm87_detect, + .address_data = &addr_data, }; /* @@ -180,7 +193,6 @@ static struct i2c_driver lm87_driver = { */ struct lm87_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; char valid; /* zero until following fields are valid */ @@ -562,13 +574,6 @@ static SENSOR_DEVICE_ATTR(temp3_fault, S_IRUGO, show_alarm, NULL, 15); * Real code */ -static int lm87_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, lm87_detect); -} - static struct attribute *lm87_attributes[] = { &dev_attr_in1_input.attr, &dev_attr_in1_min.attr, @@ -656,33 +661,15 @@ static const struct attribute_group lm87_group_opt = { .attrs = lm87_attributes_opt, }; -/* - * The following function does more than just detection. If detection - * succeeds, it also registers the new chip. - */ -static int lm87_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int lm87_detect(struct i2c_client *new_client, int kind, + struct i2c_board_info *info) { - struct i2c_client *new_client; - struct lm87_data *data; - int err = 0; + struct i2c_adapter *adapter = new_client->adapter; static const char *names[] = { "lm87", "adm1024" }; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - goto exit; - - if (!(data = kzalloc(sizeof(struct lm87_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - /* The common I2C client data is placed right before the - LM87-specific data. */ - new_client = &data->client; - i2c_set_clientdata(new_client, data); - new_client->addr = address; - new_client->adapter = adapter; - new_client->driver = &lm87_driver; - new_client->flags = 0; + return -ENODEV; /* Default to an LM87 if forced */ if (kind == 0) @@ -704,20 +691,32 @@ static int lm87_detect(struct i2c_adapter *adapter, int address, int kind) || (lm87_read_value(new_client, LM87_REG_CONFIG) & 0x80)) { dev_dbg(&adapter->dev, "LM87 detection failed at 0x%02x.\n", - address); - goto exit_free; + new_client->addr); + return -ENODEV; } } - /* We can fill in the remaining client fields */ - strlcpy(new_client->name, names[kind - 1], I2C_NAME_SIZE); + strlcpy(info->type, names[kind - 1], I2C_NAME_SIZE); + + return 0; +} + +static int lm87_probe(struct i2c_client *new_client, + const struct i2c_device_id *id) +{ + struct lm87_data *data; + int err; + + data = kzalloc(sizeof(struct lm87_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(new_client, data); data->valid = 0; mutex_init(&data->update_lock); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(new_client))) - goto exit_free; - /* Initialize the LM87 chip */ lm87_init_client(new_client); @@ -732,7 +731,7 @@ static int lm87_detect(struct i2c_adapter *adapter, int address, int kind) /* Register sysfs hooks */ if ((err = sysfs_create_group(&new_client->dev.kobj, &lm87_group))) - goto exit_detach; + goto exit_free; if (data->channel & CHAN_NO_FAN(0)) { if ((err = device_create_file(&new_client->dev, @@ -832,8 +831,6 @@ static int lm87_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove: sysfs_remove_group(&new_client->dev.kobj, &lm87_group); sysfs_remove_group(&new_client->dev.kobj, &lm87_group_opt); -exit_detach: - i2c_detach_client(new_client); exit_free: kfree(data); exit: @@ -877,18 +874,14 @@ static void lm87_init_client(struct i2c_client *client) } } -static int lm87_detach_client(struct i2c_client *client) +static int lm87_remove(struct i2c_client *client) { struct lm87_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &lm87_group); sysfs_remove_group(&client->dev.kobj, &lm87_group_opt); - if ((err = i2c_detach_client(client))) - return err; - kfree(data); return 0; } -- cgit v0.10.2 From 9b0e85269275159a1f9c3e4a5d254caf5211950b Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:15 +0200 Subject: hwmon: (lm90) Convert to a new-style i2c driver The new-style lm90 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index d1a3da3..c24fe36 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -187,23 +187,44 @@ I2C_CLIENT_INSMOD_7(lm90, adm1032, lm99, lm86, max6657, adt7461, max6680); * Functions declaration */ -static int lm90_attach_adapter(struct i2c_adapter *adapter); -static int lm90_detect(struct i2c_adapter *adapter, int address, - int kind); +static int lm90_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); +static int lm90_probe(struct i2c_client *client, + const struct i2c_device_id *id); static void lm90_init_client(struct i2c_client *client); -static int lm90_detach_client(struct i2c_client *client); +static int lm90_remove(struct i2c_client *client); static struct lm90_data *lm90_update_device(struct device *dev); /* * Driver data (common to all clients) */ +static const struct i2c_device_id lm90_id[] = { + { "adm1032", adm1032 }, + { "adt7461", adt7461 }, + { "lm90", lm90 }, + { "lm86", lm86 }, + { "lm89", lm99 }, + { "lm99", lm99 }, /* Missing temperature offset */ + { "max6657", max6657 }, + { "max6658", max6657 }, + { "max6659", max6657 }, + { "max6680", max6680 }, + { "max6681", max6680 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, lm90_id); + static struct i2c_driver lm90_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "lm90", }, - .attach_adapter = lm90_attach_adapter, - .detach_client = lm90_detach_client, + .probe = lm90_probe, + .remove = lm90_remove, + .id_table = lm90_id, + .detect = lm90_detect, + .address_data = &addr_data, }; /* @@ -211,7 +232,6 @@ static struct i2c_driver lm90_driver = { */ struct lm90_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; char valid; /* zero until following fields are valid */ @@ -477,40 +497,16 @@ static int lm90_read_reg(struct i2c_client* client, u8 reg, u8 *value) return 0; } -static int lm90_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, lm90_detect); -} - -/* - * The following function does more than just detection. If detection - * succeeds, it also registers the new chip. - */ -static int lm90_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int lm90_detect(struct i2c_client *new_client, int kind, + struct i2c_board_info *info) { - struct i2c_client *new_client; - struct lm90_data *data; - int err = 0; + struct i2c_adapter *adapter = new_client->adapter; + int address = new_client->addr; const char *name = ""; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - goto exit; - - if (!(data = kzalloc(sizeof(struct lm90_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - /* The common I2C client data is placed right before the - LM90-specific data. */ - new_client = &data->client; - i2c_set_clientdata(new_client, data); - new_client->addr = address; - new_client->adapter = adapter; - new_client->driver = &lm90_driver; - new_client->flags = 0; + return -ENODEV; /* * Now we do the remaining detection. A negative kind means that @@ -538,7 +534,7 @@ static int lm90_detect(struct i2c_adapter *adapter, int address, int kind) LM90_REG_R_CONFIG1)) < 0 || (reg_convrate = i2c_smbus_read_byte_data(new_client, LM90_REG_R_CONVRATE)) < 0) - goto exit_free; + return -ENODEV; if ((address == 0x4C || address == 0x4D) && man_id == 0x01) { /* National Semiconductor */ @@ -546,7 +542,7 @@ static int lm90_detect(struct i2c_adapter *adapter, int address, int kind) if ((reg_config2 = i2c_smbus_read_byte_data(new_client, LM90_REG_R_CONFIG2)) < 0) - goto exit_free; + return -ENODEV; if ((reg_config1 & 0x2A) == 0x00 && (reg_config2 & 0xF8) == 0x00 @@ -610,10 +606,11 @@ static int lm90_detect(struct i2c_adapter *adapter, int address, int kind) dev_info(&adapter->dev, "Unsupported chip (man_id=0x%02X, " "chip_id=0x%02X).\n", man_id, chip_id); - goto exit_free; + return -ENODEV; } } + /* Fill the i2c board info */ if (kind == lm90) { name = "lm90"; } else if (kind == adm1032) { @@ -621,7 +618,7 @@ static int lm90_detect(struct i2c_adapter *adapter, int address, int kind) /* The ADM1032 supports PEC, but only if combined transactions are not used. */ if (i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE)) - new_client->flags |= I2C_CLIENT_PEC; + info->flags |= I2C_CLIENT_PEC; } else if (kind == lm99) { name = "lm99"; } else if (kind == lm86) { @@ -633,23 +630,39 @@ static int lm90_detect(struct i2c_adapter *adapter, int address, int kind) } else if (kind == adt7461) { name = "adt7461"; } + strlcpy(info->type, name, I2C_NAME_SIZE); + + return 0; +} + +static int lm90_probe(struct i2c_client *new_client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = to_i2c_adapter(new_client->dev.parent); + struct lm90_data *data; + int err; - /* We can fill in the remaining client fields */ - strlcpy(new_client->name, name, I2C_NAME_SIZE); - data->valid = 0; - data->kind = kind; + data = kzalloc(sizeof(struct lm90_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + i2c_set_clientdata(new_client, data); mutex_init(&data->update_lock); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(new_client))) - goto exit_free; + /* Set the device type */ + data->kind = id->driver_data; + if (data->kind == adm1032) { + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE)) + new_client->flags &= ~I2C_CLIENT_PEC; + } /* Initialize the LM90 chip */ lm90_init_client(new_client); /* Register sysfs hooks */ if ((err = sysfs_create_group(&new_client->dev.kobj, &lm90_group))) - goto exit_detach; + goto exit_free; if (new_client->flags & I2C_CLIENT_PEC) { if ((err = device_create_file(&new_client->dev, &dev_attr_pec))) @@ -672,8 +685,6 @@ static int lm90_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove_files: sysfs_remove_group(&new_client->dev.kobj, &lm90_group); device_remove_file(&new_client->dev, &dev_attr_pec); -exit_detach: - i2c_detach_client(new_client); exit_free: kfree(data); exit: @@ -710,10 +721,9 @@ static void lm90_init_client(struct i2c_client *client) i2c_smbus_write_byte_data(client, LM90_REG_W_CONFIG1, config); } -static int lm90_detach_client(struct i2c_client *client) +static int lm90_remove(struct i2c_client *client) { struct lm90_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &lm90_group); @@ -722,9 +732,6 @@ static int lm90_detach_client(struct i2c_client *client) device_remove_file(&client->dev, &sensor_dev_attr_temp2_offset.dev_attr); - if ((err = i2c_detach_client(client))) - return err; - kfree(data); return 0; } -- cgit v0.10.2 From 910e8dcf16dd7afc08dc1791155cc69e07ca4183 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:15 +0200 Subject: hwmon: (lm92) Convert to a new-style i2c driver The new-style lm92 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare diff --git a/drivers/hwmon/lm92.c b/drivers/hwmon/lm92.c index c31942e..b2e00c5 100644 --- a/drivers/hwmon/lm92.c +++ b/drivers/hwmon/lm92.c @@ -1,6 +1,6 @@ /* * lm92 - Hardware monitoring driver - * Copyright (C) 2005 Jean Delvare + * Copyright (C) 2005-2008 Jean Delvare * * Based on the lm90 driver, with some ideas taken from the lm_sensors * lm92 driver as well. @@ -96,7 +96,6 @@ static struct i2c_driver lm92_driver; /* Client data (each client gets its own) */ struct lm92_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; char valid; /* zero until following fields are valid */ @@ -319,32 +318,15 @@ static const struct attribute_group lm92_group = { .attrs = lm92_attributes, }; -/* The following function does more than just detection. If detection - succeeds, it also registers the new chip. */ -static int lm92_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int lm92_detect(struct i2c_client *new_client, int kind, + struct i2c_board_info *info) { - struct i2c_client *new_client; - struct lm92_data *data; - int err = 0; - char *name; + struct i2c_adapter *adapter = new_client->adapter; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA)) - goto exit; - - if (!(data = kzalloc(sizeof(struct lm92_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - /* Fill in enough client fields so that we can read from the chip, - which is required for identication */ - new_client = &data->client; - i2c_set_clientdata(new_client, data); - new_client->addr = address; - new_client->adapter = adapter; - new_client->driver = &lm92_driver; - new_client->flags = 0; + return -ENODEV; /* A negative kind means that the driver was loaded with no force parameter (default), so we must identify the chip. */ @@ -364,34 +346,36 @@ static int lm92_detect(struct i2c_adapter *adapter, int address, int kind) kind = lm92; /* No separate prefix */ } else - goto exit_free; - } else - if (kind == 0) /* Default to an LM92 if forced */ - kind = lm92; - - /* Give it the proper name */ - if (kind == lm92) { - name = "lm92"; - } else { /* Supposedly cannot happen */ - dev_dbg(&new_client->dev, "Kind out of range?\n"); - goto exit_free; + return -ENODEV; } - /* Fill in the remaining client fields */ - strlcpy(new_client->name, name, I2C_NAME_SIZE); + strlcpy(info->type, "lm92", I2C_NAME_SIZE); + + return 0; +} + +static int lm92_probe(struct i2c_client *new_client, + const struct i2c_device_id *id) +{ + struct lm92_data *data; + int err; + + data = kzalloc(sizeof(struct lm92_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(new_client, data); data->valid = 0; mutex_init(&data->update_lock); - /* Tell the i2c subsystem a new client has arrived */ - if ((err = i2c_attach_client(new_client))) - goto exit_free; - /* Initialize the chipset */ lm92_init_client(new_client); /* Register sysfs hooks */ if ((err = sysfs_create_group(&new_client->dev.kobj, &lm92_group))) - goto exit_detach; + goto exit_free; data->hwmon_dev = hwmon_device_register(&new_client->dev); if (IS_ERR(data->hwmon_dev)) { @@ -403,32 +387,19 @@ static int lm92_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove: sysfs_remove_group(&new_client->dev.kobj, &lm92_group); -exit_detach: - i2c_detach_client(new_client); exit_free: kfree(data); exit: return err; } -static int lm92_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, lm92_detect); -} - -static int lm92_detach_client(struct i2c_client *client) +static int lm92_remove(struct i2c_client *client) { struct lm92_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &lm92_group); - if ((err = i2c_detach_client(client))) - return err; - kfree(data); return 0; } @@ -438,12 +409,23 @@ static int lm92_detach_client(struct i2c_client *client) * Module and driver stuff */ +static const struct i2c_device_id lm92_id[] = { + { "lm92", lm92 }, + /* max6635 could be added here */ + { } +}; +MODULE_DEVICE_TABLE(i2c, lm92_id); + static struct i2c_driver lm92_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "lm92", }, - .attach_adapter = lm92_attach_adapter, - .detach_client = lm92_detach_client, + .probe = lm92_probe, + .remove = lm92_remove, + .id_table = lm92_id, + .detect = lm92_detect, + .address_data = &addr_data, }; static int __init sensors_lm92_init(void) -- cgit v0.10.2 From 70b724063f789a443aff1e1a6f0f04d971342116 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:15 +0200 Subject: hwmon: (lm93) Convert to a new-style i2c driver The new-style lm93 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare Cc: Mark M. Hoffman Cc: Eric J. Bowersox Cc: Carsten Emde Cc: Hans J. Koch diff --git a/drivers/hwmon/lm93.c b/drivers/hwmon/lm93.c index 5e678f5..fc36cad 100644 --- a/drivers/hwmon/lm93.c +++ b/drivers/hwmon/lm93.c @@ -200,7 +200,6 @@ struct block1_t { * Client-specific data */ struct lm93_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; @@ -2501,45 +2500,14 @@ static void lm93_init_client(struct i2c_client *client) "chip to signal ready!\n"); } -static int lm93_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int lm93_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - struct lm93_data *data; - struct i2c_client *client; - - int err = -ENODEV, func; - void (*update)(struct lm93_data *, struct i2c_client *); - - /* choose update routine based on bus capabilities */ - func = i2c_get_functionality(adapter); - if ( ((LM93_SMBUS_FUNC_FULL & func) == LM93_SMBUS_FUNC_FULL) && - (!disable_block) ) { - dev_dbg(&adapter->dev,"using SMBus block data transactions\n"); - update = lm93_update_client_full; - } else if ((LM93_SMBUS_FUNC_MIN & func) == LM93_SMBUS_FUNC_MIN) { - dev_dbg(&adapter->dev,"disabled SMBus block data " - "transactions\n"); - update = lm93_update_client_min; - } else { - dev_dbg(&adapter->dev,"detect failed, " - "smbus byte and/or word data not supported!\n"); - goto err_out; - } + struct i2c_adapter *adapter = client->adapter; - /* OK. For now, we presume we have a valid client. We now create the - client structure, even though we cannot fill it completely yet. - But it allows us to access lm78_{read,write}_value. */ - - if ( !(data = kzalloc(sizeof(struct lm93_data), GFP_KERNEL))) { - dev_dbg(&adapter->dev,"out of memory!\n"); - err = -ENOMEM; - goto err_out; - } - - client = &data->client; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &lm93_driver; + if (!i2c_check_functionality(adapter, LM93_SMBUS_FUNC_MIN)) + return -ENODEV; /* detection */ if (kind < 0) { @@ -2548,7 +2516,7 @@ static int lm93_detect(struct i2c_adapter *adapter, int address, int kind) if (mfr != 0x01) { dev_dbg(&adapter->dev,"detect failed, " "bad manufacturer id 0x%02x!\n", mfr); - goto err_free; + return -ENODEV; } } @@ -2563,31 +2531,61 @@ static int lm93_detect(struct i2c_adapter *adapter, int address, int kind) if (kind == 0) dev_dbg(&adapter->dev, "(ignored 'force' parameter)\n"); - goto err_free; + return -ENODEV; } } - /* fill in remaining client fields */ - strlcpy(client->name, "lm93", I2C_NAME_SIZE); + strlcpy(info->type, "lm93", I2C_NAME_SIZE); dev_dbg(&adapter->dev,"loading %s at %d,0x%02x\n", client->name, i2c_adapter_id(client->adapter), client->addr); + return 0; +} + +static int lm93_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct lm93_data *data; + int err, func; + void (*update)(struct lm93_data *, struct i2c_client *); + + /* choose update routine based on bus capabilities */ + func = i2c_get_functionality(client->adapter); + if (((LM93_SMBUS_FUNC_FULL & func) == LM93_SMBUS_FUNC_FULL) && + (!disable_block)) { + dev_dbg(&client->dev, "using SMBus block data transactions\n"); + update = lm93_update_client_full; + } else if ((LM93_SMBUS_FUNC_MIN & func) == LM93_SMBUS_FUNC_MIN) { + dev_dbg(&client->dev, "disabled SMBus block data " + "transactions\n"); + update = lm93_update_client_min; + } else { + dev_dbg(&client->dev, "detect failed, " + "smbus byte and/or word data not supported!\n"); + err = -ENODEV; + goto err_out; + } + + data = kzalloc(sizeof(struct lm93_data), GFP_KERNEL); + if (!data) { + dev_dbg(&client->dev, "out of memory!\n"); + err = -ENOMEM; + goto err_out; + } + i2c_set_clientdata(client, data); + /* housekeeping */ data->valid = 0; data->update = update; mutex_init(&data->update_lock); - /* tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto err_free; - /* initialize the chip */ lm93_init_client(client); err = sysfs_create_group(&client->dev.kobj, &lm93_attr_grp); if (err) - goto err_detach; + goto err_free; /* Register hwmon driver class */ data->hwmon_dev = hwmon_device_register(&client->dev); @@ -2597,43 +2595,39 @@ static int lm93_detect(struct i2c_adapter *adapter, int address, int kind) err = PTR_ERR(data->hwmon_dev); dev_err(&client->dev, "error registering hwmon device.\n"); sysfs_remove_group(&client->dev.kobj, &lm93_attr_grp); -err_detach: - i2c_detach_client(client); err_free: kfree(data); err_out: return err; } -/* This function is called when: - * lm93_driver is inserted (when this module is loaded), for each - available adapter - * when a new adapter is inserted (and lm93_driver is still present) */ -static int lm93_attach_adapter(struct i2c_adapter *adapter) -{ - return i2c_probe(adapter, &addr_data, lm93_detect); -} - -static int lm93_detach_client(struct i2c_client *client) +static int lm93_remove(struct i2c_client *client) { struct lm93_data *data = i2c_get_clientdata(client); - int err = 0; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &lm93_attr_grp); - err = i2c_detach_client(client); - if (!err) - kfree(data); - return err; + kfree(data); + return 0; } +static const struct i2c_device_id lm93_id[] = { + { "lm93", lm93 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, lm93_id); + static struct i2c_driver lm93_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "lm93", }, - .attach_adapter = lm93_attach_adapter, - .detach_client = lm93_detach_client, + .probe = lm93_probe, + .remove = lm93_remove, + .id_table = lm93_id, + .detect = lm93_detect, + .address_data = &addr_data, }; static int __init lm93_init(void) -- cgit v0.10.2 From c6d3f6fa1b0b984991d6e2a261c7dd7f2685c7bd Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:15 +0200 Subject: hwmon: (max1619) Convert to a new-style i2c driver The new-style max1619 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare Cc: Alexey Fisher diff --git a/drivers/hwmon/max1619.c b/drivers/hwmon/max1619.c index 7e7267a..1ab1cac 100644 --- a/drivers/hwmon/max1619.c +++ b/drivers/hwmon/max1619.c @@ -79,23 +79,34 @@ I2C_CLIENT_INSMOD_1(max1619); * Functions declaration */ -static int max1619_attach_adapter(struct i2c_adapter *adapter); -static int max1619_detect(struct i2c_adapter *adapter, int address, - int kind); +static int max1619_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int max1619_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); static void max1619_init_client(struct i2c_client *client); -static int max1619_detach_client(struct i2c_client *client); +static int max1619_remove(struct i2c_client *client); static struct max1619_data *max1619_update_device(struct device *dev); /* * Driver data (common to all clients) */ +static const struct i2c_device_id max1619_id[] = { + { "max1619", max1619 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, max1619_id); + static struct i2c_driver max1619_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "max1619", }, - .attach_adapter = max1619_attach_adapter, - .detach_client = max1619_detach_client, + .probe = max1619_probe, + .remove = max1619_remove, + .id_table = max1619_id, + .detect = max1619_detect, + .address_data = &addr_data, }; /* @@ -103,7 +114,6 @@ static struct i2c_driver max1619_driver = { */ struct max1619_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; char valid; /* zero until following fields are valid */ @@ -208,41 +218,15 @@ static const struct attribute_group max1619_group = { * Real code */ -static int max1619_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, max1619_detect); -} - -/* - * The following function does more than just detection. If detection - * succeeds, it also registers the new chip. - */ -static int max1619_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int max1619_detect(struct i2c_client *new_client, int kind, + struct i2c_board_info *info) { - struct i2c_client *new_client; - struct max1619_data *data; - int err = 0; - const char *name = ""; + struct i2c_adapter *adapter = new_client->adapter; u8 reg_config=0, reg_convrate=0, reg_status=0; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - goto exit; - - if (!(data = kzalloc(sizeof(struct max1619_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - /* The common I2C client data is placed right before the - MAX1619-specific data. */ - new_client = &data->client; - i2c_set_clientdata(new_client, data); - new_client->addr = address; - new_client->adapter = adapter; - new_client->driver = &max1619_driver; - new_client->flags = 0; + return -ENODEV; /* * Now we do the remaining detection. A negative kind means that @@ -265,8 +249,8 @@ static int max1619_detect(struct i2c_adapter *adapter, int address, int kind) || reg_convrate > 0x07 || (reg_status & 0x61 ) !=0x00) { dev_dbg(&adapter->dev, "MAX1619 detection failed at 0x%02x.\n", - address); - goto exit_free; + new_client->addr); + return -ENODEV; } } @@ -285,28 +269,37 @@ static int max1619_detect(struct i2c_adapter *adapter, int address, int kind) dev_info(&adapter->dev, "Unsupported chip (man_id=0x%02X, " "chip_id=0x%02X).\n", man_id, chip_id); - goto exit_free; + return -ENODEV; } } - if (kind == max1619) - name = "max1619"; + strlcpy(info->type, "max1619", I2C_NAME_SIZE); + + return 0; +} + +static int max1619_probe(struct i2c_client *new_client, + const struct i2c_device_id *id) +{ + struct max1619_data *data; + int err; + + data = kzalloc(sizeof(struct max1619_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } - /* We can fill in the remaining client fields */ - strlcpy(new_client->name, name, I2C_NAME_SIZE); + i2c_set_clientdata(new_client, data); data->valid = 0; mutex_init(&data->update_lock); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(new_client))) - goto exit_free; - /* Initialize the MAX1619 chip */ max1619_init_client(new_client); /* Register sysfs hooks */ if ((err = sysfs_create_group(&new_client->dev.kobj, &max1619_group))) - goto exit_detach; + goto exit_free; data->hwmon_dev = hwmon_device_register(&new_client->dev); if (IS_ERR(data->hwmon_dev)) { @@ -318,8 +311,6 @@ static int max1619_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove_files: sysfs_remove_group(&new_client->dev.kobj, &max1619_group); -exit_detach: - i2c_detach_client(new_client); exit_free: kfree(data); exit: @@ -341,17 +332,13 @@ static void max1619_init_client(struct i2c_client *client) config & 0xBF); /* run */ } -static int max1619_detach_client(struct i2c_client *client) +static int max1619_remove(struct i2c_client *client) { struct max1619_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &max1619_group); - if ((err = i2c_detach_client(client))) - return err; - kfree(data); return 0; } -- cgit v0.10.2 From 0d57abd5b87e2e82d8d2e8d5c9a3b56743ffa5ab Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:16 +0200 Subject: hwmon: (max6650) Convert to a new-style i2c driver The new-style max6650 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare Cc: Hans J. Koch diff --git a/drivers/hwmon/max6650.c b/drivers/hwmon/max6650.c index 52d528b..f27af6a 100644 --- a/drivers/hwmon/max6650.c +++ b/drivers/hwmon/max6650.c @@ -104,22 +104,34 @@ I2C_CLIENT_INSMOD_1(max6650); #define DIV_FROM_REG(reg) (1 << (reg & 7)) -static int max6650_attach_adapter(struct i2c_adapter *adapter); -static int max6650_detect(struct i2c_adapter *adapter, int address, int kind); +static int max6650_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int max6650_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); static int max6650_init_client(struct i2c_client *client); -static int max6650_detach_client(struct i2c_client *client); +static int max6650_remove(struct i2c_client *client); static struct max6650_data *max6650_update_device(struct device *dev); /* * Driver data (common to all clients) */ +static const struct i2c_device_id max6650_id[] = { + { "max6650", max6650 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, max6650_id); + static struct i2c_driver max6650_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "max6650", }, - .attach_adapter = max6650_attach_adapter, - .detach_client = max6650_detach_client, + .probe = max6650_probe, + .remove = max6650_remove, + .id_table = max6650_id, + .detect = max6650_detect, + .address_data = &addr_data, }; /* @@ -128,7 +140,6 @@ static struct i2c_driver max6650_driver = { struct max6650_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; char valid; /* zero until following fields are valid */ @@ -437,47 +448,21 @@ static struct attribute_group max6650_attr_grp = { * Real code */ -static int max6650_attach_adapter(struct i2c_adapter *adapter) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int max6650_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - if (!(adapter->class & I2C_CLASS_HWMON)) { - dev_dbg(&adapter->dev, - "FATAL: max6650_attach_adapter class HWMON not set\n"); - return 0; - } - - return i2c_probe(adapter, &addr_data, max6650_detect); -} - -/* - * The following function does more than just detection. If detection - * succeeds, it also registers the new chip. - */ - -static int max6650_detect(struct i2c_adapter *adapter, int address, int kind) -{ - struct i2c_client *client; - struct max6650_data *data; - int err = -ENODEV; + struct i2c_adapter *adapter = client->adapter; + int address = client->addr; dev_dbg(&adapter->dev, "max6650_detect called, kind = %d\n", kind); if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) { dev_dbg(&adapter->dev, "max6650: I2C bus doesn't support " "byte read mode, skipping.\n"); - return 0; - } - - if (!(data = kzalloc(sizeof(struct max6650_data), GFP_KERNEL))) { - dev_err(&adapter->dev, "max6650: out of memory.\n"); - return -ENOMEM; + return -ENODEV; } - client = &data->client; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &max6650_driver; - /* * Now we do the remaining detection. A negative kind means that * the driver was loaded with no force parameter (default), so we @@ -501,28 +486,40 @@ static int max6650_detect(struct i2c_adapter *adapter, int address, int kind) ||(i2c_smbus_read_byte_data(client, MAX6650_REG_COUNT) & 0xFC))) { dev_dbg(&adapter->dev, "max6650: detection failed at 0x%02x.\n", address); - goto err_free; + return -ENODEV; } dev_info(&adapter->dev, "max6650: chip found at 0x%02x.\n", address); - strlcpy(client->name, "max6650", I2C_NAME_SIZE); - mutex_init(&data->update_lock); + strlcpy(info->type, "max6650", I2C_NAME_SIZE); - if ((err = i2c_attach_client(client))) { - dev_err(&adapter->dev, "max6650: failed to attach client.\n"); - goto err_free; + return 0; +} + +static int max6650_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct max6650_data *data; + int err; + + if (!(data = kzalloc(sizeof(struct max6650_data), GFP_KERNEL))) { + dev_err(&client->dev, "out of memory.\n"); + return -ENOMEM; } + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); + /* * Initialize the max6650 chip */ - if (max6650_init_client(client)) - goto err_detach; + err = max6650_init_client(client); + if (err) + goto err_free; err = sysfs_create_group(&client->dev.kobj, &max6650_attr_grp); if (err) - goto err_detach; + goto err_free; data->hwmon_dev = hwmon_device_register(&client->dev); if (!IS_ERR(data->hwmon_dev)) @@ -531,24 +528,19 @@ static int max6650_detect(struct i2c_adapter *adapter, int address, int kind) err = PTR_ERR(data->hwmon_dev); dev_err(&client->dev, "error registering hwmon device.\n"); sysfs_remove_group(&client->dev.kobj, &max6650_attr_grp); -err_detach: - i2c_detach_client(client); err_free: kfree(data); return err; } -static int max6650_detach_client(struct i2c_client *client) +static int max6650_remove(struct i2c_client *client) { struct max6650_data *data = i2c_get_clientdata(client); - int err; sysfs_remove_group(&client->dev.kobj, &max6650_attr_grp); hwmon_device_unregister(data->hwmon_dev); - err = i2c_detach_client(client); - if (!err) - kfree(data); - return err; + kfree(data); + return 0; } static int max6650_init_client(struct i2c_client *client) -- cgit v0.10.2 From 8fb597bb6ec80d53836229bf3576c7b848b909e3 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:16 +0200 Subject: hwmon: (smsc47m192) Convert to a new-style i2c driver The new-style smsc47m192 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare Cc: Hartmut Rick diff --git a/drivers/hwmon/smsc47m192.c b/drivers/hwmon/smsc47m192.c index 3c9db65..8bb5cb5 100644 --- a/drivers/hwmon/smsc47m192.c +++ b/drivers/hwmon/smsc47m192.c @@ -96,7 +96,6 @@ static inline int TEMP_FROM_REG(s8 val) } struct smsc47m192_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; char valid; /* !=0 if following fields are valid */ @@ -114,18 +113,29 @@ struct smsc47m192_data { u8 vrm; }; -static int smsc47m192_attach_adapter(struct i2c_adapter *adapter); -static int smsc47m192_detect(struct i2c_adapter *adapter, int address, - int kind); -static int smsc47m192_detach_client(struct i2c_client *client); +static int smsc47m192_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int smsc47m192_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); +static int smsc47m192_remove(struct i2c_client *client); static struct smsc47m192_data *smsc47m192_update_device(struct device *dev); +static const struct i2c_device_id smsc47m192_id[] = { + { "smsc47m192", smsc47m192 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, smsc47m192_id); + static struct i2c_driver smsc47m192_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "smsc47m192", }, - .attach_adapter = smsc47m192_attach_adapter, - .detach_client = smsc47m192_detach_client, + .probe = smsc47m192_probe, + .remove = smsc47m192_remove, + .id_table = smsc47m192_id, + .detect = smsc47m192_detect, + .address_data = &addr_data, }; /* Voltages */ @@ -440,17 +450,6 @@ static const struct attribute_group smsc47m192_group_in4 = { .attrs = smsc47m192_attributes_in4, }; -/* This function is called when: - * smsc47m192_driver is inserted (when this module is loaded), for each - available adapter - * when a new adapter is inserted (and smsc47m192_driver is still present) */ -static int smsc47m192_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, smsc47m192_detect); -} - static void smsc47m192_init_client(struct i2c_client *client) { int i; @@ -481,31 +480,15 @@ static void smsc47m192_init_client(struct i2c_client *client) } } -/* This function is called by i2c_probe */ -static int smsc47m192_detect(struct i2c_adapter *adapter, int address, - int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int smsc47m192_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - struct i2c_client *client; - struct smsc47m192_data *data; - int err = 0; - int version, config; + struct i2c_adapter *adapter = client->adapter; + int version; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - goto exit; - - if (!(data = kzalloc(sizeof(struct smsc47m192_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - client = &data->client; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &smsc47m192_driver; - - if (kind == 0) - kind = smsc47m192; + return -ENODEV; /* Detection criteria from sensors_detect script */ if (kind < 0) { @@ -523,26 +506,39 @@ static int smsc47m192_detect(struct i2c_adapter *adapter, int address, } else { dev_dbg(&adapter->dev, "SMSC47M192 detection failed at 0x%02x\n", - address); - goto exit_free; + client->addr); + return -ENODEV; } } - /* Fill in the remaining client fields and put into the global list */ - strlcpy(client->name, "smsc47m192", I2C_NAME_SIZE); + strlcpy(info->type, "smsc47m192", I2C_NAME_SIZE); + + return 0; +} + +static int smsc47m192_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct smsc47m192_data *data; + int config; + int err; + + data = kzalloc(sizeof(struct smsc47m192_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(client, data); data->vrm = vid_which_vrm(); mutex_init(&data->update_lock); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto exit_free; - /* Initialize the SMSC47M192 chip */ smsc47m192_init_client(client); /* Register sysfs hooks */ if ((err = sysfs_create_group(&client->dev.kobj, &smsc47m192_group))) - goto exit_detach; + goto exit_free; /* Pin 110 is either in4 (+12V) or VID4 */ config = i2c_smbus_read_byte_data(client, SMSC47M192_REG_CONFIG); @@ -563,26 +559,20 @@ static int smsc47m192_detect(struct i2c_adapter *adapter, int address, exit_remove_files: sysfs_remove_group(&client->dev.kobj, &smsc47m192_group); sysfs_remove_group(&client->dev.kobj, &smsc47m192_group_in4); -exit_detach: - i2c_detach_client(client); exit_free: kfree(data); exit: return err; } -static int smsc47m192_detach_client(struct i2c_client *client) +static int smsc47m192_remove(struct i2c_client *client) { struct smsc47m192_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &smsc47m192_group); sysfs_remove_group(&client->dev.kobj, &smsc47m192_group_in4); - if ((err = i2c_detach_client(client))) - return err; - kfree(data); return 0; -- cgit v0.10.2 From ccf37488322429bf8709f2227f3d48466add2b6b Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:16 +0200 Subject: hwmon: (thmc50) Convert to a new-style i2c driver The new-style thmc50 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare Cc: Krzysztof Helt diff --git a/drivers/hwmon/thmc50.c b/drivers/hwmon/thmc50.c index 76a3859..3b01001 100644 --- a/drivers/hwmon/thmc50.c +++ b/drivers/hwmon/thmc50.c @@ -60,7 +60,6 @@ static const u8 THMC50_REG_TEMP_MAX[] = { 0x39, 0x37, 0x2B }; /* Each client has this additional data */ struct thmc50_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; @@ -77,17 +76,31 @@ struct thmc50_data { u8 alarms; }; -static int thmc50_attach_adapter(struct i2c_adapter *adapter); -static int thmc50_detach_client(struct i2c_client *client); +static int thmc50_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); +static int thmc50_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int thmc50_remove(struct i2c_client *client); static void thmc50_init_client(struct i2c_client *client); static struct thmc50_data *thmc50_update_device(struct device *dev); +static const struct i2c_device_id thmc50_id[] = { + { "adm1022", adm1022 }, + { "thmc50", thmc50 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, thmc50_id); + static struct i2c_driver thmc50_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "thmc50", }, - .attach_adapter = thmc50_attach_adapter, - .detach_client = thmc50_detach_client, + .probe = thmc50_probe, + .remove = thmc50_remove, + .id_table = thmc50_id, + .detect = thmc50_detect, + .address_data = &addr_data, }; static ssize_t show_analog_out(struct device *dev, @@ -250,39 +263,23 @@ static const struct attribute_group temp3_group = { .attrs = temp3_attributes, }; -static int thmc50_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int thmc50_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { unsigned company; unsigned revision; unsigned config; - struct i2c_client *client; - struct thmc50_data *data; - struct device *dev; + struct i2c_adapter *adapter = client->adapter; int err = 0; const char *type_name; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) { pr_debug("thmc50: detect failed, " "smbus byte data not supported!\n"); - goto exit; - } - - /* OK. For now, we presume we have a valid client. We now create the - client structure, even though we cannot fill it completely yet. - But it allows us to access thmc50 registers. */ - if (!(data = kzalloc(sizeof(struct thmc50_data), GFP_KERNEL))) { - pr_debug("thmc50: detect failed, kzalloc failed!\n"); - err = -ENOMEM; - goto exit; + return -ENODEV; } - client = &data->client; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &thmc50_driver; - dev = &client->dev; - pr_debug("thmc50: Probing for THMC50 at 0x%2X on bus %d\n", client->addr, i2c_adapter_id(client->adapter)); @@ -307,21 +304,22 @@ static int thmc50_detect(struct i2c_adapter *adapter, int address, int kind) } if (err == -ENODEV) { pr_debug("thmc50: Detection of THMC50/ADM1022 failed\n"); - goto exit_free; + return err; } - data->type = kind; if (kind == adm1022) { int id = i2c_adapter_id(client->adapter); int i; type_name = "adm1022"; - data->has_temp3 = (config >> 7) & 1; /* config MSB */ for (i = 0; i + 1 < adm1022_temp3_num; i += 2) if (adm1022_temp3[i] == id && - adm1022_temp3[i + 1] == address) { + adm1022_temp3[i + 1] == client->addr) { /* enable 2nd remote temp */ - data->has_temp3 = 1; + config |= (1 << 7); + i2c_smbus_write_byte_data(client, + THMC50_REG_CONF, + config); break; } } else { @@ -330,19 +328,33 @@ static int thmc50_detect(struct i2c_adapter *adapter, int address, int kind) pr_debug("thmc50: Detected %s (version %x, revision %x)\n", type_name, (revision >> 4) - 0xc, revision & 0xf); - /* Fill in the remaining client fields & put it into the global list */ - strlcpy(client->name, type_name, I2C_NAME_SIZE); - mutex_init(&data->update_lock); + strlcpy(info->type, type_name, I2C_NAME_SIZE); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto exit_free; + return 0; +} + +static int thmc50_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct thmc50_data *data; + int err; + + data = kzalloc(sizeof(struct thmc50_data), GFP_KERNEL); + if (!data) { + pr_debug("thmc50: detect failed, kzalloc failed!\n"); + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(client, data); + data->type = id->driver_data; + mutex_init(&data->update_lock); thmc50_init_client(client); /* Register sysfs hooks */ if ((err = sysfs_create_group(&client->dev.kobj, &thmc50_group))) - goto exit_detach; + goto exit_free; /* Register ADM1022 sysfs hooks */ if (data->has_temp3) @@ -364,34 +376,21 @@ exit_remove_sysfs: sysfs_remove_group(&client->dev.kobj, &temp3_group); exit_remove_sysfs_thmc50: sysfs_remove_group(&client->dev.kobj, &thmc50_group); -exit_detach: - i2c_detach_client(client); exit_free: kfree(data); exit: return err; } -static int thmc50_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, thmc50_detect); -} - -static int thmc50_detach_client(struct i2c_client *client) +static int thmc50_remove(struct i2c_client *client) { struct thmc50_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &thmc50_group); if (data->has_temp3) sysfs_remove_group(&client->dev.kobj, &temp3_group); - if ((err = i2c_detach_client(client))) - return err; - kfree(data); return 0; @@ -412,8 +411,8 @@ static void thmc50_init_client(struct i2c_client *client) } config = i2c_smbus_read_byte_data(client, THMC50_REG_CONF); config |= 0x1; /* start the chip if it is in standby mode */ - if (data->has_temp3) - config |= 0x80; /* enable 2nd remote temp */ + if (data->type == adm1022 && (config & (1 << 7))) + data->has_temp3 = 1; i2c_smbus_write_byte_data(client, THMC50_REG_CONF, config); } -- cgit v0.10.2 From cb0c1af37996f3016e34e9c709e5f727646f7207 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:17 +0200 Subject: hwmon: (w83791d) Convert to a new-style i2c driver The new-style w83791d driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare Cc: Marc Hulsman diff --git a/drivers/hwmon/w83791d.c b/drivers/hwmon/w83791d.c index 85077c4..e4e91c9 100644 --- a/drivers/hwmon/w83791d.c +++ b/drivers/hwmon/w83791d.c @@ -247,7 +247,6 @@ static u8 div_to_reg(int nr, long val) } struct w83791d_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; @@ -286,9 +285,11 @@ struct w83791d_data { u8 vrm; /* hwmon-vid */ }; -static int w83791d_attach_adapter(struct i2c_adapter *adapter); -static int w83791d_detect(struct i2c_adapter *adapter, int address, int kind); -static int w83791d_detach_client(struct i2c_client *client); +static int w83791d_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int w83791d_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); +static int w83791d_remove(struct i2c_client *client); static int w83791d_read(struct i2c_client *client, u8 register); static int w83791d_write(struct i2c_client *client, u8 register, u8 value); @@ -300,12 +301,22 @@ static void w83791d_print_debug(struct w83791d_data *data, struct device *dev); static void w83791d_init_client(struct i2c_client *client); +static const struct i2c_device_id w83791d_id[] = { + { "w83791d", w83791d }, + { } +}; +MODULE_DEVICE_TABLE(i2c, w83791d_id); + static struct i2c_driver w83791d_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "w83791d", }, - .attach_adapter = w83791d_attach_adapter, - .detach_client = w83791d_detach_client, + .probe = w83791d_probe, + .remove = w83791d_remove, + .id_table = w83791d_id, + .detect = w83791d_detect, + .address_data = &addr_data, }; /* following are the sysfs callback functions */ @@ -905,49 +916,12 @@ static const struct attribute_group w83791d_group = { .attrs = w83791d_attributes, }; -/* This function is called when: - * w83791d_driver is inserted (when this module is loaded), for each - available adapter - * when a new adapter is inserted (and w83791d_driver is still present) */ -static int w83791d_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, w83791d_detect); -} - -static int w83791d_create_subclient(struct i2c_adapter *adapter, - struct i2c_client *client, int addr, - struct i2c_client **sub_cli) -{ - int err; - struct i2c_client *sub_client; - - (*sub_cli) = sub_client = - kzalloc(sizeof(struct i2c_client), GFP_KERNEL); - if (!(sub_client)) { - return -ENOMEM; - } - sub_client->addr = 0x48 + addr; - i2c_set_clientdata(sub_client, NULL); - sub_client->adapter = adapter; - sub_client->driver = &w83791d_driver; - strlcpy(sub_client->name, "w83791d subclient", I2C_NAME_SIZE); - if ((err = i2c_attach_client(sub_client))) { - dev_err(&client->dev, "subclient registration " - "at address 0x%x failed\n", sub_client->addr); - kfree(sub_client); - return err; - } - return 0; -} - - -static int w83791d_detect_subclients(struct i2c_adapter *adapter, int address, - int kind, struct i2c_client *client) +static int w83791d_detect_subclients(struct i2c_client *client) { + struct i2c_adapter *adapter = client->adapter; struct w83791d_data *data = i2c_get_clientdata(client); + int address = client->addr; int i, id, err; u8 val; @@ -971,10 +945,7 @@ static int w83791d_detect_subclients(struct i2c_adapter *adapter, int address, val = w83791d_read(client, W83791D_REG_I2C_SUBADDR); if (!(val & 0x08)) { - err = w83791d_create_subclient(adapter, client, - val & 0x7, &data->lm75[0]); - if (err < 0) - goto error_sc_0; + data->lm75[0] = i2c_new_dummy(adapter, 0x48 + (val & 0x7)); } if (!(val & 0x80)) { if ((data->lm75[0] != NULL) && @@ -986,10 +957,8 @@ static int w83791d_detect_subclients(struct i2c_adapter *adapter, int address, err = -ENODEV; goto error_sc_1; } - err = w83791d_create_subclient(adapter, client, - (val >> 4) & 0x7, &data->lm75[1]); - if (err < 0) - goto error_sc_1; + data->lm75[1] = i2c_new_dummy(adapter, + 0x48 + ((val >> 4) & 0x7)); } return 0; @@ -997,53 +966,31 @@ static int w83791d_detect_subclients(struct i2c_adapter *adapter, int address, /* Undo inits in case of errors */ error_sc_1: - if (data->lm75[0] != NULL) { - i2c_detach_client(data->lm75[0]); - kfree(data->lm75[0]); - } + if (data->lm75[0] != NULL) + i2c_unregister_device(data->lm75[0]); error_sc_0: return err; } -static int w83791d_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int w83791d_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - struct i2c_client *client; - struct device *dev; - struct w83791d_data *data; - int i, val1, val2; - int err = 0; - const char *client_name = ""; + struct i2c_adapter *adapter = client->adapter; + int val1, val2; + unsigned short address = client->addr; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) { - goto error0; + return -ENODEV; } - /* OK. For now, we presume we have a valid client. We now create the - client structure, even though we cannot fill it completely yet. - But it allows us to access w83791d_{read,write}_value. */ - if (!(data = kzalloc(sizeof(struct w83791d_data), GFP_KERNEL))) { - err = -ENOMEM; - goto error0; - } - - client = &data->client; - dev = &client->dev; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &w83791d_driver; - mutex_init(&data->update_lock); - - /* Now, we do the remaining detection. */ - /* The w83791d may be stuck in some other bank than bank 0. This may make reading other information impossible. Specify a force=... parameter, and the Winbond will be reset to the right bank. */ if (kind < 0) { if (w83791d_read(client, W83791D_REG_CONFIG) & 0x80) { - dev_dbg(dev, "Detection failed at step 1\n"); - goto error1; + return -ENODEV; } val1 = w83791d_read(client, W83791D_REG_BANK); val2 = w83791d_read(client, W83791D_REG_CHIPMAN); @@ -1052,15 +999,13 @@ static int w83791d_detect(struct i2c_adapter *adapter, int address, int kind) /* yes it is Bank0 */ if (((!(val1 & 0x80)) && (val2 != 0xa3)) || ((val1 & 0x80) && (val2 != 0x5c))) { - dev_dbg(dev, "Detection failed at step 2\n"); - goto error1; + return -ENODEV; } } /* If Winbond chip, address of chip and W83791D_REG_I2C_ADDR should match */ if (w83791d_read(client, W83791D_REG_I2C_ADDR) != address) { - dev_dbg(dev, "Detection failed at step 3\n"); - goto error1; + return -ENODEV; } } @@ -1075,30 +1020,33 @@ static int w83791d_detect(struct i2c_adapter *adapter, int address, int kind) /* get vendor ID */ val2 = w83791d_read(client, W83791D_REG_CHIPMAN); if (val2 != 0x5c) { /* the vendor is NOT Winbond */ - dev_dbg(dev, "Detection failed at step 4\n"); - goto error1; + return -ENODEV; } val1 = w83791d_read(client, W83791D_REG_WCHIPID); if (val1 == 0x71) { kind = w83791d; } else { if (kind == 0) - dev_warn(dev, + dev_warn(&adapter->dev, "w83791d: Ignoring 'force' parameter " "for unknown chip at adapter %d, " "address 0x%02x\n", i2c_adapter_id(adapter), address); - goto error1; + return -ENODEV; } } - if (kind == w83791d) { - client_name = "w83791d"; - } else { - dev_err(dev, "w83791d: Internal error: unknown kind (%d)?!?\n", - kind); - goto error1; - } + strlcpy(info->type, "w83791d", I2C_NAME_SIZE); + + return 0; +} + +static int w83791d_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct w83791d_data *data; + struct device *dev = &client->dev; + int i, val1, err; #ifdef DEBUG val1 = w83791d_read(client, W83791D_REG_DID_VID4); @@ -1106,15 +1054,18 @@ static int w83791d_detect(struct i2c_adapter *adapter, int address, int kind) (val1 >> 5) & 0x07, (val1 >> 1) & 0x0f, val1); #endif - /* Fill in the remaining client fields and put into the global list */ - strlcpy(client->name, client_name, I2C_NAME_SIZE); + data = kzalloc(sizeof(struct w83791d_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto error0; + } - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto error1; + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); - if ((err = w83791d_detect_subclients(adapter, address, kind, client))) - goto error2; + err = w83791d_detect_subclients(client); + if (err) + goto error1; /* Initialize the chip */ w83791d_init_client(client); @@ -1141,43 +1092,29 @@ static int w83791d_detect(struct i2c_adapter *adapter, int address, int kind) error4: sysfs_remove_group(&client->dev.kobj, &w83791d_group); error3: - if (data->lm75[0] != NULL) { - i2c_detach_client(data->lm75[0]); - kfree(data->lm75[0]); - } - if (data->lm75[1] != NULL) { - i2c_detach_client(data->lm75[1]); - kfree(data->lm75[1]); - } -error2: - i2c_detach_client(client); + if (data->lm75[0] != NULL) + i2c_unregister_device(data->lm75[0]); + if (data->lm75[1] != NULL) + i2c_unregister_device(data->lm75[1]); error1: kfree(data); error0: return err; } -static int w83791d_detach_client(struct i2c_client *client) +static int w83791d_remove(struct i2c_client *client) { struct w83791d_data *data = i2c_get_clientdata(client); - int err; - - /* main client */ - if (data) { - hwmon_device_unregister(data->hwmon_dev); - sysfs_remove_group(&client->dev.kobj, &w83791d_group); - } - if ((err = i2c_detach_client(client))) - return err; + hwmon_device_unregister(data->hwmon_dev); + sysfs_remove_group(&client->dev.kobj, &w83791d_group); - /* main client */ - if (data) - kfree(data); - /* subclient */ - else - kfree(client); + if (data->lm75[0] != NULL) + i2c_unregister_device(data->lm75[0]); + if (data->lm75[1] != NULL) + i2c_unregister_device(data->lm75[1]); + kfree(data); return 0; } -- cgit v0.10.2 From 31d5d275a118527a2b9b0f68613428f176efbb8f Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:17 +0200 Subject: hwmon: (w83792d) Convert to a new-style i2c driver The new-style w83792d driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare diff --git a/drivers/hwmon/w83792d.c b/drivers/hwmon/w83792d.c index 299629d..cf94c5b 100644 --- a/drivers/hwmon/w83792d.c +++ b/drivers/hwmon/w83792d.c @@ -267,9 +267,7 @@ DIV_TO_REG(long val) } struct w83792d_data { - struct i2c_client client; struct device *hwmon_dev; - enum chips type; struct mutex update_lock; char valid; /* !=0 if following fields are valid */ @@ -299,9 +297,11 @@ struct w83792d_data { u8 sf2_levels[3][4]; /* Smart FanII: Fan1,2,3 duty cycle levels */ }; -static int w83792d_attach_adapter(struct i2c_adapter *adapter); -static int w83792d_detect(struct i2c_adapter *adapter, int address, int kind); -static int w83792d_detach_client(struct i2c_client *client); +static int w83792d_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int w83792d_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); +static int w83792d_remove(struct i2c_client *client); static struct w83792d_data *w83792d_update_device(struct device *dev); #ifdef DEBUG @@ -310,12 +310,22 @@ static void w83792d_print_debug(struct w83792d_data *data, struct device *dev); static void w83792d_init_client(struct i2c_client *client); +static const struct i2c_device_id w83792d_id[] = { + { "w83792d", w83792d }, + { } +}; +MODULE_DEVICE_TABLE(i2c, w83792d_id); + static struct i2c_driver w83792d_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "w83792d", }, - .attach_adapter = w83792d_attach_adapter, - .detach_client = w83792d_detach_client, + .probe = w83792d_probe, + .remove = w83792d_remove, + .id_table = w83792d_id, + .detect = w83792d_detect, + .address_data = &addr_data, }; static inline long in_count_from_reg(int nr, struct w83792d_data *data) @@ -864,53 +874,14 @@ store_sf2_level(struct device *dev, struct device_attribute *attr, return count; } -/* This function is called when: - * w83792d_driver is inserted (when this module is loaded), for each - available adapter - * when a new adapter is inserted (and w83792d_driver is still present) */ -static int -w83792d_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, w83792d_detect); -} - - -static int -w83792d_create_subclient(struct i2c_adapter *adapter, - struct i2c_client *new_client, int addr, - struct i2c_client **sub_cli) -{ - int err; - struct i2c_client *sub_client; - - (*sub_cli) = sub_client = kzalloc(sizeof(struct i2c_client), GFP_KERNEL); - if (!(sub_client)) { - return -ENOMEM; - } - sub_client->addr = 0x48 + addr; - i2c_set_clientdata(sub_client, NULL); - sub_client->adapter = adapter; - sub_client->driver = &w83792d_driver; - sub_client->flags = 0; - strlcpy(sub_client->name, "w83792d subclient", I2C_NAME_SIZE); - if ((err = i2c_attach_client(sub_client))) { - dev_err(&new_client->dev, "subclient registration " - "at address 0x%x failed\n", sub_client->addr); - kfree(sub_client); - return err; - } - return 0; -} - static int -w83792d_detect_subclients(struct i2c_adapter *adapter, int address, int kind, - struct i2c_client *new_client) +w83792d_detect_subclients(struct i2c_client *new_client) { int i, id, err; + int address = new_client->addr; u8 val; + struct i2c_adapter *adapter = new_client->adapter; struct w83792d_data *data = i2c_get_clientdata(new_client); id = i2c_adapter_id(adapter); @@ -932,10 +903,7 @@ w83792d_detect_subclients(struct i2c_adapter *adapter, int address, int kind, val = w83792d_read_value(new_client, W83792D_REG_I2C_SUBADDR); if (!(val & 0x08)) { - err = w83792d_create_subclient(adapter, new_client, val & 0x7, - &data->lm75[0]); - if (err < 0) - goto ERROR_SC_0; + data->lm75[0] = i2c_new_dummy(adapter, 0x48 + (val & 0x7)); } if (!(val & 0x80)) { if ((data->lm75[0] != NULL) && @@ -945,10 +913,8 @@ w83792d_detect_subclients(struct i2c_adapter *adapter, int address, int kind, err = -ENODEV; goto ERROR_SC_1; } - err = w83792d_create_subclient(adapter, new_client, - (val >> 4) & 0x7, &data->lm75[1]); - if (err < 0) - goto ERROR_SC_1; + data->lm75[1] = i2c_new_dummy(adapter, + 0x48 + ((val >> 4) & 0x7)); } return 0; @@ -956,10 +922,8 @@ w83792d_detect_subclients(struct i2c_adapter *adapter, int address, int kind, /* Undo inits in case of errors */ ERROR_SC_1: - if (data->lm75[0] != NULL) { - i2c_detach_client(data->lm75[0]); - kfree(data->lm75[0]); - } + if (data->lm75[0] != NULL) + i2c_unregister_device(data->lm75[0]); ERROR_SC_0: return err; } @@ -1294,47 +1258,25 @@ static const struct attribute_group w83792d_group = { .attrs = w83792d_attributes, }; +/* Return 0 if detection is successful, -ENODEV otherwise */ static int -w83792d_detect(struct i2c_adapter *adapter, int address, int kind) +w83792d_detect(struct i2c_client *client, int kind, struct i2c_board_info *info) { - int i = 0, val1 = 0, val2; - struct i2c_client *client; - struct device *dev; - struct w83792d_data *data; - int err = 0; - const char *client_name = ""; + struct i2c_adapter *adapter = client->adapter; + int val1, val2; + unsigned short address = client->addr; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) { - goto ERROR0; - } - - /* OK. For now, we presume we have a valid client. We now create the - client structure, even though we cannot fill it completely yet. - But it allows us to access w83792d_{read,write}_value. */ - - if (!(data = kzalloc(sizeof(struct w83792d_data), GFP_KERNEL))) { - err = -ENOMEM; - goto ERROR0; + return -ENODEV; } - client = &data->client; - dev = &client->dev; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &w83792d_driver; - client->flags = 0; - - /* Now, we do the remaining detection. */ - /* The w83792d may be stuck in some other bank than bank 0. This may make reading other information impossible. Specify a force=... or force_*=... parameter, and the Winbond will be reset to the right bank. */ if (kind < 0) { if (w83792d_read_value(client, W83792D_REG_CONFIG) & 0x80) { - dev_dbg(dev, "Detection failed at step 1\n"); - goto ERROR1; + return -ENODEV; } val1 = w83792d_read_value(client, W83792D_REG_BANK); val2 = w83792d_read_value(client, W83792D_REG_CHIPMAN); @@ -1342,16 +1284,14 @@ w83792d_detect(struct i2c_adapter *adapter, int address, int kind) if (!(val1 & 0x07)) { /* is Bank0 */ if (((!(val1 & 0x80)) && (val2 != 0xa3)) || ((val1 & 0x80) && (val2 != 0x5c))) { - dev_dbg(dev, "Detection failed at step 2\n"); - goto ERROR1; + return -ENODEV; } } /* If Winbond chip, address of chip and W83792D_REG_I2C_ADDR should match */ if (w83792d_read_value(client, W83792D_REG_I2C_ADDR) != address) { - dev_dbg(dev, "Detection failed at step 3\n"); - goto ERROR1; + return -ENODEV; } } @@ -1367,45 +1307,48 @@ w83792d_detect(struct i2c_adapter *adapter, int address, int kind) /* get vendor ID */ val2 = w83792d_read_value(client, W83792D_REG_CHIPMAN); if (val2 != 0x5c) { /* the vendor is NOT Winbond */ - goto ERROR1; + return -ENODEV; } val1 = w83792d_read_value(client, W83792D_REG_WCHIPID); if (val1 == 0x7a) { kind = w83792d; } else { if (kind == 0) - dev_warn(dev, + dev_warn(&adapter->dev, "w83792d: Ignoring 'force' parameter for" " unknown chip at adapter %d, address" " 0x%02x\n", i2c_adapter_id(adapter), address); - goto ERROR1; + return -ENODEV; } } - if (kind == w83792d) { - client_name = "w83792d"; - } else { - dev_err(dev, "w83792d: Internal error: unknown kind (%d)?!?\n", - kind); - goto ERROR1; - } + strlcpy(info->type, "w83792d", I2C_NAME_SIZE); + + return 0; +} + +static int +w83792d_probe(struct i2c_client *client, const struct i2c_device_id *id) +{ + struct w83792d_data *data; + struct device *dev = &client->dev; + int i, val1, err; - /* Fill in the remaining client fields and put into the global list */ - strlcpy(client->name, client_name, I2C_NAME_SIZE); - data->type = kind; + data = kzalloc(sizeof(struct w83792d_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto ERROR0; + } + i2c_set_clientdata(client, data); data->valid = 0; mutex_init(&data->update_lock); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) + err = w83792d_detect_subclients(client); + if (err) goto ERROR1; - if ((err = w83792d_detect_subclients(adapter, address, - kind, client))) - goto ERROR2; - /* Initialize the chip */ w83792d_init_client(client); @@ -1457,16 +1400,10 @@ exit_remove_files: for (i = 0; i < ARRAY_SIZE(w83792d_group_fan); i++) sysfs_remove_group(&dev->kobj, &w83792d_group_fan[i]); ERROR3: - if (data->lm75[0] != NULL) { - i2c_detach_client(data->lm75[0]); - kfree(data->lm75[0]); - } - if (data->lm75[1] != NULL) { - i2c_detach_client(data->lm75[1]); - kfree(data->lm75[1]); - } -ERROR2: - i2c_detach_client(client); + if (data->lm75[0] != NULL) + i2c_unregister_device(data->lm75[0]); + if (data->lm75[1] != NULL) + i2c_unregister_device(data->lm75[1]); ERROR1: kfree(data); ERROR0: @@ -1474,30 +1411,23 @@ ERROR0: } static int -w83792d_detach_client(struct i2c_client *client) +w83792d_remove(struct i2c_client *client) { struct w83792d_data *data = i2c_get_clientdata(client); - int err, i; - - /* main client */ - if (data) { - hwmon_device_unregister(data->hwmon_dev); - sysfs_remove_group(&client->dev.kobj, &w83792d_group); - for (i = 0; i < ARRAY_SIZE(w83792d_group_fan); i++) - sysfs_remove_group(&client->dev.kobj, - &w83792d_group_fan[i]); - } + int i; - if ((err = i2c_detach_client(client))) - return err; + hwmon_device_unregister(data->hwmon_dev); + sysfs_remove_group(&client->dev.kobj, &w83792d_group); + for (i = 0; i < ARRAY_SIZE(w83792d_group_fan); i++) + sysfs_remove_group(&client->dev.kobj, + &w83792d_group_fan[i]); - /* main client */ - if (data) - kfree(data); - /* subclient */ - else - kfree(client); + if (data->lm75[0] != NULL) + i2c_unregister_device(data->lm75[0]); + if (data->lm75[1] != NULL) + i2c_unregister_device(data->lm75[1]); + kfree(data); return 0; } -- cgit v0.10.2 From a7f13a6ec40379fe2116c647ac8e569227ba8d4f Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:17 +0200 Subject: hwmon: (w83793) Convert to a new-style i2c driver The new-style w83793 driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare diff --git a/drivers/hwmon/w83793.c b/drivers/hwmon/w83793.c index ed3c019..0a739f1 100644 --- a/drivers/hwmon/w83793.c +++ b/drivers/hwmon/w83793.c @@ -179,7 +179,6 @@ static inline s8 TEMP_TO_REG(long val, s8 min, s8 max) } struct w83793_data { - struct i2c_client client; struct i2c_client *lm75[2]; struct device *hwmon_dev; struct mutex update_lock; @@ -226,19 +225,31 @@ struct w83793_data { static u8 w83793_read_value(struct i2c_client *client, u16 reg); static int w83793_write_value(struct i2c_client *client, u16 reg, u8 value); -static int w83793_attach_adapter(struct i2c_adapter *adapter); -static int w83793_detect(struct i2c_adapter *adapter, int address, int kind); -static int w83793_detach_client(struct i2c_client *client); +static int w83793_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int w83793_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); +static int w83793_remove(struct i2c_client *client); static void w83793_init_client(struct i2c_client *client); static void w83793_update_nonvolatile(struct device *dev); static struct w83793_data *w83793_update_device(struct device *dev); +static const struct i2c_device_id w83793_id[] = { + { "w83793", w83793 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, w83793_id); + static struct i2c_driver w83793_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "w83793", }, - .attach_adapter = w83793_attach_adapter, - .detach_client = w83793_detach_client, + .probe = w83793_probe, + .remove = w83793_remove, + .id_table = w83793_id, + .detect = w83793_detect, + .address_data = &addr_data, }; static ssize_t @@ -1053,89 +1064,51 @@ static void w83793_init_client(struct i2c_client *client) } -static int w83793_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, w83793_detect); -} - -static int w83793_detach_client(struct i2c_client *client) +static int w83793_remove(struct i2c_client *client) { struct w83793_data *data = i2c_get_clientdata(client); struct device *dev = &client->dev; - int err, i; + int i; - /* main client */ - if (data) { - hwmon_device_unregister(data->hwmon_dev); + hwmon_device_unregister(data->hwmon_dev); - for (i = 0; i < ARRAY_SIZE(w83793_sensor_attr_2); i++) - device_remove_file(dev, - &w83793_sensor_attr_2[i].dev_attr); + for (i = 0; i < ARRAY_SIZE(w83793_sensor_attr_2); i++) + device_remove_file(dev, + &w83793_sensor_attr_2[i].dev_attr); - for (i = 0; i < ARRAY_SIZE(sda_single_files); i++) - device_remove_file(dev, &sda_single_files[i].dev_attr); + for (i = 0; i < ARRAY_SIZE(sda_single_files); i++) + device_remove_file(dev, &sda_single_files[i].dev_attr); - for (i = 0; i < ARRAY_SIZE(w83793_vid); i++) - device_remove_file(dev, &w83793_vid[i].dev_attr); - device_remove_file(dev, &dev_attr_vrm); + for (i = 0; i < ARRAY_SIZE(w83793_vid); i++) + device_remove_file(dev, &w83793_vid[i].dev_attr); + device_remove_file(dev, &dev_attr_vrm); - for (i = 0; i < ARRAY_SIZE(w83793_left_fan); i++) - device_remove_file(dev, &w83793_left_fan[i].dev_attr); + for (i = 0; i < ARRAY_SIZE(w83793_left_fan); i++) + device_remove_file(dev, &w83793_left_fan[i].dev_attr); - for (i = 0; i < ARRAY_SIZE(w83793_left_pwm); i++) - device_remove_file(dev, &w83793_left_pwm[i].dev_attr); + for (i = 0; i < ARRAY_SIZE(w83793_left_pwm); i++) + device_remove_file(dev, &w83793_left_pwm[i].dev_attr); - for (i = 0; i < ARRAY_SIZE(w83793_temp); i++) - device_remove_file(dev, &w83793_temp[i].dev_attr); - } + for (i = 0; i < ARRAY_SIZE(w83793_temp); i++) + device_remove_file(dev, &w83793_temp[i].dev_attr); - if ((err = i2c_detach_client(client))) - return err; + if (data->lm75[0] != NULL) + i2c_unregister_device(data->lm75[0]); + if (data->lm75[1] != NULL) + i2c_unregister_device(data->lm75[1]); - /* main client */ - if (data) - kfree(data); - /* subclient */ - else - kfree(client); + kfree(data); return 0; } static int -w83793_create_subclient(struct i2c_adapter *adapter, - struct i2c_client *client, int addr, - struct i2c_client **sub_cli) -{ - int err = 0; - struct i2c_client *sub_client; - - (*sub_cli) = sub_client = - kzalloc(sizeof(struct i2c_client), GFP_KERNEL); - if (!(sub_client)) { - return -ENOMEM; - } - sub_client->addr = 0x48 + addr; - i2c_set_clientdata(sub_client, NULL); - sub_client->adapter = adapter; - sub_client->driver = &w83793_driver; - strlcpy(sub_client->name, "w83793 subclient", I2C_NAME_SIZE); - if ((err = i2c_attach_client(sub_client))) { - dev_err(&client->dev, "subclient registration " - "at address 0x%x failed\n", sub_client->addr); - kfree(sub_client); - } - return err; -} - -static int -w83793_detect_subclients(struct i2c_adapter *adapter, int address, - int kind, struct i2c_client *client) +w83793_detect_subclients(struct i2c_client *client) { int i, id, err; + int address = client->addr; u8 tmp; + struct i2c_adapter *adapter = client->adapter; struct w83793_data *data = i2c_get_clientdata(client); id = i2c_adapter_id(adapter); @@ -1158,11 +1131,7 @@ w83793_detect_subclients(struct i2c_adapter *adapter, int address, tmp = w83793_read_value(client, W83793_REG_I2C_SUBADDR); if (!(tmp & 0x08)) { - err = - w83793_create_subclient(adapter, client, tmp & 0x7, - &data->lm75[0]); - if (err < 0) - goto ERROR_SC_0; + data->lm75[0] = i2c_new_dummy(adapter, 0x48 + (tmp & 0x7)); } if (!(tmp & 0x80)) { if ((data->lm75[0] != NULL) @@ -1173,10 +1142,8 @@ w83793_detect_subclients(struct i2c_adapter *adapter, int address, err = -ENODEV; goto ERROR_SC_1; } - err = w83793_create_subclient(adapter, client, - (tmp >> 4) & 0x7, &data->lm75[1]); - if (err < 0) - goto ERROR_SC_1; + data->lm75[1] = i2c_new_dummy(adapter, + 0x48 + ((tmp >> 4) & 0x7)); } return 0; @@ -1184,69 +1151,44 @@ w83793_detect_subclients(struct i2c_adapter *adapter, int address, /* Undo inits in case of errors */ ERROR_SC_1: - if (data->lm75[0] != NULL) { - i2c_detach_client(data->lm75[0]); - kfree(data->lm75[0]); - } + if (data->lm75[0] != NULL) + i2c_unregister_device(data->lm75[0]); ERROR_SC_0: return err; } -static int w83793_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int w83793_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - int i; - u8 tmp, val; - struct i2c_client *client; - struct device *dev; - struct w83793_data *data; - int files_fan = ARRAY_SIZE(w83793_left_fan) / 7; - int files_pwm = ARRAY_SIZE(w83793_left_pwm) / 5; - int files_temp = ARRAY_SIZE(w83793_temp) / 6; - int err = 0; + u8 tmp, bank; + struct i2c_adapter *adapter = client->adapter; + unsigned short address = client->addr; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) { - goto exit; + return -ENODEV; } - /* OK. For now, we presume we have a valid client. We now create the - client structure, even though we cannot fill it completely yet. - But it allows us to access w83793_{read,write}_value. */ - - if (!(data = kzalloc(sizeof(struct w83793_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - client = &data->client; - dev = &client->dev; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &w83793_driver; + bank = i2c_smbus_read_byte_data(client, W83793_REG_BANKSEL); - data->bank = i2c_smbus_read_byte_data(client, W83793_REG_BANKSEL); - - /* Now, we do the remaining detection. */ if (kind < 0) { - tmp = data->bank & 0x80 ? 0x5c : 0xa3; + tmp = bank & 0x80 ? 0x5c : 0xa3; /* Check Winbond vendor ID */ if (tmp != i2c_smbus_read_byte_data(client, W83793_REG_VENDORID)) { pr_debug("w83793: Detection failed at check " "vendor id\n"); - err = -ENODEV; - goto free_mem; + return -ENODEV; } /* If Winbond chip, address of chip and W83793_REG_I2C_ADDR should match */ - if ((data->bank & 0x07) == 0 + if ((bank & 0x07) == 0 && i2c_smbus_read_byte_data(client, W83793_REG_I2C_ADDR) != (address << 1)) { pr_debug("w83793: Detection failed at check " "i2c addr\n"); - err = -ENODEV; - goto free_mem; + return -ENODEV; } } @@ -1255,30 +1197,47 @@ static int w83793_detect(struct i2c_adapter *adapter, int address, int kind) Winbond. Determine the chip type now */ if (kind <= 0) { - if (0x7b == w83793_read_value(client, W83793_REG_CHIPID)) { + if (0x7b == i2c_smbus_read_byte_data(client, + W83793_REG_CHIPID)) { kind = w83793; } else { if (kind == 0) dev_warn(&adapter->dev, "w83793: Ignoring " "'force' parameter for unknown chip " "at address 0x%02x\n", address); - err = -ENODEV; - goto free_mem; + return -ENODEV; } } - /* Fill in the remaining client fields and put into the global list */ - strlcpy(client->name, "w83793", I2C_NAME_SIZE); + strlcpy(info->type, "w83793", I2C_NAME_SIZE); + + return 0; +} +static int w83793_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct device *dev = &client->dev; + struct w83793_data *data; + int i, tmp, val, err; + int files_fan = ARRAY_SIZE(w83793_left_fan) / 7; + int files_pwm = ARRAY_SIZE(w83793_left_pwm) / 5; + int files_temp = ARRAY_SIZE(w83793_temp) / 6; + + data = kzalloc(sizeof(struct w83793_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(client, data); + data->bank = i2c_smbus_read_byte_data(client, W83793_REG_BANKSEL); mutex_init(&data->update_lock); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) + err = w83793_detect_subclients(client); + if (err) goto free_mem; - if ((err = w83793_detect_subclients(adapter, address, kind, client))) - goto detach_client; - /* Initialize the chip */ w83793_init_client(client); @@ -1459,16 +1418,10 @@ exit_remove: for (i = 0; i < ARRAY_SIZE(w83793_temp); i++) device_remove_file(dev, &w83793_temp[i].dev_attr); - if (data->lm75[0] != NULL) { - i2c_detach_client(data->lm75[0]); - kfree(data->lm75[0]); - } - if (data->lm75[1] != NULL) { - i2c_detach_client(data->lm75[1]); - kfree(data->lm75[1]); - } -detach_client: - i2c_detach_client(client); + if (data->lm75[0] != NULL) + i2c_unregister_device(data->lm75[0]); + if (data->lm75[1] != NULL) + i2c_unregister_device(data->lm75[1]); free_mem: kfree(data); exit: -- cgit v0.10.2 From dc18a4184d6794e2e5c6f05142f3f8aaeeaee506 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:18 +0200 Subject: hwmon: (w83l785ts) Convert to a new-style i2c driver The new-style w83l785ts driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare diff --git a/drivers/hwmon/w83l785ts.c b/drivers/hwmon/w83l785ts.c index 52e268e..ea295b9 100644 --- a/drivers/hwmon/w83l785ts.c +++ b/drivers/hwmon/w83l785ts.c @@ -81,10 +81,11 @@ I2C_CLIENT_INSMOD_1(w83l785ts); * Functions declaration */ -static int w83l785ts_attach_adapter(struct i2c_adapter *adapter); -static int w83l785ts_detect(struct i2c_adapter *adapter, int address, - int kind); -static int w83l785ts_detach_client(struct i2c_client *client); +static int w83l785ts_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int w83l785ts_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); +static int w83l785ts_remove(struct i2c_client *client); static u8 w83l785ts_read_value(struct i2c_client *client, u8 reg, u8 defval); static struct w83l785ts_data *w83l785ts_update_device(struct device *dev); @@ -92,12 +93,22 @@ static struct w83l785ts_data *w83l785ts_update_device(struct device *dev); * Driver data (common to all clients) */ +static const struct i2c_device_id w83l785ts_id[] = { + { "w83l785ts", w83l785ts }, + { } +}; +MODULE_DEVICE_TABLE(i2c, w83l785ts_id); + static struct i2c_driver w83l785ts_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "w83l785ts", }, - .attach_adapter = w83l785ts_attach_adapter, - .detach_client = w83l785ts_detach_client, + .probe = w83l785ts_probe, + .remove = w83l785ts_remove, + .id_table = w83l785ts_id, + .detect = w83l785ts_detect, + .address_data = &addr_data, }; /* @@ -105,7 +116,6 @@ static struct i2c_driver w83l785ts_driver = { */ struct w83l785ts_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; char valid; /* zero until following fields are valid */ @@ -135,40 +145,14 @@ static SENSOR_DEVICE_ATTR(temp1_max, S_IRUGO, show_temp, NULL, 1); * Real code */ -static int w83l785ts_attach_adapter(struct i2c_adapter *adapter) -{ - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, w83l785ts_detect); -} - -/* - * The following function does more than just detection. If detection - * succeeds, it also registers the new chip. - */ -static int w83l785ts_detect(struct i2c_adapter *adapter, int address, int kind) +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int w83l785ts_detect(struct i2c_client *new_client, int kind, + struct i2c_board_info *info) { - struct i2c_client *new_client; - struct w83l785ts_data *data; - int err = 0; - + struct i2c_adapter *adapter = new_client->adapter; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - goto exit; - - if (!(data = kzalloc(sizeof(struct w83l785ts_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; - } - - /* The common I2C client data is placed right before the - * W83L785TS-specific data. */ - new_client = &data->client; - i2c_set_clientdata(new_client, data); - new_client->addr = address; - new_client->adapter = adapter; - new_client->driver = &w83l785ts_driver; - new_client->flags = 0; + return -ENODEV; /* * Now we do the remaining detection. A negative kind means that @@ -188,8 +172,8 @@ static int w83l785ts_detect(struct i2c_adapter *adapter, int address, int kind) W83L785TS_REG_TYPE, 0) & 0xFC) != 0x00)) { dev_dbg(&adapter->dev, "W83L785TS-S detection failed at 0x%02x.\n", - address); - goto exit_free; + new_client->addr); + return -ENODEV; } } @@ -214,22 +198,34 @@ static int w83l785ts_detect(struct i2c_adapter *adapter, int address, int kind) dev_info(&adapter->dev, "Unsupported chip (man_id=0x%04X, " "chip_id=0x%02X).\n", man_id, chip_id); - goto exit_free; + return -ENODEV; } } - /* We can fill in the remaining client fields. */ - strlcpy(new_client->name, "w83l785ts", I2C_NAME_SIZE); + strlcpy(info->type, "w83l785ts", I2C_NAME_SIZE); + + return 0; +} + +static int w83l785ts_probe(struct i2c_client *new_client, + const struct i2c_device_id *id) +{ + struct w83l785ts_data *data; + int err = 0; + + data = kzalloc(sizeof(struct w83l785ts_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(new_client, data); data->valid = 0; mutex_init(&data->update_lock); /* Default values in case the first read fails (unlikely). */ data->temp[1] = data->temp[0] = 0; - /* Tell the I2C layer a new client has arrived. */ - if ((err = i2c_attach_client(new_client))) - goto exit_free; - /* * Initialize the W83L785TS chip * Nothing yet, assume it is already started. @@ -259,25 +255,20 @@ exit_remove: &sensor_dev_attr_temp1_input.dev_attr); device_remove_file(&new_client->dev, &sensor_dev_attr_temp1_max.dev_attr); - i2c_detach_client(new_client); -exit_free: kfree(data); exit: return err; } -static int w83l785ts_detach_client(struct i2c_client *client) +static int w83l785ts_remove(struct i2c_client *client) { struct w83l785ts_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); device_remove_file(&client->dev, &sensor_dev_attr_temp1_input.dev_attr); device_remove_file(&client->dev, &sensor_dev_attr_temp1_max.dev_attr); - if ((err = i2c_detach_client(client))) - return err; kfree(data); return 0; @@ -286,6 +277,18 @@ static int w83l785ts_detach_client(struct i2c_client *client) static u8 w83l785ts_read_value(struct i2c_client *client, u8 reg, u8 defval) { int value, i; + struct device *dev; + const char *prefix; + + /* We might be called during detection, at which point the client + isn't yet fully initialized, so we can't use dev_dbg on it */ + if (i2c_get_clientdata(client)) { + dev = &client->dev; + prefix = ""; + } else { + dev = &client->adapter->dev; + prefix = "w83l785ts: "; + } /* Frequent read errors have been reported on Asus boards, so we * retry on read errors. If it still fails (unlikely), return the @@ -293,15 +296,15 @@ static u8 w83l785ts_read_value(struct i2c_client *client, u8 reg, u8 defval) for (i = 1; i <= MAX_RETRIES; i++) { value = i2c_smbus_read_byte_data(client, reg); if (value >= 0) { - dev_dbg(&client->dev, "Read 0x%02x from register " - "0x%02x.\n", value, reg); + dev_dbg(dev, "%sRead 0x%02x from register 0x%02x.\n", + prefix, value, reg); return value; } - dev_dbg(&client->dev, "Read failed, will retry in %d.\n", i); + dev_dbg(dev, "%sRead failed, will retry in %d.\n", prefix, i); msleep(i); } - dev_err(&client->dev, "Couldn't read value from register 0x%02x.\n", + dev_err(dev, "%sCouldn't read value from register 0x%02x.\n", prefix, reg); return defval; } -- cgit v0.10.2 From 33468e7637c53b5516902422d66ca3d3fe64a9c3 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 16 Jul 2008 19:30:18 +0200 Subject: hwmon: (w83l786ng) Convert to a new-style i2c driver The new-style w83l786ng driver implements the optional detect() callback to cover the use cases of the legacy driver. Signed-off-by: Jean Delvare Cc: Kevin Lo diff --git a/drivers/hwmon/w83l786ng.c b/drivers/hwmon/w83l786ng.c index 41e22dd..badca76 100644 --- a/drivers/hwmon/w83l786ng.c +++ b/drivers/hwmon/w83l786ng.c @@ -121,7 +121,6 @@ DIV_TO_REG(long val) } struct w83l786ng_data { - struct i2c_client client; struct device *hwmon_dev; struct mutex update_lock; char valid; /* !=0 if following fields are valid */ @@ -146,18 +145,30 @@ struct w83l786ng_data { u8 tolerance[2]; }; -static int w83l786ng_attach_adapter(struct i2c_adapter *adapter); -static int w83l786ng_detect(struct i2c_adapter *adapter, int address, int kind); -static int w83l786ng_detach_client(struct i2c_client *client); +static int w83l786ng_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int w83l786ng_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info); +static int w83l786ng_remove(struct i2c_client *client); static void w83l786ng_init_client(struct i2c_client *client); static struct w83l786ng_data *w83l786ng_update_device(struct device *dev); +static const struct i2c_device_id w83l786ng_id[] = { + { "w83l786ng", w83l786ng }, + { } +}; +MODULE_DEVICE_TABLE(i2c, w83l786ng_id); + static struct i2c_driver w83l786ng_driver = { + .class = I2C_CLASS_HWMON, .driver = { .name = "w83l786ng", }, - .attach_adapter = w83l786ng_attach_adapter, - .detach_client = w83l786ng_detach_client, + .probe = w83l786ng_probe, + .remove = w83l786ng_remove, + .id_table = w83l786ng_id, + .detect = w83l786ng_detect, + .address_data = &addr_data, }; static u8 @@ -575,42 +586,15 @@ static const struct attribute_group w83l786ng_group = { }; static int -w83l786ng_attach_adapter(struct i2c_adapter *adapter) +w83l786ng_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) { - if (!(adapter->class & I2C_CLASS_HWMON)) - return 0; - return i2c_probe(adapter, &addr_data, w83l786ng_detect); -} - -static int -w83l786ng_detect(struct i2c_adapter *adapter, int address, int kind) -{ - struct i2c_client *client; - struct device *dev; - struct w83l786ng_data *data; - int i, err = 0; - u8 reg_tmp; + struct i2c_adapter *adapter = client->adapter; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) { - goto exit; - } - - /* OK. For now, we presume we have a valid client. We now create the - client structure, even though we cannot fill it completely yet. - But it allows us to access w83l786ng_{read,write}_value. */ - - if (!(data = kzalloc(sizeof(struct w83l786ng_data), GFP_KERNEL))) { - err = -ENOMEM; - goto exit; + return -ENODEV; } - client = &data->client; - dev = &client->dev; - i2c_set_clientdata(client, data); - client->addr = address; - client->adapter = adapter; - client->driver = &w83l786ng_driver; - /* * Now we do the remaining detection. A negative kind means that * the driver was loaded with no force parameter (default), so we @@ -627,8 +611,8 @@ w83l786ng_detect(struct i2c_adapter *adapter, int address, int kind) W83L786NG_REG_CONFIG) & 0x80) != 0x00)) { dev_dbg(&adapter->dev, "W83L786NG detection failed at 0x%02x.\n", - address); - goto exit_free; + client->addr); + return -ENODEV; } } @@ -651,17 +635,31 @@ w83l786ng_detect(struct i2c_adapter *adapter, int address, int kind) dev_info(&adapter->dev, "Unsupported chip (man_id=0x%04X, " "chip_id=0x%02X).\n", man_id, chip_id); - goto exit_free; + return -ENODEV; } } - /* Fill in the remaining client fields and put into the global list */ - strlcpy(client->name, "w83l786ng", I2C_NAME_SIZE); - mutex_init(&data->update_lock); + strlcpy(info->type, "w83l786ng", I2C_NAME_SIZE); - /* Tell the I2C layer a new client has arrived */ - if ((err = i2c_attach_client(client))) - goto exit_free; + return 0; +} + +static int +w83l786ng_probe(struct i2c_client *client, const struct i2c_device_id *id) +{ + struct device *dev = &client->dev; + struct w83l786ng_data *data; + int i, err = 0; + u8 reg_tmp; + + data = kzalloc(sizeof(struct w83l786ng_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); /* Initialize the chip */ w83l786ng_init_client(client); @@ -693,25 +691,19 @@ w83l786ng_detect(struct i2c_adapter *adapter, int address, int kind) exit_remove: sysfs_remove_group(&client->dev.kobj, &w83l786ng_group); - i2c_detach_client(client); -exit_free: kfree(data); exit: return err; } static int -w83l786ng_detach_client(struct i2c_client *client) +w83l786ng_remove(struct i2c_client *client) { struct w83l786ng_data *data = i2c_get_clientdata(client); - int err; hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &w83l786ng_group); - if ((err = i2c_detach_client(client))) - return err; - kfree(data); return 0; -- cgit v0.10.2 From 84e0f3f6c1e26588fdcb9f1b0f99d0275229bc99 Mon Sep 17 00:00:00 2001 From: Dimitri Gorokhovik Date: Wed, 16 Jul 2008 20:33:34 +0200 Subject: ide: it821x in pass-through mode segfaults in 2.6.26-stable The driver of ITE8212 in pass-through mode (it8212.noraid=1 on cmndline) attempts to use the field `.dma_host_set' of the struct ide_dma_ops in `ide_config_drive_speed' which is set to NULL by default. So give a value to all fields of the struct ide_dma_ops. Signed-off-by: Dimitri Gorokhovik Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c index 6ab0411..cbf6472 100644 --- a/drivers/ide/pci/it821x.c +++ b/drivers/ide/pci/it821x.c @@ -512,8 +512,14 @@ static void __devinit it821x_quirkproc(ide_drive_t *drive) } static struct ide_dma_ops it821x_pass_through_dma_ops = { + .dma_host_set = ide_dma_host_set, + .dma_setup = ide_dma_setup, + .dma_exec_cmd = ide_dma_exec_cmd, .dma_start = it821x_dma_start, .dma_end = it821x_dma_end, + .dma_test_irq = ide_dma_test_irq, + .dma_timeout = ide_dma_timeout, + .dma_lost_irq = ide_dma_lost_irq, }; /** -- cgit v0.10.2 From 0b6abc17700a7843b165c677da0ac94522f83083 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 16 Jul 2008 20:33:35 +0200 Subject: ide: avoid DMA on the stack for REQ_TYPE_ATA_PC Some REQ_TYPE_ATA_PC commands uses the stack buffers for DMA, which leads to memory corruption on a non-coherent platform. With regard to alignment and padding, ide-cd has the the dma safe check for sg requests and REQ_TYPE_ATA_PC. This adds the stack buffer check to that check. Signed-off-by: FUJITA Tomonori Acked-by: Borislav Petkov Cc: Thomas Bogendoerfer Cc: Tejun Heo Cc: Jens Axboe Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index d998471..d6667c3 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1195,6 +1195,7 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) int mask = drive->queue->dma_alignment; unsigned long addr = (unsigned long)page_address(bio_page(rq->bio)); + unsigned long stack_mask = ~(THREAD_SIZE - 1); info->dma = drive->using_dma; @@ -1206,6 +1207,10 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) */ if ((rq->data_len & 15) || (addr & mask)) info->dma = 0; + + if (!((addr & stack_mask) ^ + ((unsigned long)current->stack & stack_mask))) + info->dma = 0; } /* start sending the command to the drive */ -- cgit v0.10.2 From e5318b531b008c79d2a0c0df06a7b8628da38e2f Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 16 Jul 2008 20:33:35 +0200 Subject: ide: use the dma safe check for REQ_TYPE_ATA_PC This uses the dma safe check for REQ_TYPE_ATA_PC. The dma safe check is used for only sg requests but it should be used for other non fs commands. This uses blk_queue_update_dma_pad to make the intention clear though ide don't use the blk APIs so it doesn't change anything. Signed-off-by: FUJITA Tomonori Acked-by: Borislav Petkov Cc: Thomas Bogendoerfer Cc: Tejun Heo Cc: Jens Axboe Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index d6667c3..e12d602 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1191,12 +1191,17 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) info->dma = 0; /* sg request */ - if (rq->bio) { - int mask = drive->queue->dma_alignment; - unsigned long addr = - (unsigned long)page_address(bio_page(rq->bio)); + if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) { + struct request_queue *q = drive->queue; + unsigned int alignment; + unsigned long addr; unsigned long stack_mask = ~(THREAD_SIZE - 1); + if (rq->bio) + addr = (unsigned long)bio_data(rq->bio); + else + addr = (unsigned long)rq->data; + info->dma = drive->using_dma; /* @@ -1205,7 +1210,8 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) * NOTE! The "len" and "addr" checks should possibly have * separate masks. */ - if ((rq->data_len & 15) || (addr & mask)) + alignment = queue_dma_alignment(q) | q->dma_pad_mask; + if (addr & alignment || rq->data_len & alignment) info->dma = 0; if (!((addr & stack_mask) ^ @@ -1877,6 +1883,7 @@ static int ide_cdrom_setup(ide_drive_t *drive) blk_queue_prep_rq(drive->queue, ide_cdrom_prep_fn); blk_queue_dma_alignment(drive->queue, 31); + blk_queue_update_dma_pad(drive->queue, 15); drive->queue->unplug_delay = (1 * HZ) / 1000; if (!drive->queue->unplug_delay) drive->queue->unplug_delay = 1; -- cgit v0.10.2 From 96b1dfe8fe02e35f017c885b11f0beb10ff4f316 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 20 Jun 2008 20:53:35 +0200 Subject: BAST: Remove old IDE driver Remove the old BAST IDE driver, as we are now using the platform-pata support. Signed-off-by: Ben Dooks Cc: Jeff Garzik Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index cf707c8..cd08dba 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -829,13 +829,6 @@ config BLK_DEV_IDE_RAPIDE Say Y here if you want to support the Yellowstone RapIDE controller manufactured for use with Acorn computers. -config BLK_DEV_IDE_BAST - tristate "Simtec BAST / Thorcom VR1000 IDE support" - depends on ARM && (ARCH_BAST || MACH_VR1000) - help - Say Y here if you want to support the onboard IDE channels on the - Simtec BAST or the Thorcom VR1000 - config IDE_H8300 tristate "H8300 IDE support" depends on H8300 diff --git a/drivers/ide/arm/Makefile b/drivers/ide/arm/Makefile index 936e7b0..5bc2605 100644 --- a/drivers/ide/arm/Makefile +++ b/drivers/ide/arm/Makefile @@ -1,7 +1,6 @@ obj-$(CONFIG_BLK_DEV_IDE_ICSIDE) += icside.o obj-$(CONFIG_BLK_DEV_IDE_RAPIDE) += rapide.o -obj-$(CONFIG_BLK_DEV_IDE_BAST) += bast-ide.o obj-$(CONFIG_BLK_DEV_PALMCHIP_BK3710) += palm_bk3710.o ifeq ($(CONFIG_IDE_ARM), m) diff --git a/drivers/ide/arm/bast-ide.c b/drivers/ide/arm/bast-ide.c deleted file mode 100644 index 8e8c281..0000000 --- a/drivers/ide/arm/bast-ide.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2003-2004 Simtec Electronics - * Ben Dooks - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * -*/ - -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -#define DRV_NAME "bast-ide" - -static int __init bastide_register(unsigned int base, unsigned int aux, int irq) -{ - ide_hwif_t *hwif; - hw_regs_t hw; - int i; - u8 idx[4] = { 0xff, 0xff, 0xff, 0xff }; - - memset(&hw, 0, sizeof(hw)); - - base += BAST_IDE_CS; - aux += BAST_IDE_CS; - - for (i = 0; i <= 7; i++) { - hw.io_ports_array[i] = (unsigned long)base; - base += 0x20; - } - - hw.io_ports.ctl_addr = aux + (6 * 0x20); - hw.irq = irq; - hw.chipset = ide_generic; - - hwif = ide_find_port(); - if (hwif == NULL) - goto out; - - i = hwif->index; - - ide_init_port_data(hwif, i); - ide_init_port_hw(hwif, &hw); - hwif->port_ops = NULL; - - idx[0] = i; - - ide_device_add(idx, NULL); -out: - return 0; -} - -static int __init bastide_init(void) -{ - unsigned long base = BAST_VA_IDEPRI + BAST_IDE_CS; - - /* we can treat the VR1000 and the BAST the same */ - - if (!(machine_is_bast() || machine_is_vr1000())) - return 0; - - printk("BAST: IDE driver, (c) 2003-2004 Simtec Electronics\n"); - - if (!request_mem_region(base, 0x400000, DRV_NAME)) { - printk(KERN_ERR "%s: resources busy\n", DRV_NAME); - return -EBUSY; - } - - bastide_register(BAST_VA_IDEPRI, BAST_VA_IDEPRIAUX, IRQ_IDE0); - bastide_register(BAST_VA_IDESEC, BAST_VA_IDESECAUX, IRQ_IDE1); - - return 0; -} - -module_init(bastide_init); - -MODULE_AUTHOR("Ben Dooks "); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Simtec BAST / Thorcom VR1000 IDE driver"); -- cgit v0.10.2 From e4e8d02f56f5c0cefc6713384629e068193d706a Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:35 +0200 Subject: ide: remove needless includes from ide-lib.c Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c index 47af80d..7e053d2 100644 --- a/drivers/ide/ide-lib.c +++ b/drivers/ide/ide-lib.c @@ -1,26 +1,11 @@ -#include #include #include #include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include #include #include #include -#include -#include -#include -#include - static const char *udma_str[] = { "UDMA/16", "UDMA/25", "UDMA/33", "UDMA/44", "UDMA/66", "UDMA/100", "UDMA/133", "UDMA7" }; -- cgit v0.10.2 From f0ffc9872e972e9d9fe8f7ae577ff046dbdba51b Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:36 +0200 Subject: ide: remove unused XFER_UDMA_SLOW Remove unused XFER_UDMA_SLOW from ide_timing[]. While at it: - fix re-defining XFER_PIO_5 (no need to define it in ide-timing.h as it is defined in which is included by ) - fix whitespace damage There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-timing.h b/drivers/ide/ide-timing.h index 2e91c58..a20c4cb 100644 --- a/drivers/ide/ide-timing.h +++ b/drivers/ide/ide-timing.h @@ -28,9 +28,6 @@ #include #include -#define XFER_PIO_5 0x0d -#define XFER_UDMA_SLOW 0x4f - struct ide_timing { short mode; short setup; /* t1 */ @@ -61,12 +58,10 @@ static struct ide_timing ide_timing[] = { { XFER_UDMA_1, 0, 0, 0, 0, 0, 0, 0, 80 }, { XFER_UDMA_0, 0, 0, 0, 0, 0, 0, 0, 120 }, - { XFER_UDMA_SLOW, 0, 0, 0, 0, 0, 0, 0, 150 }, - { XFER_MW_DMA_2, 25, 0, 0, 0, 70, 25, 120, 0 }, { XFER_MW_DMA_1, 45, 0, 0, 0, 80, 50, 150, 0 }, { XFER_MW_DMA_0, 60, 0, 0, 0, 215, 215, 480, 0 }, - + { XFER_SW_DMA_2, 60, 0, 0, 0, 120, 120, 240, 0 }, { XFER_SW_DMA_1, 90, 0, 0, 0, 240, 240, 480, 0 }, { XFER_SW_DMA_0, 120, 0, 0, 0, 480, 480, 960, 0 }, -- cgit v0.10.2 From 71d5161426c26742ba053fe93637559cbe2cea37 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:36 +0200 Subject: ide: use u8 for xfer modes in ide-timing.h There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-timing.h b/drivers/ide/ide-timing.h index a20c4cb..ebe884d 100644 --- a/drivers/ide/ide-timing.h +++ b/drivers/ide/ide-timing.h @@ -29,7 +29,7 @@ #include struct ide_timing { - short mode; + u8 mode; short setup; /* t1 */ short act8b; /* t2 for 8-bit io */ short rec8b; /* t2i for 8-bit io */ @@ -76,7 +76,7 @@ static struct ide_timing ide_timing[] = { { XFER_PIO_SLOW, 120, 290, 240, 960, 290, 240, 960, 0 }, - { -1 } + { 0xff } }; #define IDE_TIMING_SETUP 0x01 @@ -122,17 +122,18 @@ static void ide_timing_merge(struct ide_timing *a, struct ide_timing *b, struct if (what & IDE_TIMING_UDMA ) m->udma = max(a->udma, b->udma); } -static struct ide_timing* ide_timing_find_mode(short speed) +static struct ide_timing *ide_timing_find_mode(u8 speed) { struct ide_timing *t; for (t = ide_timing; t->mode != speed; t++) - if (t->mode < 0) + if (t->mode == 0xff) return NULL; return t; } -static int ide_timing_compute(ide_drive_t *drive, short speed, struct ide_timing *t, int T, int UT) +static int ide_timing_compute(ide_drive_t *drive, u8 speed, + struct ide_timing *t, int T, int UT) { struct hd_driveid *id = drive->id; struct ide_timing *s, p; -- cgit v0.10.2 From 3be53f3f213223f50d8e29b5e1869685bf040a1e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:36 +0200 Subject: ide: move some bits from ide-timing.h to Move struct ide_timing and IDE_TIMING_* defines to from drivers/ide/ide-timing.h. While at it: - use u8/u16 instead of short for struct ide_timing fields - use enum for IDE_TIMING_* There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-timing.h b/drivers/ide/ide-timing.h index ebe884d..7248799 100644 --- a/drivers/ide/ide-timing.h +++ b/drivers/ide/ide-timing.h @@ -28,18 +28,6 @@ #include #include -struct ide_timing { - u8 mode; - short setup; /* t1 */ - short act8b; /* t2 for 8-bit io */ - short rec8b; /* t2i for 8-bit io */ - short cyc8b; /* t0 for 8-bit io */ - short active; /* t2 or tD */ - short recover; /* t2i or tK */ - short cycle; /* t0 */ - short udma; /* t2CYCTYP/2 */ -}; - /* * PIO 0-5, MWDMA 0-2 and UDMA 0-6 timings (in nanoseconds). * These were taken from ATA/ATAPI-6 standard, rev 0a, except @@ -79,17 +67,6 @@ static struct ide_timing ide_timing[] = { { 0xff } }; -#define IDE_TIMING_SETUP 0x01 -#define IDE_TIMING_ACT8B 0x02 -#define IDE_TIMING_REC8B 0x04 -#define IDE_TIMING_CYC8B 0x08 -#define IDE_TIMING_8BIT 0x0e -#define IDE_TIMING_ACTIVE 0x10 -#define IDE_TIMING_RECOVER 0x20 -#define IDE_TIMING_CYCLE 0x40 -#define IDE_TIMING_UDMA 0x80 -#define IDE_TIMING_ALL 0xff - #define ENOUGH(v,unit) (((v)-1)/(unit)+1) #define EZ(v,unit) ((v)?ENOUGH(v,unit):0) diff --git a/include/linux/ide.h b/include/linux/ide.h index ac4eeb2..81c6ea4 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1271,6 +1271,34 @@ static inline int ide_dev_is_sata(struct hd_driveid *id) u64 ide_get_lba_addr(struct ide_taskfile *, int); u8 ide_dump_status(ide_drive_t *, const char *, u8); +struct ide_timing { + u8 mode; + u8 setup; /* t1 */ + u16 act8b; /* t2 for 8-bit io */ + u16 rec8b; /* t2i for 8-bit io */ + u16 cyc8b; /* t0 for 8-bit io */ + u16 active; /* t2 or tD */ + u16 recover; /* t2i or tK */ + u16 cycle; /* t0 */ + u16 udma; /* t2CYCTYP/2 */ +}; + +enum { + IDE_TIMING_SETUP = (1 << 0), + IDE_TIMING_ACT8B = (1 << 1), + IDE_TIMING_REC8B = (1 << 2), + IDE_TIMING_CYC8B = (1 << 3), + IDE_TIMING_8BIT = IDE_TIMING_ACT8B | IDE_TIMING_REC8B | + IDE_TIMING_CYC8B, + IDE_TIMING_ACTIVE = (1 << 4), + IDE_TIMING_RECOVER = (1 << 5), + IDE_TIMING_CYCLE = (1 << 6), + IDE_TIMING_UDMA = (1 << 7), + IDE_TIMING_ALL = IDE_TIMING_SETUP | IDE_TIMING_8BIT | + IDE_TIMING_ACTIVE | IDE_TIMING_RECOVER | + IDE_TIMING_CYCLE | IDE_TIMING_UDMA, +}; + typedef struct ide_pio_timings_s { int setup_time; /* Address setup (ns) minimum */ int active_time; /* Active pulse (ns) minimum */ -- cgit v0.10.2 From bd887f72d2a28a8202519e67fd9ed93ee3c4e78d Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:36 +0200 Subject: ide: remove XFER_* masks from ide-timing.h * Check requested xfer mode against xfer modes instead of XFER_* masks in ide_timing_compute() and cs5535.c::cs5535_set_speed(). * Remove XFER_[MODE,MWDMA,EPIO,PIO] masks. There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-timing.h b/drivers/ide/ide-timing.h index 7248799..a401d8f 100644 --- a/drivers/ide/ide-timing.h +++ b/drivers/ide/ide-timing.h @@ -70,11 +70,6 @@ static struct ide_timing ide_timing[] = { #define ENOUGH(v,unit) (((v)-1)/(unit)+1) #define EZ(v,unit) ((v)?ENOUGH(v,unit):0) -#define XFER_MODE 0xf0 -#define XFER_MWDMA 0x20 -#define XFER_EPIO 0x01 -#define XFER_PIO 0x00 - static void ide_timing_quantize(struct ide_timing *t, struct ide_timing *q, int T, int UT) { q->setup = EZ(t->setup * 1000, T); @@ -137,17 +132,12 @@ static int ide_timing_compute(ide_drive_t *drive, u8 speed, memset(&p, 0, sizeof(p)); - switch (speed & XFER_MODE) { - - case XFER_PIO: - if (speed <= XFER_PIO_2) p.cycle = p.cyc8b = id->eide_pio; - else p.cycle = p.cyc8b = id->eide_pio_iordy; - break; - - case XFER_MWDMA: - p.cycle = id->eide_dma_min; - break; - } + if (speed <= XFER_PIO_2) + p.cycle = p.cyc8b = id->eide_pio; + else if (speed <= XFER_PIO_5) + p.cycle = p.cyc8b = id->eide_pio_iordy; + else if (speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2) + p.cycle = id->eide_dma_min; ide_timing_merge(&p, t, t, IDE_TIMING_CYCLE | IDE_TIMING_CYC8B); } @@ -164,7 +154,7 @@ static int ide_timing_compute(ide_drive_t *drive, u8 speed, * slower/equal than the fastest PIO timing. */ - if ((speed & XFER_MODE) != XFER_PIO) { + if (speed >= XFER_SW_DMA_0) { u8 pio = ide_get_best_pio_mode(drive, 255, 5); ide_timing_compute(drive, XFER_PIO_0 + pio, &p, T, UT); ide_timing_merge(&p, t, t, IDE_TIMING_ALL); diff --git a/drivers/ide/pci/cs5535.c b/drivers/ide/pci/cs5535.c index 99fe91a..2a2cb49 100644 --- a/drivers/ide/pci/cs5535.c +++ b/drivers/ide/pci/cs5535.c @@ -75,13 +75,11 @@ static unsigned int cs5535_udma_timings[5] = */ static void cs5535_set_speed(ide_drive_t *drive, const u8 speed) { - u32 reg = 0, dummy; int unit = drive->select.b.unit; - /* Set the PIO timings */ - if ((speed & XFER_MODE) == XFER_PIO) { + if (speed < XFER_SW_DMA_0) { ide_drive_t *pair = ide_get_paired_drive(drive); u8 cmd, pioa; -- cgit v0.10.2 From 2c139e7a7152f66ff93b173f8770c94ea53a691e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:36 +0200 Subject: ide: checkpatch.pl fixes for ide-timing.h Also fix placement of comments in ide_timing_compute() while at it. There should be no functional changes caused by this patch (md5sum was verified to be the same before/after the patch). Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-timing.h b/drivers/ide/ide-timing.h index a401d8f..98e05f5 100644 --- a/drivers/ide/ide-timing.h +++ b/drivers/ide/ide-timing.h @@ -3,9 +3,7 @@ /* * Copyright (c) 1999-2001 Vojtech Pavlik - */ - -/* + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -32,7 +30,7 @@ * PIO 0-5, MWDMA 0-2 and UDMA 0-6 timings (in nanoseconds). * These were taken from ATA/ATAPI-6 standard, rev 0a, except * for PIO 5, which is a nonstandard extension and UDMA6, which - * is currently supported only by Maxtor drives. + * is currently supported only by Maxtor drives. */ static struct ide_timing ide_timing[] = { @@ -67,10 +65,11 @@ static struct ide_timing ide_timing[] = { { 0xff } }; -#define ENOUGH(v,unit) (((v)-1)/(unit)+1) -#define EZ(v,unit) ((v)?ENOUGH(v,unit):0) +#define ENOUGH(v, unit) (((v) - 1) / (unit) + 1) +#define EZ(v, unit) ((v) ? ENOUGH(v, unit) : 0) -static void ide_timing_quantize(struct ide_timing *t, struct ide_timing *q, int T, int UT) +static void ide_timing_quantize(struct ide_timing *t, struct ide_timing *q, + int T, int UT) { q->setup = EZ(t->setup * 1000, T); q->act8b = EZ(t->act8b * 1000, T); @@ -82,16 +81,25 @@ static void ide_timing_quantize(struct ide_timing *t, struct ide_timing *q, int q->udma = EZ(t->udma * 1000, UT); } -static void ide_timing_merge(struct ide_timing *a, struct ide_timing *b, struct ide_timing *m, unsigned int what) +static void ide_timing_merge(struct ide_timing *a, struct ide_timing *b, + struct ide_timing *m, unsigned int what) { - if (what & IDE_TIMING_SETUP ) m->setup = max(a->setup, b->setup); - if (what & IDE_TIMING_ACT8B ) m->act8b = max(a->act8b, b->act8b); - if (what & IDE_TIMING_REC8B ) m->rec8b = max(a->rec8b, b->rec8b); - if (what & IDE_TIMING_CYC8B ) m->cyc8b = max(a->cyc8b, b->cyc8b); - if (what & IDE_TIMING_ACTIVE ) m->active = max(a->active, b->active); - if (what & IDE_TIMING_RECOVER) m->recover = max(a->recover, b->recover); - if (what & IDE_TIMING_CYCLE ) m->cycle = max(a->cycle, b->cycle); - if (what & IDE_TIMING_UDMA ) m->udma = max(a->udma, b->udma); + if (what & IDE_TIMING_SETUP) + m->setup = max(a->setup, b->setup); + if (what & IDE_TIMING_ACT8B) + m->act8b = max(a->act8b, b->act8b); + if (what & IDE_TIMING_REC8B) + m->rec8b = max(a->rec8b, b->rec8b); + if (what & IDE_TIMING_CYC8B) + m->cyc8b = max(a->cyc8b, b->cyc8b); + if (what & IDE_TIMING_ACTIVE) + m->active = max(a->active, b->active); + if (what & IDE_TIMING_RECOVER) + m->recover = max(a->recover, b->recover); + if (what & IDE_TIMING_CYCLE) + m->cycle = max(a->cycle, b->cycle); + if (what & IDE_TIMING_UDMA) + m->udma = max(a->udma, b->udma); } static struct ide_timing *ide_timing_find_mode(u8 speed) @@ -101,7 +109,7 @@ static struct ide_timing *ide_timing_find_mode(u8 speed) for (t = ide_timing; t->mode != speed; t++) if (t->mode == 0xff) return NULL; - return t; + return t; } static int ide_timing_compute(ide_drive_t *drive, u8 speed, @@ -110,24 +118,22 @@ static int ide_timing_compute(ide_drive_t *drive, u8 speed, struct hd_driveid *id = drive->id; struct ide_timing *s, p; -/* - * Find the mode. - */ - - if (!(s = ide_timing_find_mode(speed))) + /* + * Find the mode. + */ + s = ide_timing_find_mode(speed); + if (s == NULL) return -EINVAL; -/* - * Copy the timing from the table. - */ - + /* + * Copy the timing from the table. + */ *t = *s; -/* - * If the drive is an EIDE drive, it can tell us it needs extended - * PIO/MWDMA cycle timing. - */ - + /* + * If the drive is an EIDE drive, it can tell us it needs extended + * PIO/MWDMA cycle timing. + */ if (id && id->field_valid & 2) { /* EIDE drive */ memset(&p, 0, sizeof(p)); @@ -142,28 +148,25 @@ static int ide_timing_compute(ide_drive_t *drive, u8 speed, ide_timing_merge(&p, t, t, IDE_TIMING_CYCLE | IDE_TIMING_CYC8B); } -/* - * Convert the timing to bus clock counts. - */ - + /* + * Convert the timing to bus clock counts. + */ ide_timing_quantize(t, t, T, UT); -/* - * Even in DMA/UDMA modes we still use PIO access for IDENTIFY, S.M.A.R.T - * and some other commands. We have to ensure that the DMA cycle timing is - * slower/equal than the fastest PIO timing. - */ - + /* + * Even in DMA/UDMA modes we still use PIO access for IDENTIFY, + * S.M.A.R.T and some other commands. We have to ensure that the + * DMA cycle timing is slower/equal than the fastest PIO timing. + */ if (speed >= XFER_SW_DMA_0) { u8 pio = ide_get_best_pio_mode(drive, 255, 5); ide_timing_compute(drive, XFER_PIO_0 + pio, &p, T, UT); ide_timing_merge(&p, t, t, IDE_TIMING_ALL); } -/* - * Lengthen active & recovery time so that cycle time is correct. - */ - + /* + * Lengthen active & recovery time so that cycle time is correct. + */ if (t->act8b + t->rec8b < t->cyc8b) { t->act8b += (t->cyc8b - (t->act8b + t->rec8b)) / 2; t->rec8b = t->cyc8b - t->act8b; -- cgit v0.10.2 From f06ab3402aa2d6de060442c1053ea10b24b65076 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:37 +0200 Subject: ide: convert ide-timing.h to ide-timings.c library (take 2) * Don't include ide-timing.h in cs5535 and sis5513 host drivers (they don't need it currently). * Convert ide-timing.h to ide-timings.c library and add CONFIG_IDE_TIMINGS config option to be selected by host drivers using the library. While at it: - fix ide_timing_find_mode() placement v2: * Add missing EXPORT_SYMBOLs. (Stephen Rothwell ) There should be no functional changes caused by this patch. Cc: Stephen Rothwell Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index cd08dba..994b6d3 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -98,6 +98,9 @@ if BLK_DEV_IDE comment "Please see Documentation/ide/ide.txt for help/info on IDE drives" +config IDE_TIMINGS + bool + config IDE_ATAPI bool @@ -469,6 +472,7 @@ config BLK_DEV_ALI15X3 config BLK_DEV_AMD74XX tristate "AMD and nVidia IDE support" depends on !ARM + select IDE_TIMINGS select BLK_DEV_IDEDMA_PCI help This driver adds explicit support for AMD-7xx and AMD-8111 chips @@ -725,6 +729,7 @@ config BLK_DEV_TRM290 config BLK_DEV_VIA82CXXX tristate "VIA82CXXX chipset support" + select IDE_TIMINGS select BLK_DEV_IDEDMA_PCI help This driver adds explicit support for VIA BusMastering IDE chips. @@ -751,6 +756,7 @@ endif config BLK_DEV_IDE_PMAC tristate "PowerMac on-board IDE support" depends on PPC_PMAC && IDE=y && BLK_DEV_IDE=y + select IDE_TIMINGS help This driver provides support for the on-board IDE controller on most of the recent Apple Power Macintoshes and PowerBooks. @@ -912,6 +918,7 @@ config BLK_DEV_Q40IDE config BLK_DEV_PALMCHIP_BK3710 tristate "Palmchip bk3710 IDE controller support" depends on ARCH_DAVINCI + select IDE_TIMINGS select BLK_DEV_IDEDMA_SFF help Say Y here if you want to support the onchip IDE controller on the diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile index a2b3f84..cb13506 100644 --- a/drivers/ide/Makefile +++ b/drivers/ide/Makefile @@ -14,6 +14,7 @@ EXTRA_CFLAGS += -Idrivers/ide ide-core-y += ide.o ide-io.o ide-iops.o ide-lib.o ide-probe.o ide-taskfile.o # core IDE code +ide-core-$(CONFIG_IDE_TIMINGS) += ide-timings.o ide-core-$(CONFIG_IDE_ATAPI) += ide-atapi.o ide-core-$(CONFIG_BLK_DEV_IDEPCI) += setup-pci.o ide-core-$(CONFIG_BLK_DEV_IDEDMA) += ide-dma.o diff --git a/drivers/ide/arm/palm_bk3710.c b/drivers/ide/arm/palm_bk3710.c index 3839f57..9b8a45d2 100644 --- a/drivers/ide/arm/palm_bk3710.c +++ b/drivers/ide/arm/palm_bk3710.c @@ -74,8 +74,6 @@ struct palm_bk3710_udmatiming { #define BK3710_IORDYTMP 0x78 #define BK3710_IORDYTMS 0x7C -#include "../ide-timing.h" - static unsigned ideclk_period; /* in nanoseconds */ static const struct palm_bk3710_udmatiming palm_bk3710_udmatimings[6] = { diff --git a/drivers/ide/ide-timing.h b/drivers/ide/ide-timing.h deleted file mode 100644 index 98e05f5..0000000 --- a/drivers/ide/ide-timing.h +++ /dev/null @@ -1,183 +0,0 @@ -#ifndef _IDE_TIMING_H -#define _IDE_TIMING_H - -/* - * Copyright (c) 1999-2001 Vojtech Pavlik - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Should you need to contact me, the author, you can do so either by - * e-mail - mail your message to , or by paper mail: - * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic - */ - -#include -#include - -/* - * PIO 0-5, MWDMA 0-2 and UDMA 0-6 timings (in nanoseconds). - * These were taken from ATA/ATAPI-6 standard, rev 0a, except - * for PIO 5, which is a nonstandard extension and UDMA6, which - * is currently supported only by Maxtor drives. - */ - -static struct ide_timing ide_timing[] = { - - { XFER_UDMA_6, 0, 0, 0, 0, 0, 0, 0, 15 }, - { XFER_UDMA_5, 0, 0, 0, 0, 0, 0, 0, 20 }, - { XFER_UDMA_4, 0, 0, 0, 0, 0, 0, 0, 30 }, - { XFER_UDMA_3, 0, 0, 0, 0, 0, 0, 0, 45 }, - - { XFER_UDMA_2, 0, 0, 0, 0, 0, 0, 0, 60 }, - { XFER_UDMA_1, 0, 0, 0, 0, 0, 0, 0, 80 }, - { XFER_UDMA_0, 0, 0, 0, 0, 0, 0, 0, 120 }, - - { XFER_MW_DMA_2, 25, 0, 0, 0, 70, 25, 120, 0 }, - { XFER_MW_DMA_1, 45, 0, 0, 0, 80, 50, 150, 0 }, - { XFER_MW_DMA_0, 60, 0, 0, 0, 215, 215, 480, 0 }, - - { XFER_SW_DMA_2, 60, 0, 0, 0, 120, 120, 240, 0 }, - { XFER_SW_DMA_1, 90, 0, 0, 0, 240, 240, 480, 0 }, - { XFER_SW_DMA_0, 120, 0, 0, 0, 480, 480, 960, 0 }, - - { XFER_PIO_5, 20, 50, 30, 100, 50, 30, 100, 0 }, - { XFER_PIO_4, 25, 70, 25, 120, 70, 25, 120, 0 }, - { XFER_PIO_3, 30, 80, 70, 180, 80, 70, 180, 0 }, - - { XFER_PIO_2, 30, 290, 40, 330, 100, 90, 240, 0 }, - { XFER_PIO_1, 50, 290, 93, 383, 125, 100, 383, 0 }, - { XFER_PIO_0, 70, 290, 240, 600, 165, 150, 600, 0 }, - - { XFER_PIO_SLOW, 120, 290, 240, 960, 290, 240, 960, 0 }, - - { 0xff } -}; - -#define ENOUGH(v, unit) (((v) - 1) / (unit) + 1) -#define EZ(v, unit) ((v) ? ENOUGH(v, unit) : 0) - -static void ide_timing_quantize(struct ide_timing *t, struct ide_timing *q, - int T, int UT) -{ - q->setup = EZ(t->setup * 1000, T); - q->act8b = EZ(t->act8b * 1000, T); - q->rec8b = EZ(t->rec8b * 1000, T); - q->cyc8b = EZ(t->cyc8b * 1000, T); - q->active = EZ(t->active * 1000, T); - q->recover = EZ(t->recover * 1000, T); - q->cycle = EZ(t->cycle * 1000, T); - q->udma = EZ(t->udma * 1000, UT); -} - -static void ide_timing_merge(struct ide_timing *a, struct ide_timing *b, - struct ide_timing *m, unsigned int what) -{ - if (what & IDE_TIMING_SETUP) - m->setup = max(a->setup, b->setup); - if (what & IDE_TIMING_ACT8B) - m->act8b = max(a->act8b, b->act8b); - if (what & IDE_TIMING_REC8B) - m->rec8b = max(a->rec8b, b->rec8b); - if (what & IDE_TIMING_CYC8B) - m->cyc8b = max(a->cyc8b, b->cyc8b); - if (what & IDE_TIMING_ACTIVE) - m->active = max(a->active, b->active); - if (what & IDE_TIMING_RECOVER) - m->recover = max(a->recover, b->recover); - if (what & IDE_TIMING_CYCLE) - m->cycle = max(a->cycle, b->cycle); - if (what & IDE_TIMING_UDMA) - m->udma = max(a->udma, b->udma); -} - -static struct ide_timing *ide_timing_find_mode(u8 speed) -{ - struct ide_timing *t; - - for (t = ide_timing; t->mode != speed; t++) - if (t->mode == 0xff) - return NULL; - return t; -} - -static int ide_timing_compute(ide_drive_t *drive, u8 speed, - struct ide_timing *t, int T, int UT) -{ - struct hd_driveid *id = drive->id; - struct ide_timing *s, p; - - /* - * Find the mode. - */ - s = ide_timing_find_mode(speed); - if (s == NULL) - return -EINVAL; - - /* - * Copy the timing from the table. - */ - *t = *s; - - /* - * If the drive is an EIDE drive, it can tell us it needs extended - * PIO/MWDMA cycle timing. - */ - if (id && id->field_valid & 2) { /* EIDE drive */ - - memset(&p, 0, sizeof(p)); - - if (speed <= XFER_PIO_2) - p.cycle = p.cyc8b = id->eide_pio; - else if (speed <= XFER_PIO_5) - p.cycle = p.cyc8b = id->eide_pio_iordy; - else if (speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2) - p.cycle = id->eide_dma_min; - - ide_timing_merge(&p, t, t, IDE_TIMING_CYCLE | IDE_TIMING_CYC8B); - } - - /* - * Convert the timing to bus clock counts. - */ - ide_timing_quantize(t, t, T, UT); - - /* - * Even in DMA/UDMA modes we still use PIO access for IDENTIFY, - * S.M.A.R.T and some other commands. We have to ensure that the - * DMA cycle timing is slower/equal than the fastest PIO timing. - */ - if (speed >= XFER_SW_DMA_0) { - u8 pio = ide_get_best_pio_mode(drive, 255, 5); - ide_timing_compute(drive, XFER_PIO_0 + pio, &p, T, UT); - ide_timing_merge(&p, t, t, IDE_TIMING_ALL); - } - - /* - * Lengthen active & recovery time so that cycle time is correct. - */ - if (t->act8b + t->rec8b < t->cyc8b) { - t->act8b += (t->cyc8b - (t->act8b + t->rec8b)) / 2; - t->rec8b = t->cyc8b - t->act8b; - } - - if (t->active + t->recover < t->cycle) { - t->active += (t->cycle - (t->active + t->recover)) / 2; - t->recover = t->cycle - t->active; - } - - return 0; -} - -#endif diff --git a/drivers/ide/ide-timings.c b/drivers/ide/ide-timings.c new file mode 100644 index 0000000..ebef6d4 --- /dev/null +++ b/drivers/ide/ide-timings.c @@ -0,0 +1,183 @@ +/* + * Copyright (c) 1999-2001 Vojtech Pavlik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Should you need to contact me, the author, you can do so either by + * e-mail - mail your message to , or by paper mail: + * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic + */ + +#include +#include +#include +#include + +/* + * PIO 0-5, MWDMA 0-2 and UDMA 0-6 timings (in nanoseconds). + * These were taken from ATA/ATAPI-6 standard, rev 0a, except + * for PIO 5, which is a nonstandard extension and UDMA6, which + * is currently supported only by Maxtor drives. + */ + +static struct ide_timing ide_timing[] = { + + { XFER_UDMA_6, 0, 0, 0, 0, 0, 0, 0, 15 }, + { XFER_UDMA_5, 0, 0, 0, 0, 0, 0, 0, 20 }, + { XFER_UDMA_4, 0, 0, 0, 0, 0, 0, 0, 30 }, + { XFER_UDMA_3, 0, 0, 0, 0, 0, 0, 0, 45 }, + + { XFER_UDMA_2, 0, 0, 0, 0, 0, 0, 0, 60 }, + { XFER_UDMA_1, 0, 0, 0, 0, 0, 0, 0, 80 }, + { XFER_UDMA_0, 0, 0, 0, 0, 0, 0, 0, 120 }, + + { XFER_MW_DMA_2, 25, 0, 0, 0, 70, 25, 120, 0 }, + { XFER_MW_DMA_1, 45, 0, 0, 0, 80, 50, 150, 0 }, + { XFER_MW_DMA_0, 60, 0, 0, 0, 215, 215, 480, 0 }, + + { XFER_SW_DMA_2, 60, 0, 0, 0, 120, 120, 240, 0 }, + { XFER_SW_DMA_1, 90, 0, 0, 0, 240, 240, 480, 0 }, + { XFER_SW_DMA_0, 120, 0, 0, 0, 480, 480, 960, 0 }, + + { XFER_PIO_5, 20, 50, 30, 100, 50, 30, 100, 0 }, + { XFER_PIO_4, 25, 70, 25, 120, 70, 25, 120, 0 }, + { XFER_PIO_3, 30, 80, 70, 180, 80, 70, 180, 0 }, + + { XFER_PIO_2, 30, 290, 40, 330, 100, 90, 240, 0 }, + { XFER_PIO_1, 50, 290, 93, 383, 125, 100, 383, 0 }, + { XFER_PIO_0, 70, 290, 240, 600, 165, 150, 600, 0 }, + + { XFER_PIO_SLOW, 120, 290, 240, 960, 290, 240, 960, 0 }, + + { 0xff } +}; + +struct ide_timing *ide_timing_find_mode(u8 speed) +{ + struct ide_timing *t; + + for (t = ide_timing; t->mode != speed; t++) + if (t->mode == 0xff) + return NULL; + return t; +} +EXPORT_SYMBOL_GPL(ide_timing_find_mode); + +#define ENOUGH(v, unit) (((v) - 1) / (unit) + 1) +#define EZ(v, unit) ((v) ? ENOUGH(v, unit) : 0) + +static void ide_timing_quantize(struct ide_timing *t, struct ide_timing *q, + int T, int UT) +{ + q->setup = EZ(t->setup * 1000, T); + q->act8b = EZ(t->act8b * 1000, T); + q->rec8b = EZ(t->rec8b * 1000, T); + q->cyc8b = EZ(t->cyc8b * 1000, T); + q->active = EZ(t->active * 1000, T); + q->recover = EZ(t->recover * 1000, T); + q->cycle = EZ(t->cycle * 1000, T); + q->udma = EZ(t->udma * 1000, UT); +} + +void ide_timing_merge(struct ide_timing *a, struct ide_timing *b, + struct ide_timing *m, unsigned int what) +{ + if (what & IDE_TIMING_SETUP) + m->setup = max(a->setup, b->setup); + if (what & IDE_TIMING_ACT8B) + m->act8b = max(a->act8b, b->act8b); + if (what & IDE_TIMING_REC8B) + m->rec8b = max(a->rec8b, b->rec8b); + if (what & IDE_TIMING_CYC8B) + m->cyc8b = max(a->cyc8b, b->cyc8b); + if (what & IDE_TIMING_ACTIVE) + m->active = max(a->active, b->active); + if (what & IDE_TIMING_RECOVER) + m->recover = max(a->recover, b->recover); + if (what & IDE_TIMING_CYCLE) + m->cycle = max(a->cycle, b->cycle); + if (what & IDE_TIMING_UDMA) + m->udma = max(a->udma, b->udma); +} +EXPORT_SYMBOL_GPL(ide_timing_merge); + +int ide_timing_compute(ide_drive_t *drive, u8 speed, + struct ide_timing *t, int T, int UT) +{ + struct hd_driveid *id = drive->id; + struct ide_timing *s, p; + + /* + * Find the mode. + */ + s = ide_timing_find_mode(speed); + if (s == NULL) + return -EINVAL; + + /* + * Copy the timing from the table. + */ + *t = *s; + + /* + * If the drive is an EIDE drive, it can tell us it needs extended + * PIO/MWDMA cycle timing. + */ + if (id && id->field_valid & 2) { /* EIDE drive */ + + memset(&p, 0, sizeof(p)); + + if (speed <= XFER_PIO_2) + p.cycle = p.cyc8b = id->eide_pio; + else if (speed <= XFER_PIO_5) + p.cycle = p.cyc8b = id->eide_pio_iordy; + else if (speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2) + p.cycle = id->eide_dma_min; + + ide_timing_merge(&p, t, t, IDE_TIMING_CYCLE | IDE_TIMING_CYC8B); + } + + /* + * Convert the timing to bus clock counts. + */ + ide_timing_quantize(t, t, T, UT); + + /* + * Even in DMA/UDMA modes we still use PIO access for IDENTIFY, + * S.M.A.R.T and some other commands. We have to ensure that the + * DMA cycle timing is slower/equal than the fastest PIO timing. + */ + if (speed >= XFER_SW_DMA_0) { + u8 pio = ide_get_best_pio_mode(drive, 255, 5); + ide_timing_compute(drive, XFER_PIO_0 + pio, &p, T, UT); + ide_timing_merge(&p, t, t, IDE_TIMING_ALL); + } + + /* + * Lengthen active & recovery time so that cycle time is correct. + */ + if (t->act8b + t->rec8b < t->cyc8b) { + t->act8b += (t->cyc8b - (t->act8b + t->rec8b)) / 2; + t->rec8b = t->cyc8b - t->act8b; + } + + if (t->active + t->recover < t->cycle) { + t->active += (t->cycle - (t->active + t->recover)) / 2; + t->recover = t->cycle - t->active; + } + + return 0; +} +EXPORT_SYMBOL_GPL(ide_timing_compute); diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c index ad22220..0bfcdd0 100644 --- a/drivers/ide/pci/amd74xx.c +++ b/drivers/ide/pci/amd74xx.c @@ -21,8 +21,6 @@ #include #include -#include "ide-timing.h" - enum { AMD_IDE_CONFIG = 0x41, AMD_CABLE_DETECT = 0x42, diff --git a/drivers/ide/pci/cs5535.c b/drivers/ide/pci/cs5535.c index 2a2cb49..dc97c48 100644 --- a/drivers/ide/pci/cs5535.c +++ b/drivers/ide/pci/cs5535.c @@ -26,8 +26,6 @@ #include #include -#include "ide-timing.h" - #define MSR_ATAC_BASE 0x51300000 #define ATAC_GLD_MSR_CAP (MSR_ATAC_BASE+0) #define ATAC_GLD_MSR_CONFIG (MSR_ATAC_BASE+0x01) diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c index e127eb2..2389945 100644 --- a/drivers/ide/pci/sis5513.c +++ b/drivers/ide/pci/sis5513.c @@ -52,8 +52,6 @@ #include #include -#include "ide-timing.h" - /* registers layout and init values are chipset family dependant */ #define ATA_16 0x01 @@ -616,7 +614,6 @@ MODULE_LICENSE("GPL"); /* * TODO: * - CLEANUP - * - Use drivers/ide/ide-timing.h ! * - More checks in the config registers (force values instead of * relying on the BIOS setting them correctly). * - Further optimisations ? diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c index 3ed9728..e47384c 100644 --- a/drivers/ide/pci/via82cxxx.c +++ b/drivers/ide/pci/via82cxxx.c @@ -35,8 +35,6 @@ #include #endif -#include "ide-timing.h" - #define VIA_IDE_ENABLE 0x40 #define VIA_IDE_CONFIG 0x41 #define VIA_FIFO_CONFIG 0x43 diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index dcb2c46..5b91d23 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -48,8 +48,6 @@ #include #endif -#include "../ide-timing.h" - #undef IDE_PMAC_DEBUG #define DMA_WAIT_TIMEOUT 50 diff --git a/include/linux/ide.h b/include/linux/ide.h index 81c6ea4..057001f 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1299,6 +1299,11 @@ enum { IDE_TIMING_CYCLE | IDE_TIMING_UDMA, }; +struct ide_timing *ide_timing_find_mode(u8); +void ide_timing_merge(struct ide_timing *, struct ide_timing *, + struct ide_timing *, unsigned int); +int ide_timing_compute(ide_drive_t *, u8, struct ide_timing *, int, int); + typedef struct ide_pio_timings_s { int setup_time; /* Address setup (ns) minimum */ int active_time; /* Active pulse (ns) minimum */ -- cgit v0.10.2 From cc57ccc03d2a9b5622300f4b59fc8b54408c6e24 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:37 +0200 Subject: ali14xx: convert to use ide_timing_find_mode() There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 994b6d3..e9afafd 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -981,6 +981,7 @@ config BLK_DEV_4DRIVES config BLK_DEV_ALI14XX tristate "ALI M14xx support" + select IDE_TIMINGS help This driver is enabled at runtime using the "ali14xx.probe" kernel boot parameter. It enables support for the secondary IDE interface diff --git a/drivers/ide/legacy/ali14xx.c b/drivers/ide/legacy/ali14xx.c index 052125f..4ec1973 100644 --- a/drivers/ide/legacy/ali14xx.c +++ b/drivers/ide/legacy/ali14xx.c @@ -117,10 +117,11 @@ static void ali14xx_set_pio_mode(ide_drive_t *drive, const u8 pio) u8 param1, param2, param3, param4; unsigned long flags; int bus_speed = ide_vlb_clk ? ide_vlb_clk : 50; + struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); /* calculate timing, according to PIO mode */ time1 = ide_pio_cycle_time(drive, pio); - time2 = ide_pio_timings[pio].active_time; + time2 = t->active; param3 = param1 = (time2 * bus_speed + 999) / 1000; param4 = param2 = (time1 * bus_speed + 999) / 1000 - param1; if (pio < 3) { -- cgit v0.10.2 From b32b76f72df17de891181b47e714f9f897bb62a1 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:37 +0200 Subject: ht6560b: convert to use ide_timing_find_mode() There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index e9afafd..615e0bc0 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -1001,6 +1001,7 @@ config BLK_DEV_DTC2278 config BLK_DEV_HT6560B tristate "Holtek HT6560B support" + select IDE_TIMINGS help This driver is enabled at runtime using the "ht6560b.probe" kernel boot parameter. It enables support for the secondary IDE interface diff --git a/drivers/ide/legacy/ht6560b.c b/drivers/ide/legacy/ht6560b.c index dd6dfb3..bd2f579 100644 --- a/drivers/ide/legacy/ht6560b.c +++ b/drivers/ide/legacy/ht6560b.c @@ -216,6 +216,7 @@ static u8 ht_pio2timings(ide_drive_t *drive, const u8 pio) if (pio) { unsigned int cycle_time; + struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); cycle_time = ide_pio_cycle_time(drive, pio); @@ -224,10 +225,8 @@ static u8 ht_pio2timings(ide_drive_t *drive, const u8 pio) * actual cycle time for recovery and activity * according system bus speed. */ - active_time = ide_pio_timings[pio].active_time; - recovery_time = cycle_time - - active_time - - ide_pio_timings[pio].setup_time; + active_time = t->active; + recovery_time = cycle_time - active_time - t->setup; /* * Cycle times should be Vesa bus cycles */ -- cgit v0.10.2 From 2feecface7fd62be75bd4961324dc279a04bef22 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:37 +0200 Subject: qd65xx: convert to use ide_timing_find_mode() There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 615e0bc0..86db707 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -1011,6 +1011,7 @@ config BLK_DEV_HT6560B config BLK_DEV_QD65XX tristate "QDI QD65xx support" + select IDE_TIMINGS help This driver is enabled at runtime using the "qd65xx.probe" kernel boot parameter. It permits faster I/O speeds to be set. See the diff --git a/drivers/ide/legacy/qd65xx.c b/drivers/ide/legacy/qd65xx.c index 51dba82..63f6c31 100644 --- a/drivers/ide/legacy/qd65xx.c +++ b/drivers/ide/legacy/qd65xx.c @@ -207,6 +207,7 @@ static void qd6500_set_pio_mode(ide_drive_t *drive, const u8 pio) static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio) { ide_hwif_t *hwif = drive->hwif; + struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); unsigned int cycle_time; int active_time = 175; int recovery_time = 415; /* worst case values from the dos driver */ @@ -236,7 +237,7 @@ static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio) active_time = 110; recovery_time = cycle_time - 120; } else { - active_time = ide_pio_timings[pio].active_time; + active_time = t->active; recovery_time = cycle_time - active_time; } } -- cgit v0.10.2 From 288911af1209f5aa6119c9ec6d5a9bdb16a385b5 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:37 +0200 Subject: alim15x3: convert to use ide_timing_find_mode() There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 86db707..cadce92 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -458,6 +458,7 @@ config BLK_DEV_AEC62XX config BLK_DEV_ALI15X3 tristate "ALI M15x3 chipset support" + select IDE_TIMINGS select BLK_DEV_IDEDMA_PCI help This driver ensures (U)DMA support for ALI 1533, 1543 and 1543C diff --git a/drivers/ide/pci/alim15x3.c b/drivers/ide/pci/alim15x3.c index f2de00ad..80d19c0 100644 --- a/drivers/ide/pci/alim15x3.c +++ b/drivers/ide/pci/alim15x3.c @@ -69,7 +69,8 @@ static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio) { ide_hwif_t *hwif = HWIF(drive); struct pci_dev *dev = to_pci_dev(hwif->dev); - int s_time, a_time, c_time; + struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); + int s_time = t->setup, a_time = t->active, c_time = t->cycle; u8 s_clc, a_clc, r_clc; unsigned long flags; int bus_speed = ide_pci_clk ? ide_pci_clk : 33; @@ -78,13 +79,10 @@ static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio) u8 cd_dma_fifo = 0; int unit = drive->select.b.unit & 1; - s_time = ide_pio_timings[pio].setup_time; - a_time = ide_pio_timings[pio].active_time; if ((s_clc = (s_time * bus_speed + 999) / 1000) >= 8) s_clc = 0; if ((a_clc = (a_time * bus_speed + 999) / 1000) >= 8) a_clc = 0; - c_time = ide_pio_timings[pio].cycle_time; if (!(r_clc = (c_time * bus_speed + 999) / 1000 - a_clc - s_clc)) { r_clc = 1; -- cgit v0.10.2 From 17b500de0ad79a306a0cd8acfe9a9f086ad28b4c Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:38 +0200 Subject: cmd640: convert to use ide_timing_find_mode() There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index cadce92..e072bde 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -329,6 +329,7 @@ config BLK_DEV_PLATFORM config BLK_DEV_CMD640 tristate "CMD640 chipset bugfix/support" depends on X86 + select IDE_TIMINGS ---help--- The CMD-Technologies CMD640 IDE chip is used on many common 486 and Pentium motherboards, usually in combination with a "Neptune" or diff --git a/drivers/ide/pci/cmd640.c b/drivers/ide/pci/cmd640.c index cd1ba14..baa26a2 100644 --- a/drivers/ide/pci/cmd640.c +++ b/drivers/ide/pci/cmd640.c @@ -521,6 +521,7 @@ static void program_drive_counts(ide_drive_t *drive, unsigned int index) static void cmd640_set_mode(ide_drive_t *drive, unsigned int index, u8 pio_mode, unsigned int cycle_time) { + struct ide_timing *t; int setup_time, active_time, recovery_time, clock_time; u8 setup_count, active_count, recovery_count, recovery_count2, cycle_count; int bus_speed; @@ -532,8 +533,11 @@ static void cmd640_set_mode(ide_drive_t *drive, unsigned int index, if (pio_mode > 5) pio_mode = 5; - setup_time = ide_pio_timings[pio_mode].setup_time; - active_time = ide_pio_timings[pio_mode].active_time; + + t = ide_timing_find_mode(XFER_PIO_0 + pio_mode); + setup_time = t->setup; + active_time = t->active; + recovery_time = cycle_time - (setup_time + active_time); clock_time = 1000 / bus_speed; cycle_count = DIV_ROUND_UP(cycle_time, clock_time); -- cgit v0.10.2 From 86a0e12fcb590a6a84b90ae00e6d6564ce770749 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:38 +0200 Subject: cmd64x: convert to use ide_timing_find_mode() There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index e072bde..d5526d3 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -495,6 +495,7 @@ config BLK_DEV_ATIIXP config BLK_DEV_CMD64X tristate "CMD64{3|6|8|9} chipset support" + select IDE_TIMINGS select BLK_DEV_IDEDMA_PCI help Say Y here if you have an IDE controller which uses any of these diff --git a/drivers/ide/pci/cmd64x.c b/drivers/ide/pci/cmd64x.c index ca4774a..cfa784b 100644 --- a/drivers/ide/pci/cmd64x.c +++ b/drivers/ide/pci/cmd64x.c @@ -116,6 +116,7 @@ static void cmd64x_tune_pio(ide_drive_t *drive, const u8 pio) { ide_hwif_t *hwif = HWIF(drive); struct pci_dev *dev = to_pci_dev(hwif->dev); + struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); unsigned int cycle_time; u8 setup_count, arttim = 0; @@ -124,10 +125,9 @@ static void cmd64x_tune_pio(ide_drive_t *drive, const u8 pio) cycle_time = ide_pio_cycle_time(drive, pio); - program_cycle_times(drive, cycle_time, - ide_pio_timings[pio].active_time); + program_cycle_times(drive, cycle_time, t->active); - setup_count = quantize_timing(ide_pio_timings[pio].setup_time, + setup_count = quantize_timing(t->setup, 1000 / (ide_pci_clk ? ide_pci_clk : 33)); /* -- cgit v0.10.2 From 713a590dea9a61e84f2837e75c5e9429d95908b5 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:38 +0200 Subject: cy82c693: convert to use ide_timing_find_mode() There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index d5526d3..7446db6 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -510,6 +510,7 @@ config BLK_DEV_TRIFLEX config BLK_DEV_CY82C693 tristate "CY82C693 chipset support" + select IDE_TIMINGS select BLK_DEV_IDEDMA_PCI help This driver adds detection and support for the CY82C693 chipset diff --git a/drivers/ide/pci/cy82c693.c b/drivers/ide/pci/cy82c693.c index 8c534af..e14ad55 100644 --- a/drivers/ide/pci/cy82c693.c +++ b/drivers/ide/pci/cy82c693.c @@ -133,6 +133,7 @@ static int calc_clk(int time, int bus_speed) */ static void compute_clocks(u8 pio, pio_clocks_t *p_pclk) { + struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); int clk1, clk2; int bus_speed = ide_pci_clk ? ide_pci_clk : 33; @@ -141,15 +142,13 @@ static void compute_clocks(u8 pio, pio_clocks_t *p_pclk) */ /* let's calc the address setup time clocks */ - p_pclk->address_time = (u8)calc_clk(ide_pio_timings[pio].setup_time, bus_speed); + p_pclk->address_time = (u8)calc_clk(t->setup, bus_speed); /* let's calc the active and recovery time clocks */ - clk1 = calc_clk(ide_pio_timings[pio].active_time, bus_speed); + clk1 = calc_clk(t->active, bus_speed); /* calc recovery timing */ - clk2 = ide_pio_timings[pio].cycle_time - - ide_pio_timings[pio].active_time - - ide_pio_timings[pio].setup_time; + clk2 = t->cycle - t->active - t->setup; clk2 = calc_clk(clk2, bus_speed); -- cgit v0.10.2 From 3f847571a1cf845a338bcd352f31240b3615f40d Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:38 +0200 Subject: sl82c105: convert to use ide_timing_find_mode() There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 7446db6..5f4d6ea 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -703,6 +703,7 @@ config BLK_DEV_SIS5513 config BLK_DEV_SL82C105 tristate "Winbond SL82c105 support" depends on (PPC || ARM) + select IDE_TIMINGS select BLK_DEV_IDEDMA_PCI help If you have a Winbond SL82c105 IDE controller, say Y here to enable diff --git a/drivers/ide/pci/sl82c105.c b/drivers/ide/pci/sl82c105.c index ce84fa0..6efbde2 100644 --- a/drivers/ide/pci/sl82c105.c +++ b/drivers/ide/pci/sl82c105.c @@ -47,10 +47,11 @@ */ static unsigned int get_pio_timings(ide_drive_t *drive, u8 pio) { + struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); unsigned int cmd_on, cmd_off; u8 iordy = 0; - cmd_on = (ide_pio_timings[pio].active_time + 29) / 30; + cmd_on = (t->active + 29) / 30; cmd_off = (ide_pio_cycle_time(drive, pio) - 30 * cmd_on + 29) / 30; if (cmd_on == 0) -- cgit v0.10.2 From b96f7384646519da54ad50bfad8d53b915b70cb3 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:38 +0200 Subject: ide-mpc8xx: convert to use ide_timing_find_mode() Also fix (disabled) debugging code while at it. There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ppc/mpc8xx.c b/drivers/ide/ppc/mpc8xx.c index 236f9c3..33bc699 100644 --- a/drivers/ide/ppc/mpc8xx.c +++ b/drivers/ide/ppc/mpc8xx.c @@ -89,7 +89,7 @@ ide_ioport_desc_t ioport_dsc[MAX_HWIFS] = { #endif /* IDE0_BASE_OFFSET */ }; -ide_pio_timings_t ide_pio_clocks[6]; +struct ide_timing ide_pio_clocks[6]; int hold_time[6] = {30, 20, 15, 10, 10, 10 }; /* PIO Mode 5 with IORDY (nonstandard) */ /* @@ -200,30 +200,23 @@ static int __init m8xx_ide_init_ports(hw_regs_t *hw, unsigned long data_port) /* Compute clock cycles for PIO timings */ for (i=0; i<6; ++i) { bd_t *binfo = (bd_t *)__res; + struct ide_timing *t, *n; hold_time[i] = PCMCIA_MK_CLKS (hold_time[i], binfo->bi_busfreq); - ide_pio_clocks[i].setup_time = - PCMCIA_MK_CLKS (ide_pio_timings[i].setup_time, - binfo->bi_busfreq); - ide_pio_clocks[i].active_time = - PCMCIA_MK_CLKS (ide_pio_timings[i].active_time, - binfo->bi_busfreq); - ide_pio_clocks[i].cycle_time = - PCMCIA_MK_CLKS (ide_pio_timings[i].cycle_time, - binfo->bi_busfreq); + + t = ide_timing_find_mode(XFER_PIO_0 + i); + n = &ide_pio_clocks[i]; + + n->setup = PCMCIA_MK_CLKS(t->setup, binfo->bi_busfreq); + n->active = PCMCIA_MK_CLKS(t->active, binfo->bi_busfreq); + n->cycle = PCMCIA_MK_CLKS(t->cycle, binfo->bi_busfreq); #if 0 printk ("PIO mode %d timings: %d/%d/%d => %d/%d/%d\n", i, - ide_pio_clocks[i].setup_time, - ide_pio_clocks[i].active_time, - ide_pio_clocks[i].hold_time, - ide_pio_clocks[i].cycle_time, - ide_pio_timings[i].setup_time, - ide_pio_timings[i].active_time, - ide_pio_timings[i].hold_time, - ide_pio_timings[i].cycle_time); + t->setup, t->active, t->cycle, + n->setup, n->active, n->cycle); #endif } } @@ -408,8 +401,8 @@ static void m8xx_ide_set_pio_mode(ide_drive_t *drive, const u8 pio) mask = ~(PCMCIA_SHT(0xFF) | PCMCIA_SST(0xFF) | PCMCIA_SL(0xFF)); timing = PCMCIA_SHT(hold_time[pio] ) - | PCMCIA_SST(ide_pio_clocks[pio].setup_time ) - | PCMCIA_SL (ide_pio_clocks[pio].active_time) + | PCMCIA_SST(ide_pio_clocks[pio].setup) + | PCMCIA_SL (ide_pio_clocks[pio].active) ; #if 1 -- cgit v0.10.2 From 8a97206e31dc2e2f8f9b4d97e234b5c701fe9894 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:38 +0200 Subject: ide-pmac: convert to use ide_timing_find_mode() Also update my Copyrights while at it. There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index 5b91d23..cfa103a 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -5,7 +5,7 @@ * for doing DMA. * * Copyright (C) 1998-2003 Paul Mackerras & Ben. Herrenschmidt - * Copyright (C) 2007 Bartlomiej Zolnierkiewicz + * Copyright (C) 2007-2008 Bartlomiej Zolnierkiewicz * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -493,6 +493,7 @@ static void pmac_outbsync(ide_hwif_t *hwif, u8 value, unsigned long port) static void pmac_ide_set_pio_mode(ide_drive_t *drive, const u8 pio) { + struct ide_timing *tim = ide_timing_find_mode(XFER_PIO_0 + pio); u32 *timings, t; unsigned accessTicks, recTicks; unsigned accessTime, recTime; @@ -524,10 +525,9 @@ pmac_ide_set_pio_mode(ide_drive_t *drive, const u8 pio) } case controller_kl_ata4: /* 66Mhz cell */ - recTime = cycle_time - ide_pio_timings[pio].active_time - - ide_pio_timings[pio].setup_time; + recTime = cycle_time - tim->active - tim->setup; recTime = max(recTime, 150U); - accessTime = ide_pio_timings[pio].active_time; + accessTime = tim->active; accessTime = max(accessTime, 150U); accessTicks = SYSCLK_TICKS_66(accessTime); accessTicks = min(accessTicks, 0x1fU); @@ -540,10 +540,9 @@ pmac_ide_set_pio_mode(ide_drive_t *drive, const u8 pio) default: { /* 33Mhz cell */ int ebit = 0; - recTime = cycle_time - ide_pio_timings[pio].active_time - - ide_pio_timings[pio].setup_time; + recTime = cycle_time - tim->active - tim->setup; recTime = max(recTime, 150U); - accessTime = ide_pio_timings[pio].active_time; + accessTime = tim->active; accessTime = max(accessTime, 150U); accessTicks = SYSCLK_TICKS(accessTime); accessTicks = min(accessTicks, 0x1fU); -- cgit v0.10.2 From c9d6c1a2379373219bb3271bdcbdc0ab2edf349d Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:39 +0200 Subject: ide: move ide_pio_cycle_time() to ide-timings.c All ide_pio_cycle_time() users already select CONFIG_IDE_TIMINGS so move the function from ide-lib.c to ide-timings.c. While at it: - convert ide_pio_cycle_time() to use ide_timing_find_mode() - cleanup ide_pio_cycle_time() a bit There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c index 7e053d2..efa5bfa 100644 --- a/drivers/ide/ide-lib.c +++ b/drivers/ide/ide-lib.c @@ -188,29 +188,6 @@ static int ide_scan_pio_blacklist (char *model) return -1; } -unsigned int ide_pio_cycle_time(ide_drive_t *drive, u8 pio) -{ - struct hd_driveid *id = drive->id; - int cycle_time = 0; - - if (id->field_valid & 2) { - if (id->capability & 8) - cycle_time = id->eide_pio_iordy; - else - cycle_time = id->eide_pio; - } - - /* conservative "downgrade" for all pre-ATA2 drives */ - if (pio < 3) { - if (cycle_time && cycle_time < ide_pio_timings[pio].cycle_time) - cycle_time = 0; /* use standard timing */ - } - - return cycle_time ? cycle_time : ide_pio_timings[pio].cycle_time; -} - -EXPORT_SYMBOL_GPL(ide_pio_cycle_time); - /** * ide_get_best_pio_mode - get PIO mode from drive * @drive: drive to consider diff --git a/drivers/ide/ide-timings.c b/drivers/ide/ide-timings.c index ebef6d4..8c2f832 100644 --- a/drivers/ide/ide-timings.c +++ b/drivers/ide/ide-timings.c @@ -1,5 +1,6 @@ /* * Copyright (c) 1999-2001 Vojtech Pavlik + * Copyright (c) 2007-2008 Bartlomiej Zolnierkiewicz * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -75,6 +76,27 @@ struct ide_timing *ide_timing_find_mode(u8 speed) } EXPORT_SYMBOL_GPL(ide_timing_find_mode); +u16 ide_pio_cycle_time(ide_drive_t *drive, u8 pio) +{ + struct hd_driveid *id = drive->id; + struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); + u16 cycle = 0; + + if (id->field_valid & 2) { + if (id->capability & 8) + cycle = id->eide_pio_iordy; + else + cycle = id->eide_pio; + + /* conservative "downgrade" for all pre-ATA2 drives */ + if (pio < 3 && cycle < t->cycle) + cycle = 0; /* use standard timing */ + } + + return cycle ? cycle : t->cycle; +} +EXPORT_SYMBOL_GPL(ide_pio_cycle_time); + #define ENOUGH(v, unit) (((v) - 1) / (unit) + 1) #define EZ(v, unit) ((v) ? ENOUGH(v, unit) : 0) diff --git a/include/linux/ide.h b/include/linux/ide.h index 057001f..3899c76 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1300,6 +1300,7 @@ enum { }; struct ide_timing *ide_timing_find_mode(u8); +u16 ide_pio_cycle_time(ide_drive_t *, u8); void ide_timing_merge(struct ide_timing *, struct ide_timing *, struct ide_timing *, unsigned int); int ide_timing_compute(ide_drive_t *, u8, struct ide_timing *, int, int); @@ -1311,7 +1312,6 @@ typedef struct ide_pio_timings_s { /* active + recovery (+ setup for some chips) */ } ide_pio_timings_t; -unsigned int ide_pio_cycle_time(ide_drive_t *, u8); u8 ide_get_best_pio_mode(ide_drive_t *, u8, u8); extern const ide_pio_timings_t ide_pio_timings[6]; -- cgit v0.10.2 From 3e153cfb5e38ae237ff27a10a833946ac95db8a4 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:39 +0200 Subject: ide: remove no longer used ide_pio_timings[] Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c index efa5bfa..3e12f22 100644 --- a/drivers/ide/ide-lib.c +++ b/drivers/ide/ide-lib.c @@ -76,23 +76,6 @@ static u8 ide_rate_filter(ide_drive_t *drive, u8 speed) } /* - * Standard (generic) timings for PIO modes, from ATA2 specification. - * These timings are for access to the IDE data port register *only*. - * Some drives may specify a mode, while also specifying a different - * value for cycle_time (from drive identification data). - */ -const ide_pio_timings_t ide_pio_timings[6] = { - { 70, 165, 600 }, /* PIO Mode 0 */ - { 50, 125, 383 }, /* PIO Mode 1 */ - { 30, 100, 240 }, /* PIO Mode 2 */ - { 30, 80, 180 }, /* PIO Mode 3 with IORDY */ - { 25, 70, 120 }, /* PIO Mode 4 with IORDY */ - { 20, 50, 100 } /* PIO Mode 5 with IORDY (nonstandard) */ -}; - -EXPORT_SYMBOL_GPL(ide_pio_timings); - -/* * Shared data/functions for determining best PIO mode for an IDE drive. * Most of this stuff originally lived in cmd640.c, and changes to the * ide_pio_blacklist[] table should be made with EXTREME CAUTION to avoid diff --git a/include/linux/ide.h b/include/linux/ide.h index 3899c76..4e44525 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1305,15 +1305,7 @@ void ide_timing_merge(struct ide_timing *, struct ide_timing *, struct ide_timing *, unsigned int); int ide_timing_compute(ide_drive_t *, u8, struct ide_timing *, int, int); -typedef struct ide_pio_timings_s { - int setup_time; /* Address setup (ns) minimum */ - int active_time; /* Active pulse (ns) minimum */ - int cycle_time; /* Cycle time (ns) minimum = */ - /* active + recovery (+ setup for some chips) */ -} ide_pio_timings_t; - u8 ide_get_best_pio_mode(ide_drive_t *, u8, u8); -extern const ide_pio_timings_t ide_pio_timings[6]; int ide_set_pio_mode(ide_drive_t *, u8); int ide_set_dma_mode(ide_drive_t *, u8); -- cgit v0.10.2 From 9ad540937554a3779c5fe7af13aa390b1d2aeb3e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:39 +0200 Subject: ide: move PIO blacklist to ide-pio-blacklist.c Move PIO blacklist to ide-pio-blacklist.c. While at it: - fix comment - fix whitespace damage There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile index cb13506..8605536 100644 --- a/drivers/ide/Makefile +++ b/drivers/ide/Makefile @@ -11,7 +11,8 @@ EXTRA_CFLAGS += -Idrivers/ide -ide-core-y += ide.o ide-io.o ide-iops.o ide-lib.o ide-probe.o ide-taskfile.o +ide-core-y += ide.o ide-io.o ide-iops.o ide-lib.o ide-probe.o ide-taskfile.o \ + ide-pio-blacklist.o # core IDE code ide-core-$(CONFIG_IDE_TIMINGS) += ide-timings.o diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c index 3e12f22..13af72f 100644 --- a/drivers/ide/ide-lib.c +++ b/drivers/ide/ide-lib.c @@ -75,102 +75,6 @@ static u8 ide_rate_filter(ide_drive_t *drive, u8 speed) return min(speed, mode); } -/* - * Shared data/functions for determining best PIO mode for an IDE drive. - * Most of this stuff originally lived in cmd640.c, and changes to the - * ide_pio_blacklist[] table should be made with EXTREME CAUTION to avoid - * breaking the fragile cmd640.c support. - */ - -/* - * Black list. Some drives incorrectly report their maximal PIO mode, - * at least in respect to CMD640. Here we keep info on some known drives. - */ -static struct ide_pio_info { - const char *name; - int pio; -} ide_pio_blacklist [] = { - { "Conner Peripherals 540MB - CFS540A", 3 }, - - { "WDC AC2700", 3 }, - { "WDC AC2540", 3 }, - { "WDC AC2420", 3 }, - { "WDC AC2340", 3 }, - { "WDC AC2250", 0 }, - { "WDC AC2200", 0 }, - { "WDC AC21200", 4 }, - { "WDC AC2120", 0 }, - { "WDC AC2850", 3 }, - { "WDC AC1270", 3 }, - { "WDC AC1170", 1 }, - { "WDC AC1210", 1 }, - { "WDC AC280", 0 }, - { "WDC AC31000", 3 }, - { "WDC AC31200", 3 }, - - { "Maxtor 7131 AT", 1 }, - { "Maxtor 7171 AT", 1 }, - { "Maxtor 7213 AT", 1 }, - { "Maxtor 7245 AT", 1 }, - { "Maxtor 7345 AT", 1 }, - { "Maxtor 7546 AT", 3 }, - { "Maxtor 7540 AV", 3 }, - - { "SAMSUNG SHD-3121A", 1 }, - { "SAMSUNG SHD-3122A", 1 }, - { "SAMSUNG SHD-3172A", 1 }, - - { "ST5660A", 3 }, - { "ST3660A", 3 }, - { "ST3630A", 3 }, - { "ST3655A", 3 }, - { "ST3391A", 3 }, - { "ST3390A", 1 }, - { "ST3600A", 1 }, - { "ST3290A", 0 }, - { "ST3144A", 0 }, - { "ST3491A", 1 }, /* reports 3, should be 1 or 2 (depending on */ - /* drive) according to Seagates FIND-ATA program */ - - { "QUANTUM ELS127A", 0 }, - { "QUANTUM ELS170A", 0 }, - { "QUANTUM LPS240A", 0 }, - { "QUANTUM LPS210A", 3 }, - { "QUANTUM LPS270A", 3 }, - { "QUANTUM LPS365A", 3 }, - { "QUANTUM LPS540A", 3 }, - { "QUANTUM LIGHTNING 540A", 3 }, - { "QUANTUM LIGHTNING 730A", 3 }, - - { "QUANTUM FIREBALL_540", 3 }, /* Older Quantum Fireballs don't work */ - { "QUANTUM FIREBALL_640", 3 }, - { "QUANTUM FIREBALL_1080", 3 }, - { "QUANTUM FIREBALL_1280", 3 }, - { NULL, 0 } -}; - -/** - * ide_scan_pio_blacklist - check for a blacklisted drive - * @model: Drive model string - * - * This routine searches the ide_pio_blacklist for an entry - * matching the start/whole of the supplied model name. - * - * Returns -1 if no match found. - * Otherwise returns the recommended PIO mode from ide_pio_blacklist[]. - */ - -static int ide_scan_pio_blacklist (char *model) -{ - struct ide_pio_info *p; - - for (p = ide_pio_blacklist; p->name != NULL; p++) { - if (strncmp(p->name, model, strlen(p->name)) == 0) - return p->pio; - } - return -1; -} - /** * ide_get_best_pio_mode - get PIO mode from drive * @drive: drive to consider diff --git a/drivers/ide/ide-pio-blacklist.c b/drivers/ide/ide-pio-blacklist.c new file mode 100644 index 0000000..a8c2c8f8 --- /dev/null +++ b/drivers/ide/ide-pio-blacklist.c @@ -0,0 +1,94 @@ +/* + * PIO blacklist. Some drives incorrectly report their maximal PIO mode, + * at least in respect to CMD640. Here we keep info on some known drives. + * + * Changes to the ide_pio_blacklist[] should be made with EXTREME CAUTION + * to avoid breaking the fragile cmd640.c support. + */ + +#include + +static struct ide_pio_info { + const char *name; + int pio; +} ide_pio_blacklist [] = { + { "Conner Peripherals 540MB - CFS540A", 3 }, + + { "WDC AC2700", 3 }, + { "WDC AC2540", 3 }, + { "WDC AC2420", 3 }, + { "WDC AC2340", 3 }, + { "WDC AC2250", 0 }, + { "WDC AC2200", 0 }, + { "WDC AC21200", 4 }, + { "WDC AC2120", 0 }, + { "WDC AC2850", 3 }, + { "WDC AC1270", 3 }, + { "WDC AC1170", 1 }, + { "WDC AC1210", 1 }, + { "WDC AC280", 0 }, + { "WDC AC31000", 3 }, + { "WDC AC31200", 3 }, + + { "Maxtor 7131 AT", 1 }, + { "Maxtor 7171 AT", 1 }, + { "Maxtor 7213 AT", 1 }, + { "Maxtor 7245 AT", 1 }, + { "Maxtor 7345 AT", 1 }, + { "Maxtor 7546 AT", 3 }, + { "Maxtor 7540 AV", 3 }, + + { "SAMSUNG SHD-3121A", 1 }, + { "SAMSUNG SHD-3122A", 1 }, + { "SAMSUNG SHD-3172A", 1 }, + + { "ST5660A", 3 }, + { "ST3660A", 3 }, + { "ST3630A", 3 }, + { "ST3655A", 3 }, + { "ST3391A", 3 }, + { "ST3390A", 1 }, + { "ST3600A", 1 }, + { "ST3290A", 0 }, + { "ST3144A", 0 }, + { "ST3491A", 1 }, /* reports 3, should be 1 or 2 (depending on drive) + according to Seagate's FIND-ATA program */ + + { "QUANTUM ELS127A", 0 }, + { "QUANTUM ELS170A", 0 }, + { "QUANTUM LPS240A", 0 }, + { "QUANTUM LPS210A", 3 }, + { "QUANTUM LPS270A", 3 }, + { "QUANTUM LPS365A", 3 }, + { "QUANTUM LPS540A", 3 }, + { "QUANTUM LIGHTNING 540A", 3 }, + { "QUANTUM LIGHTNING 730A", 3 }, + + { "QUANTUM FIREBALL_540", 3 }, /* Older Quantum Fireballs don't work */ + { "QUANTUM FIREBALL_640", 3 }, + { "QUANTUM FIREBALL_1080", 3 }, + { "QUANTUM FIREBALL_1280", 3 }, + { NULL, 0 } +}; + +/** + * ide_scan_pio_blacklist - check for a blacklisted drive + * @model: Drive model string + * + * This routine searches the ide_pio_blacklist for an entry + * matching the start/whole of the supplied model name. + * + * Returns -1 if no match found. + * Otherwise returns the recommended PIO mode from ide_pio_blacklist[]. + */ + +int ide_scan_pio_blacklist(char *model) +{ + struct ide_pio_info *p; + + for (p = ide_pio_blacklist; p->name != NULL; p++) { + if (strncmp(p->name, model, strlen(p->name)) == 0) + return p->pio; + } + return -1; +} diff --git a/include/linux/ide.h b/include/linux/ide.h index 4e44525..535c439 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1305,6 +1305,8 @@ void ide_timing_merge(struct ide_timing *, struct ide_timing *, struct ide_timing *, unsigned int); int ide_timing_compute(ide_drive_t *, u8, struct ide_timing *, int, int); +int ide_scan_pio_blacklist(char *); + u8 ide_get_best_pio_mode(ide_drive_t *, u8, u8); int ide_set_pio_mode(ide_drive_t *, u8); -- cgit v0.10.2 From 256c5f8eef7b9a8c8a85c15c58cda9df455f947e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:39 +0200 Subject: ide: fix hwif-s initialization * Add ide_hwifs[] entry initialization to ide_find_port_slot() and remove ide_init_port_data() calls from host drivers. * Unexport ide_init_port_data(). * Remove no longer needed init_ide_data(). Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/arm/palm_bk3710.c b/drivers/ide/arm/palm_bk3710.c index 9b8a45d2..c79b85b 100644 --- a/drivers/ide/arm/palm_bk3710.c +++ b/drivers/ide/arm/palm_bk3710.c @@ -400,7 +400,6 @@ static int __devinit palm_bk3710_probe(struct platform_device *pdev) i = hwif->index; - ide_init_port_data(hwif, i); ide_init_port_hw(hwif, &hw); default_hwif_mmiops(hwif); diff --git a/drivers/ide/h8300/ide-h8300.c b/drivers/ide/h8300/ide-h8300.c index ae37ee5..dfce105 100644 --- a/drivers/ide/h8300/ide-h8300.c +++ b/drivers/ide/h8300/ide-h8300.c @@ -199,7 +199,6 @@ static int __init h8300_ide_init(void) } index = hwif->index; - ide_init_port_data(hwif, index); ide_init_port_hw(hwif, &hw); hwif_setup(hwif); hwif->host_flags = IDE_HFLAG_NO_IO_32BIT; diff --git a/drivers/ide/ide-pnp.c b/drivers/ide/ide-pnp.c index adbd017..fc1e10c 100644 --- a/drivers/ide/ide-pnp.c +++ b/drivers/ide/ide-pnp.c @@ -62,7 +62,6 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) u8 index = hwif->index; u8 idx[4] = { index, 0xff, 0xff, 0xff }; - ide_init_port_data(hwif, index); ide_init_port_hw(hwif, &hw); printk(KERN_INFO "ide%d: generic PnP IDE interface\n", index); diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index d21e51a..592d424 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1473,22 +1473,26 @@ ide_hwif_t *ide_find_port_slot(const struct ide_port_info *d) for (; i < MAX_HWIFS; i++) { hwif = &ide_hwifs[i]; if (hwif->chipset == ide_unknown) - return hwif; + goto out_found; } } else { for (i = 2; i < MAX_HWIFS; i++) { hwif = &ide_hwifs[i]; if (hwif->chipset == ide_unknown) - return hwif; + goto out_found; } for (i = 0; i < 2 && i < MAX_HWIFS; i++) { hwif = &ide_hwifs[i]; if (hwif->chipset == ide_unknown) - return hwif; + goto out_found; } } return NULL; + +out_found: + ide_init_port_data(hwif, i); + return hwif; } EXPORT_SYMBOL_GPL(ide_find_port_slot); diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 2b84535..32d8ee2 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -121,7 +121,6 @@ void ide_init_port_data(ide_hwif_t *hwif, unsigned int index) ide_port_init_devices_data(hwif); } -EXPORT_SYMBOL_GPL(ide_init_port_data); static void ide_port_init_devices_data(ide_hwif_t *hwif) { @@ -150,18 +149,6 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif) } } -static void __init init_ide_data (void) -{ - unsigned int index; - - /* Initialise all interface structures */ - for (index = 0; index < MAX_HWIFS; ++index) { - ide_hwif_t *hwif = &ide_hwifs[index]; - - ide_init_port_data(hwif, index); - } -} - void ide_remove_port_from_hwgroup(ide_hwif_t *hwif) { ide_hwgroup_t *hwgroup = hwif->hwgroup; @@ -1021,8 +1008,6 @@ static int __init ide_init(void) goto out_port_class; } - init_ide_data(); - proc_ide_create(); return 0; diff --git a/drivers/ide/legacy/buddha.c b/drivers/ide/legacy/buddha.c index 9a1d27e..0497e7f 100644 --- a/drivers/ide/legacy/buddha.c +++ b/drivers/ide/legacy/buddha.c @@ -227,7 +227,6 @@ fail_base2: if (hwif) { u8 index = hwif->index; - ide_init_port_data(hwif, index); ide_init_port_hw(hwif, &hw); idx[i] = index; diff --git a/drivers/ide/legacy/falconide.c b/drivers/ide/legacy/falconide.c index af11028..129a812 100644 --- a/drivers/ide/legacy/falconide.c +++ b/drivers/ide/legacy/falconide.c @@ -111,7 +111,6 @@ static int __init falconide_init(void) u8 index = hwif->index; u8 idx[4] = { index, 0xff, 0xff, 0xff }; - ide_init_port_data(hwif, index); ide_init_port_hw(hwif, &hw); /* Atari has a byte-swapped IDE interface */ diff --git a/drivers/ide/legacy/gayle.c b/drivers/ide/legacy/gayle.c index b789416..7e74b20 100644 --- a/drivers/ide/legacy/gayle.c +++ b/drivers/ide/legacy/gayle.c @@ -185,7 +185,6 @@ found: if (hwif) { u8 index = hwif->index; - ide_init_port_data(hwif, index); ide_init_port_hw(hwif, &hw); idx[i] = index; diff --git a/drivers/ide/legacy/ide-cs.c b/drivers/ide/legacy/ide-cs.c index 8dbf4d9..7c69608 100644 --- a/drivers/ide/legacy/ide-cs.c +++ b/drivers/ide/legacy/ide-cs.c @@ -187,7 +187,6 @@ static ide_hwif_t *idecs_register(unsigned long io, unsigned long ctl, i = hwif->index; - ide_init_port_data(hwif, i); ide_init_port_hw(hwif, &hw); hwif->port_ops = &idecs_port_ops; diff --git a/drivers/ide/legacy/macide.c b/drivers/ide/legacy/macide.c index 2e84290..0a6195b 100644 --- a/drivers/ide/legacy/macide.c +++ b/drivers/ide/legacy/macide.c @@ -130,7 +130,6 @@ static int __init macide_init(void) u8 index = hwif->index; u8 idx[4] = { index, 0xff, 0xff, 0xff }; - ide_init_port_data(hwif, index); ide_init_port_hw(hwif, &hw); ide_device_add(idx, NULL); diff --git a/drivers/ide/legacy/q40ide.c b/drivers/ide/legacy/q40ide.c index 8ff6e2d..9c2b9d0 100644 --- a/drivers/ide/legacy/q40ide.c +++ b/drivers/ide/legacy/q40ide.c @@ -142,7 +142,6 @@ static int __init q40ide_init(void) hwif = ide_find_port(); if (hwif) { - ide_init_port_data(hwif, hwif->index); ide_init_port_hw(hwif, &hw); /* Q40 has a byte-swapped IDE interface */ diff --git a/drivers/ide/pci/delkin_cb.c b/drivers/ide/pci/delkin_cb.c index af0f300..0106e2a 100644 --- a/drivers/ide/pci/delkin_cb.c +++ b/drivers/ide/pci/delkin_cb.c @@ -93,7 +93,6 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id) i = hwif->index; - ide_init_port_data(hwif, i); ide_init_port_hw(hwif, &hw); idx[0] = i; -- cgit v0.10.2 From 63b51c6d1d63276fd320615c042f1ff5d94ebab8 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:40 +0200 Subject: ide: make ide_hwifs[] static Move ide_hwifs[] from ide.c to ide-probe.c and make it static. Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 592d424..97dda05 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -39,6 +39,8 @@ #include #include +static ide_hwif_t ide_hwifs[MAX_HWIFS]; /* master data repository */ + /** * generic_id - add a generic drive id * @drive: drive to make an ID block for diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 32d8ee2..b7855a1 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -91,8 +91,6 @@ DEFINE_MUTEX(ide_cfg_mtx); __cacheline_aligned_in_smp DEFINE_SPINLOCK(ide_lock); EXPORT_SYMBOL(ide_lock); -ide_hwif_t ide_hwifs[MAX_HWIFS]; /* master data repository */ - static void ide_port_init_devices_data(ide_hwif_t *); /* diff --git a/include/linux/ide.h b/include/linux/ide.h index 535c439..15d5668 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -801,18 +801,6 @@ struct ide_driver_s { int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, unsigned, unsigned long); -/* - * ide_hwifs[] is the master data structure used to keep track - * of just about everything in ide.c. Whenever possible, routines - * should be using pointers to a drive (ide_drive_t *) or - * pointers to a hwif (ide_hwif_t *), rather than indexing this - * structure directly (the allocation/layout may change!). - * - */ -#ifndef _IDE_C -extern ide_hwif_t ide_hwifs[]; /* master data repository */ -#endif - extern int ide_vlb_clk; extern int ide_pci_clk; -- cgit v0.10.2 From c56c5648a3bd15ff14c50f284b261140cd5b5472 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:40 +0200 Subject: ide: set hwif->dev in ide_init_port_hw() (take 2) * Add 'parent' field to hw_regs_t for optional parent device pointer (needed by macio PMAC IDE controllers) and set hwif->dev in ide_init_port_hw(). * Update au1xxx-ide.c, sgiioc4.c, pmac.c and setup-pci.c accordingly. v2: * Update scc_pata.c. There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index b7855a1..9240888 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -297,7 +297,8 @@ void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw) memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports)); hwif->irq = hw->irq; hwif->chipset = hw->chipset; - hwif->gendev.parent = hw->dev; + hwif->dev = hw->dev; + hwif->gendev.parent = hw->parent ? hw->parent : hw->dev; hwif->ack_intr = hw->ack_intr; } EXPORT_SYMBOL_GPL(ide_init_port_hw); diff --git a/drivers/ide/mips/au1xxx-ide.c b/drivers/ide/mips/au1xxx-ide.c index 1a6c27b..08c46c3 100644 --- a/drivers/ide/mips/au1xxx-ide.c +++ b/drivers/ide/mips/au1xxx-ide.c @@ -600,8 +600,6 @@ static int au_ide_probe(struct device *dev) ide_init_port_hw(hwif, &hw); - hwif->dev = dev; - /* If the user has selected DDMA assisted copies, then set up a few local I/O function entry points */ diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c index 1584ebb..71bff31 100644 --- a/drivers/ide/pci/scc_pata.c +++ b/drivers/ide/pci/scc_pata.c @@ -572,7 +572,6 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev, hw.dev = &dev->dev; hw.chipset = ide_pci; ide_init_port_hw(hwif, &hw); - hwif->dev = &dev->dev; idx[0] = hwif->index; diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c index 24513e3..af91cc5 100644 --- a/drivers/ide/pci/sgiioc4.c +++ b/drivers/ide/pci/sgiioc4.c @@ -625,8 +625,6 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev) hw.dev = &dev->dev; ide_init_port_hw(hwif, &hw); - hwif->dev = &dev->dev; - /* The IOC4 uses MMIO rather than Port IO. */ default_hwif_mmiops(hwif); diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index cfa103a..93fb906 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -1148,8 +1148,6 @@ pmac_ide_macio_attach(struct macio_dev *mdev, const struct of_device_id *match) base = ioremap(macio_resource_start(mdev, 0), 0x400); regbase = (unsigned long) base; - hwif->dev = &mdev->bus->pdev->dev; - pmif->mdev = mdev; pmif->node = mdev->ofdev.node; pmif->regbase = regbase; @@ -1171,7 +1169,8 @@ pmac_ide_macio_attach(struct macio_dev *mdev, const struct of_device_id *match) memset(&hw, 0, sizeof(hw)); pmac_ide_init_ports(&hw, pmif->regbase); hw.irq = irq; - hw.dev = &mdev->ofdev.dev; + hw.dev = &mdev->bus->pdev->dev; + hw.parent = &mdev->ofdev.dev; rc = pmac_ide_setup_device(pmif, hwif, &hw); if (rc != 0) { @@ -1271,7 +1270,6 @@ pmac_ide_pci_attach(struct pci_dev *pdev, const struct pci_device_id *id) goto out_free_pmif; } - hwif->dev = &pdev->dev; pmif->mdev = NULL; pmif->node = np; diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c index abcfb17..3dea5a5 100644 --- a/drivers/ide/setup-pci.c +++ b/drivers/ide/setup-pci.c @@ -346,8 +346,6 @@ static ide_hwif_t *ide_hwif_configure(struct pci_dev *dev, ide_init_port_hw(hwif, &hw); - hwif->dev = &dev->dev; - return hwif; } diff --git a/include/linux/ide.h b/include/linux/ide.h index 15d5668..a6a2ecc 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -171,7 +171,7 @@ typedef struct hw_regs_s { int irq; /* our irq number */ ide_ack_intr_t *ack_intr; /* acknowledge interrupt */ hwif_chipset_t chipset; - struct device *dev; + struct device *dev, *parent; } hw_regs_t; void ide_init_port_data(struct hwif_s *, unsigned int); -- cgit v0.10.2 From a536f326a2223c951818e199e23847c2ac5e483b Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:40 +0200 Subject: au1xxx-ide: don't use hwif->hwif_data * Use &auide_hwif directly instead of using hwif->hwif_data. While at it: * No need to initialize hwif->{select,config}_data. There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/mips/au1xxx-ide.c b/drivers/ide/mips/au1xxx-ide.c index 08c46c3..48d57ca 100644 --- a/drivers/ide/mips/au1xxx-ide.c +++ b/drivers/ide/mips/au1xxx-ide.c @@ -213,10 +213,8 @@ static int auide_build_dmatable(ide_drive_t *drive) { int i, iswrite, count = 0; ide_hwif_t *hwif = HWIF(drive); - struct request *rq = HWGROUP(drive)->rq; - - _auide_hwif *ahwif = (_auide_hwif*)hwif->hwif_data; + _auide_hwif *ahwif = &auide_hwif; struct scatterlist *sg; iswrite = (rq_data_dir(rq) == WRITE); @@ -402,7 +400,7 @@ static const struct ide_dma_ops au1xxx_dma_ops = { static int auide_ddma_init(ide_hwif_t *hwif, const struct ide_port_info *d) { - _auide_hwif *auide = (_auide_hwif *)hwif->hwif_data; + _auide_hwif *auide = &auide_hwif; dbdev_tab_t source_dev_tab, target_dev_tab; u32 dev_id, tsize, devwidth, flags; @@ -463,7 +461,7 @@ static int auide_ddma_init(ide_hwif_t *hwif, const struct ide_port_info *d) #else static int auide_ddma_init(ide_hwif_t *hwif, const struct ide_port_info *d) { - _auide_hwif *auide = (_auide_hwif *)hwif->hwif_data; + _auide_hwif *auide = &auide_hwif; dbdev_tab_t source_dev_tab; int flags; @@ -608,11 +606,8 @@ static int au_ide_probe(struct device *dev) hwif->input_data = au1xxx_input_data; hwif->output_data = au1xxx_output_data; #endif - hwif->select_data = 0; /* no chipset-specific code */ - hwif->config_data = 0; /* no chipset-specific code */ auide_hwif.hwif = hwif; - hwif->hwif_data = &auide_hwif; idx[0] = hwif->index; -- cgit v0.10.2 From f333f92bf9040fb63d13c184295629c7a0ff449f Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:40 +0200 Subject: ide_4drives: use struct ide_port_info Convert the driver to use struct ide_port_info - as a nice side-effect this fixes hwif->channel initialization. Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/legacy/ide-4drives.c b/drivers/ide/legacy/ide-4drives.c index ecae916..5cd6ce5 100644 --- a/drivers/ide/legacy/ide-4drives.c +++ b/drivers/ide/legacy/ide-4drives.c @@ -11,6 +11,23 @@ static int probe_4drives; module_param_named(probe, probe_4drives, bool, 0); MODULE_PARM_DESC(probe, "probe for generic IDE chipset with 4 drives/port"); +static void ide_4drives_port_init_devs(ide_hwif_t *hwif) +{ + if (hwif->channel) { + hwif->drives[0].select.all ^= 0x20; + hwif->drives[1].select.all ^= 0x20; + } +} + +static const struct ide_port_ops ide_4drives_port_ops = { + .port_init_devs = ide_4drives_port_init_devs, +}; + +static const struct ide_port_info ide_4drives_port_info = { + .port_ops = &ide_4drives_port_ops, + .host_flags = IDE_HFLAG_SERIALIZE | IDE_HFLAG_NO_DMA, +}; + static int __init ide_4drives_init(void) { ide_hwif_t *hwif, *mate; @@ -49,18 +66,10 @@ static int __init ide_4drives_init(void) mate = ide_find_port(); if (mate) { ide_init_port_hw(mate, &hw); - mate->drives[0].select.all ^= 0x20; - mate->drives[1].select.all ^= 0x20; idx[1] = mate->index; - - if (hwif) { - hwif->mate = mate; - mate->mate = hwif; - hwif->serialized = mate->serialized = 1; - } } - ide_device_add(idx, NULL); + ide_device_add(idx, &ide_4drives_port_info); return 0; } -- cgit v0.10.2 From dccdf527379dc2fe8a4efc5c75601d1d4035a750 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:40 +0200 Subject: ide-cs: use struct ide_port_info Convert the driver to use struct ide_port_info. There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/legacy/ide-cs.c b/drivers/ide/legacy/ide-cs.c index 7c69608..fc53dcf 100644 --- a/drivers/ide/legacy/ide-cs.c +++ b/drivers/ide/legacy/ide-cs.c @@ -154,6 +154,11 @@ static const struct ide_port_ops idecs_port_ops = { .quirkproc = ide_undecoded_slave, }; +static const struct ide_port_info idecs_port_info = { + .port_ops = &idecs_port_ops, + .host_flags = IDE_HFLAG_NO_DMA, +}; + static ide_hwif_t *idecs_register(unsigned long io, unsigned long ctl, unsigned long irq, struct pcmcia_device *handle) { @@ -188,11 +193,10 @@ static ide_hwif_t *idecs_register(unsigned long io, unsigned long ctl, i = hwif->index; ide_init_port_hw(hwif, &hw); - hwif->port_ops = &idecs_port_ops; idx[0] = i; - ide_device_add(idx, NULL); + ide_device_add(idx, &idecs_port_info); if (hwif->present) return hwif; -- cgit v0.10.2 From 26839f09ca2d0f4239e546cd912bc9f4694f3c5e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:40 +0200 Subject: icside: don't use hwif->hwif_data * Move ecard_set_drvdata() from icside_probe() to icside_register_v{5,6}(), then use state->ioc_base instead of hwif->hwif_data in icside_maskproc() and icside_dma_test_irq(). While at it: * Add sel field to struct icside_state, then use state->{sel,ioc_base} instead of ->{select,config}_data in icside_dma_setup(). There should be no functional changes caused by this patch. Cc: Russell King Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/arm/icside.c b/drivers/ide/arm/icside.c index 0614569..7439580 100644 --- a/drivers/ide/arm/icside.c +++ b/drivers/ide/arm/icside.c @@ -68,6 +68,7 @@ struct icside_state { unsigned int enabled; void __iomem *irq_port; void __iomem *ioc_base; + unsigned int sel; unsigned int type; ide_hwif_t *hwif[2]; }; @@ -165,7 +166,8 @@ static const expansioncard_ops_t icside_ops_arcin_v6 = { static void icside_maskproc(ide_drive_t *drive, int mask) { ide_hwif_t *hwif = HWIF(drive); - struct icside_state *state = hwif->hwif_data; + struct expansion_card *ec = ECARD_DEV(hwif->dev); + struct icside_state *state = ecard_get_drvdata(ec); unsigned long flags; local_irq_save(flags); @@ -308,6 +310,7 @@ static int icside_dma_setup(ide_drive_t *drive) { ide_hwif_t *hwif = HWIF(drive); struct expansion_card *ec = ECARD_DEV(hwif->dev); + struct icside_state *state = ecard_get_drvdata(ec); struct request *rq = hwif->hwgroup->rq; unsigned int dma_mode; @@ -331,7 +334,7 @@ static int icside_dma_setup(ide_drive_t *drive) /* * Route the DMA signals to the correct interface. */ - writeb(hwif->select_data, hwif->config_data); + writeb(state->sel | hwif->channel, state->ioc_base); /* * Select the correct timing for this drive. @@ -359,7 +362,8 @@ static void icside_dma_exec_cmd(ide_drive_t *drive, u8 cmd) static int icside_dma_test_irq(ide_drive_t *drive) { ide_hwif_t *hwif = HWIF(drive); - struct icside_state *state = hwif->hwif_data; + struct expansion_card *ec = ECARD_DEV(hwif->dev); + struct icside_state *state = ecard_get_drvdata(ec); return readb(state->irq_port + (hwif->channel ? @@ -472,6 +476,8 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec) state->hwif[0] = hwif; + ecard_set_drvdata(ec, state); + idx[0] = hwif->index; ide_device_add(idx, NULL); @@ -525,6 +531,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec) state->irq_port = easi_base; state->ioc_base = ioc_base; + state->sel = sel; /* * Be on the safe side - disable interrupts @@ -545,13 +552,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec) state->hwif[0] = hwif; state->hwif[1] = mate; - hwif->hwif_data = state; - hwif->config_data = (unsigned long)ioc_base; - hwif->select_data = sel; - - mate->hwif_data = state; - mate->config_data = (unsigned long)ioc_base; - mate->select_data = sel | 1; + ecard_set_drvdata(ec, state); if (ec->dma != NO_DMA && !request_dma(ec->dma, hwif->name)) { d.init_dma = icside_dma_init; @@ -627,10 +628,8 @@ icside_probe(struct expansion_card *ec, const struct ecard_id *id) break; } - if (ret == 0) { - ecard_set_drvdata(ec, state); + if (ret == 0) goto out; - } kfree(state); release: -- cgit v0.10.2 From b25afdf1336237fb0e4021eb35744e577e19bd14 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:41 +0200 Subject: icside: use ide_init_port_hw() * Move ide_find_port() and default_hwif_mmiops() calls from icside_setup() to icside_register_v{5,6}(). * Convert icside_setup() to initialize hw_regs_t instead ide_hwif_t and icside_register_v{5,6}() to use ide_init_port_hw(). * Rename icside_setup() to icside_setup_ports(). There should be no functional changes caused by this patch. Cc: Russell King Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/arm/icside.c b/drivers/ide/arm/icside.c index 7439580..c8d8600 100644 --- a/drivers/ide/arm/icside.c +++ b/drivers/ide/arm/icside.c @@ -415,36 +415,24 @@ static int icside_dma_off_init(ide_hwif_t *hwif, const struct ide_port_info *d) return -EOPNOTSUPP; } -static ide_hwif_t * -icside_setup(void __iomem *base, struct cardinfo *info, struct expansion_card *ec) +static void icside_setup_ports(hw_regs_t *hw, void __iomem *base, + struct cardinfo *info, struct expansion_card *ec) { unsigned long port = (unsigned long)base + info->dataoffset; - ide_hwif_t *hwif; - hwif = ide_find_port(); - if (hwif) { - /* - * Ensure we're using MMIO - */ - default_hwif_mmiops(hwif); - - hwif->io_ports.data_addr = port; - hwif->io_ports.error_addr = port + (1 << info->stepping); - hwif->io_ports.nsect_addr = port + (2 << info->stepping); - hwif->io_ports.lbal_addr = port + (3 << info->stepping); - hwif->io_ports.lbam_addr = port + (4 << info->stepping); - hwif->io_ports.lbah_addr = port + (5 << info->stepping); - hwif->io_ports.device_addr = port + (6 << info->stepping); - hwif->io_ports.status_addr = port + (7 << info->stepping); - hwif->io_ports.ctl_addr = - (unsigned long)base + info->ctrloffset; - hwif->irq = ec->irq; - hwif->chipset = ide_acorn; - hwif->gendev.parent = &ec->dev; - hwif->dev = &ec->dev; - } - - return hwif; + hw->io_ports.data_addr = port; + hw->io_ports.error_addr = port + (1 << info->stepping); + hw->io_ports.nsect_addr = port + (2 << info->stepping); + hw->io_ports.lbal_addr = port + (3 << info->stepping); + hw->io_ports.lbam_addr = port + (4 << info->stepping); + hw->io_ports.lbah_addr = port + (5 << info->stepping); + hw->io_ports.device_addr = port + (6 << info->stepping); + hw->io_ports.status_addr = port + (7 << info->stepping); + hw->io_ports.ctl_addr = (unsigned long)base + info->ctrloffset; + + hw->irq = ec->irq; + hw->dev = &ec->dev; + hw->chipset = ide_acorn; } static int __init @@ -453,6 +441,7 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec) ide_hwif_t *hwif; void __iomem *base; u8 idx[4] = { 0xff, 0xff, 0xff, 0xff }; + hw_regs_t hw; base = ecardm_iomap(ec, ECARD_RES_MEMC, 0, 0); if (!base) @@ -470,10 +459,15 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec) */ icside_irqdisable_arcin_v5(ec, 0); - hwif = icside_setup(base, &icside_cardinfo_v5, ec); + icside_setup_ports(&hw, base, &icside_cardinfo_v5, ec); + + hwif = ide_find_port(); if (!hwif) return -ENODEV; + ide_init_port_hw(hwif, &hw); + default_hwif_mmiops(hwif); + state->hwif[0] = hwif; ecard_set_drvdata(ec, state); @@ -503,6 +497,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec) int ret; u8 idx[4] = { 0xff, 0xff, 0xff, 0xff }; struct ide_port_info d = icside_v6_port_info; + hw_regs_t hw[2]; ioc_base = ecardm_iomap(ec, ECARD_RES_IOCFAST, 0, 0); if (!ioc_base) { @@ -538,16 +533,25 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec) */ icside_irqdisable_arcin_v6(ec, 0); + icside_setup_ports(&hw[0], easi_base, &icside_cardinfo_v6_1, ec); + icside_setup_ports(&hw[1], easi_base, &icside_cardinfo_v6_2, ec); + /* * Find and register the interfaces. */ - hwif = icside_setup(easi_base, &icside_cardinfo_v6_1, ec); - mate = icside_setup(easi_base, &icside_cardinfo_v6_2, ec); + hwif = ide_find_port(); + if (hwif == NULL) + return -ENODEV; - if (!hwif || !mate) { - ret = -ENODEV; - goto out; - } + ide_init_port_hw(hwif, &hw[0]); + default_hwif_mmiops(hwif); + + mate = ide_find_port(); + if (mate == NULL) + return -ENODEV; + + ide_init_port_hw(mate, &hw[1]); + default_hwif_mmiops(mate); state->hwif[0] = hwif; state->hwif[1] = mate; -- cgit v0.10.2 From 01397012b3129147890bb116431d5a794dfc3990 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:41 +0200 Subject: sgiioc4: use driver name for resource allocation Cc: Jeremy Higdon Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c index af91cc5..dd0d0f7 100644 --- a/drivers/ide/pci/sgiioc4.c +++ b/drivers/ide/pci/sgiioc4.c @@ -608,11 +608,11 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev) cmd_phys_base = bar0 + IOC4_CMD_OFFSET; if (!request_mem_region(cmd_phys_base, IOC4_CMD_CTL_BLK_SIZE, - hwif->name)) { + DRV_NAME)) { printk(KERN_ERR "%s : %s -- ERROR, Addresses " "0x%p to 0x%p ALREADY in use\n", - __func__, hwif->name, (void *) cmd_phys_base, + __func__, DRV_NAME, (void *) cmd_phys_base, (void *) cmd_phys_base + IOC4_CMD_CTL_BLK_SIZE); return -ENOMEM; } -- cgit v0.10.2 From c1da678b5b4d8ce7836ed1ded80109d1db37efe0 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:41 +0200 Subject: ide: tighten checks on PCI BARs in ide_hwif_configure() Alan has fixed PCI layer handling of PCI IDE in Compatibility mode so PCI BAR 0/1 (and/or 2/3) content reported by kernel should never be zero. Tighten checks on PCI BARs and also fix printk() message while on it. Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c index 3dea5a5..166211f 100644 --- a/drivers/ide/setup-pci.c +++ b/drivers/ide/setup-pci.c @@ -319,18 +319,18 @@ static ide_hwif_t *ide_hwif_configure(struct pci_dev *dev, ctl = pci_resource_start(dev, 2*port+1); base = pci_resource_start(dev, 2*port); - if ((ctl && !base) || (base && !ctl)) { - printk(KERN_ERR "%s: inconsistent baseregs (BIOS) " - "for port %d, skipping\n", d->name, port); - return NULL; - } - } - if (!ctl) { + } else { /* Use default values */ ctl = port ? 0x374 : 0x3f4; base = port ? 0x170 : 0x1f0; } + if (!base || !ctl) { + printk(KERN_ERR "%s: bad PCI BARs for port %d, skipping\n", + d->name, port); + return NULL; + } + hwif = ide_find_port_slot(d); if (hwif == NULL) { printk(KERN_ERR "%s: too many IDE interfaces, no room in " -- cgit v0.10.2 From e48905e9cfffd21861c3521d828ae992a53aac67 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:41 +0200 Subject: swarm: use struct ide_port_info Convert the driver to use struct ide_port_info. There should be no functional changes caused by this patch. Acked-by: Maciej W. Rozycki Tested-by: Maciej W. Rozycki Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/mips/swarm.c b/drivers/ide/mips/swarm.c index 52fee3d..57a0138 100644 --- a/drivers/ide/mips/swarm.c +++ b/drivers/ide/mips/swarm.c @@ -61,6 +61,10 @@ static struct resource swarm_ide_resource = { static struct platform_device *swarm_ide_dev; +static const struct ide_port_info swarm_port_info = { + .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA, +}; + /* * swarm_ide_probe - if the board header indicates the existence of * Generic Bus IDE, allocate a HWIF for it. @@ -110,7 +114,6 @@ static int __devinit swarm_ide_probe(struct device *dev) base = ioremap(offset, size); /* Setup MMIO ops. */ - hwif->host_flags = IDE_HFLAG_MMIO; default_hwif_mmiops(hwif); for (i = 0; i <= 7; i++) @@ -125,7 +128,7 @@ static int __devinit swarm_ide_probe(struct device *dev) idx[0] = hwif->index; - ide_device_add(idx, NULL); + ide_device_add(idx, &swarm_port_info); dev_set_drvdata(dev, hwif); -- cgit v0.10.2 From ba1d0de70d64e68f0e035f00dbb041c1e05b49c9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 16 Jul 2008 20:33:41 +0200 Subject: powerpc/ide: remove mpc8xx-ide driver This driver was only used by arch/ppc code and is obsolete now with the move to common arch/powerpc code. [bart: port it over IDE tree, remove leftover 'choice' from Kconfig] Signed-off-by: Arnd Bergmann Acked-by: Kumar Gala Cc: Paul Mackerras Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 5f4d6ea..21511d4 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -929,46 +929,6 @@ config BLK_DEV_PALMCHIP_BK3710 Say Y here if you want to support the onchip IDE controller on the TI DaVinci SoC - -config BLK_DEV_MPC8xx_IDE - tristate "MPC8xx IDE support" - depends on 8xx && (LWMON || IVMS8 || IVML24 || TQM8xxL) && IDE=y && BLK_DEV_IDE=y && !PPC_MERGE - help - This option provides support for IDE on Motorola MPC8xx Systems. - Please see 'Type of MPC8xx IDE interface' for details. - - If unsure, say N. - -choice - prompt "Type of MPC8xx IDE interface" - depends on BLK_DEV_MPC8xx_IDE - default IDE_8xx_PCCARD - -config IDE_8xx_PCCARD - bool "8xx_PCCARD" - ---help--- - Select how the IDE devices are connected to the MPC8xx system: - - 8xx_PCCARD uses the 8xx internal PCMCIA interface in combination - with a PC Card (e.g. ARGOSY portable Hard Disk Adapter), - ATA PC Card HDDs or ATA PC Flash Cards (example: TQM8xxL - systems) - - 8xx_DIRECT is used for directly connected IDE devices using the 8xx - internal PCMCIA interface (example: IVMS8 systems) - - EXT_DIRECT is used for IDE devices directly connected to the 8xx - bus using some glue logic, but _not_ the 8xx internal - PCMCIA interface (example: IDIF860 systems) - -config IDE_8xx_DIRECT - bool "8xx_DIRECT" - -config IDE_EXT_DIRECT - bool "EXT_DIRECT" - -endchoice - # no isa -> no vlb if ISA && (ALPHA || X86 || MIPS) diff --git a/drivers/ide/ppc/Makefile b/drivers/ide/ppc/Makefile index 65af584..74e52ad 100644 --- a/drivers/ide/ppc/Makefile +++ b/drivers/ide/ppc/Makefile @@ -1,3 +1,2 @@ obj-$(CONFIG_BLK_DEV_IDE_PMAC) += pmac.o -obj-$(CONFIG_BLK_DEV_MPC8xx_IDE) += mpc8xx.o diff --git a/drivers/ide/ppc/mpc8xx.c b/drivers/ide/ppc/mpc8xx.c deleted file mode 100644 index 33bc699..0000000 --- a/drivers/ide/ppc/mpc8xx.c +++ /dev/null @@ -1,844 +0,0 @@ -/* - * Copyright (C) 2000, 2001 Wolfgang Denk, wd@denx.de - * Modified for direct IDE interface - * by Thomas Lange, thomas@corelatus.com - * Modified for direct IDE interface on 8xx without using the PCMCIA - * controller - * by Steven.Scholz@imc-berlin.de - * Moved out of arch/ppc/kernel/m8xx_setup.c, other minor cleanups - * by Mathew Locke - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DRV_NAME "ide-mpc8xx" - -static int identify (volatile u8 *p); -static void print_fixed (volatile u8 *p); -static void print_funcid (int func); -static int check_ide_device (unsigned long base); - -static void ide_interrupt_ack (void *dev); -static void m8xx_ide_set_pio_mode(ide_drive_t *drive, const u8 pio); - -typedef struct ide_ioport_desc { - unsigned long base_off; /* Offset to PCMCIA memory */ - unsigned long reg_off[IDE_NR_PORTS]; /* controller register offsets */ - int irq; /* IRQ */ -} ide_ioport_desc_t; - -ide_ioport_desc_t ioport_dsc[MAX_HWIFS] = { -#ifdef IDE0_BASE_OFFSET - { IDE0_BASE_OFFSET, - { - IDE0_DATA_REG_OFFSET, - IDE0_ERROR_REG_OFFSET, - IDE0_NSECTOR_REG_OFFSET, - IDE0_SECTOR_REG_OFFSET, - IDE0_LCYL_REG_OFFSET, - IDE0_HCYL_REG_OFFSET, - IDE0_SELECT_REG_OFFSET, - IDE0_STATUS_REG_OFFSET, - IDE0_CONTROL_REG_OFFSET, - IDE0_IRQ_REG_OFFSET, - }, - IDE0_INTERRUPT, - }, -#ifdef IDE1_BASE_OFFSET - { IDE1_BASE_OFFSET, - { - IDE1_DATA_REG_OFFSET, - IDE1_ERROR_REG_OFFSET, - IDE1_NSECTOR_REG_OFFSET, - IDE1_SECTOR_REG_OFFSET, - IDE1_LCYL_REG_OFFSET, - IDE1_HCYL_REG_OFFSET, - IDE1_SELECT_REG_OFFSET, - IDE1_STATUS_REG_OFFSET, - IDE1_CONTROL_REG_OFFSET, - IDE1_IRQ_REG_OFFSET, - }, - IDE1_INTERRUPT, - }, -#endif /* IDE1_BASE_OFFSET */ -#endif /* IDE0_BASE_OFFSET */ -}; - -struct ide_timing ide_pio_clocks[6]; -int hold_time[6] = {30, 20, 15, 10, 10, 10 }; /* PIO Mode 5 with IORDY (nonstandard) */ - -/* - * Warning: only 1 (ONE) PCMCIA slot supported here, - * which must be correctly initialized by the firmware (PPCBoot). - */ -static int _slot_ = -1; /* will be read from PCMCIA registers */ - -/* Make clock cycles and always round up */ -#define PCMCIA_MK_CLKS( t, T ) (( (t) * ((T)/1000000) + 999U ) / 1000U ) - -#define M8XX_PCMCIA_CD2(slot) (0x10000000 >> (slot << 4)) -#define M8XX_PCMCIA_CD1(slot) (0x08000000 >> (slot << 4)) - -/* - * The TQM850L hardware has two pins swapped! Grrrrgh! - */ -#ifdef CONFIG_TQM850L -#define __MY_PCMCIA_GCRX_CXRESET PCMCIA_GCRX_CXOE -#define __MY_PCMCIA_GCRX_CXOE PCMCIA_GCRX_CXRESET -#else -#define __MY_PCMCIA_GCRX_CXRESET PCMCIA_GCRX_CXRESET -#define __MY_PCMCIA_GCRX_CXOE PCMCIA_GCRX_CXOE -#endif - -#if defined(CONFIG_BLK_DEV_MPC8xx_IDE) && defined(CONFIG_IDE_8xx_PCCARD) -#define PCMCIA_SCHLVL IDE0_INTERRUPT /* Status Change Interrupt Level */ -static int pcmcia_schlvl = PCMCIA_SCHLVL; -#endif - -/* - * See include/linux/ide.h for definition of hw_regs_t (p, base) - */ - -/* - * m8xx_ide_init_ports() for a direct IDE interface _using_ - * MPC8xx's internal PCMCIA interface - */ -#if defined(CONFIG_IDE_8xx_PCCARD) || defined(CONFIG_IDE_8xx_DIRECT) -static int __init m8xx_ide_init_ports(hw_regs_t *hw, unsigned long data_port) -{ - unsigned long *p = hw->io_ports_array; - int i; - - typedef struct { - ulong br; - ulong or; - } pcmcia_win_t; - volatile pcmcia_win_t *win; - volatile pcmconf8xx_t *pcmp; - - uint *pgcrx; - u32 pcmcia_phy_base; - u32 pcmcia_phy_end; - static unsigned long pcmcia_base = 0; - unsigned long base; - - *p = 0; - - pcmp = (pcmconf8xx_t *)(&(((immap_t *)IMAP_ADDR)->im_pcmcia)); - - if (!pcmcia_base) { - /* - * Read out PCMCIA registers. Since the reset values - * are undefined, we sure hope that they have been - * set up by firmware - */ - - /* Scan all registers for valid settings */ - pcmcia_phy_base = 0xFFFFFFFF; - pcmcia_phy_end = 0; - /* br0 is start of brX and orX regs */ - win = (pcmcia_win_t *) \ - (&(((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pbr0)); - for (i = 0; i < 8; i++) { - if (win->or & 1) { /* This bank is marked as valid */ - if (win->br < pcmcia_phy_base) { - pcmcia_phy_base = win->br; - } - if ((win->br + PCMCIA_MEM_SIZE) > pcmcia_phy_end) { - pcmcia_phy_end = win->br + PCMCIA_MEM_SIZE; - } - /* Check which slot that has been defined */ - _slot_ = (win->or >> 2) & 1; - - } /* Valid bank */ - win++; - } /* for */ - - printk ("PCMCIA slot %c: phys mem %08x...%08x (size %08x)\n", - 'A' + _slot_, - pcmcia_phy_base, pcmcia_phy_end, - pcmcia_phy_end - pcmcia_phy_base); - - if (!request_mem_region(pcmcia_phy_base, - pcmcia_phy_end - pcmcia_phy_base, - DRV_NAME)) { - printk(KERN_ERR "%s: resources busy\n", DRV_NAME); - return -EBUSY; - } - - pcmcia_base=(unsigned long)ioremap(pcmcia_phy_base, - pcmcia_phy_end-pcmcia_phy_base); - -#ifdef DEBUG - printk ("PCMCIA virt base: %08lx\n", pcmcia_base); -#endif - /* Compute clock cycles for PIO timings */ - for (i=0; i<6; ++i) { - bd_t *binfo = (bd_t *)__res; - struct ide_timing *t, *n; - - hold_time[i] = - PCMCIA_MK_CLKS (hold_time[i], - binfo->bi_busfreq); - - t = ide_timing_find_mode(XFER_PIO_0 + i); - n = &ide_pio_clocks[i]; - - n->setup = PCMCIA_MK_CLKS(t->setup, binfo->bi_busfreq); - n->active = PCMCIA_MK_CLKS(t->active, binfo->bi_busfreq); - n->cycle = PCMCIA_MK_CLKS(t->cycle, binfo->bi_busfreq); -#if 0 - printk ("PIO mode %d timings: %d/%d/%d => %d/%d/%d\n", - i, - t->setup, t->active, t->cycle, - n->setup, n->active, n->cycle); -#endif - } - } - - if (_slot_ == -1) { - printk ("PCMCIA slot has not been defined! Using A as default\n"); - _slot_ = 0; - } - -#ifdef CONFIG_IDE_8xx_PCCARD - -#ifdef DEBUG - printk ("PIPR = 0x%08X slot %c ==> mask = 0x%X\n", - pcmp->pcmc_pipr, - 'A' + _slot_, - M8XX_PCMCIA_CD1(_slot_) | M8XX_PCMCIA_CD2(_slot_) ); -#endif /* DEBUG */ - - if (pcmp->pcmc_pipr & (M8XX_PCMCIA_CD1(_slot_)|M8XX_PCMCIA_CD2(_slot_))) { - printk ("No card in slot %c: PIPR=%08x\n", - 'A' + _slot_, (u32) pcmp->pcmc_pipr); - return -ENODEV; /* No card in slot */ - } - - check_ide_device (pcmcia_base); - -#endif /* CONFIG_IDE_8xx_PCCARD */ - - base = pcmcia_base + ioport_dsc[data_port].base_off; -#ifdef DEBUG - printk ("base: %08x + %08x = %08x\n", - pcmcia_base, ioport_dsc[data_port].base_off, base); -#endif - - for (i = 0; i < IDE_NR_PORTS; ++i) { -#ifdef DEBUG - printk ("port[%d]: %08x + %08x = %08x\n", - i, - base, - ioport_dsc[data_port].reg_off[i], - i, base + ioport_dsc[data_port].reg_off[i]); -#endif - *p++ = base + ioport_dsc[data_port].reg_off[i]; - } - - hw->irq = ioport_dsc[data_port].irq; - hw->ack_intr = (ide_ack_intr_t *)ide_interrupt_ack; - -#ifdef CONFIG_IDE_8xx_PCCARD - { - unsigned int reg; - - if (_slot_) - pgcrx = &((immap_t *) IMAP_ADDR)->im_pcmcia.pcmc_pgcrb; - else - pgcrx = &((immap_t *) IMAP_ADDR)->im_pcmcia.pcmc_pgcra; - - reg = *pgcrx; - reg |= mk_int_int_mask (pcmcia_schlvl) << 24; - reg |= mk_int_int_mask (pcmcia_schlvl) << 16; - *pgcrx = reg; - } -#endif /* CONFIG_IDE_8xx_PCCARD */ - - /* Enable Harddisk Interrupt, - * and make it edge sensitive - */ - /* (11-18) Set edge detect for irq, no wakeup from low power mode */ - ((immap_t *)IMAP_ADDR)->im_siu_conf.sc_siel |= - (0x80000000 >> ioport_dsc[data_port].irq); - -#ifdef CONFIG_IDE_8xx_PCCARD - /* Make sure we don't get garbage irq */ - ((immap_t *) IMAP_ADDR)->im_pcmcia.pcmc_pscr = 0xFFFF; - - /* Enable falling edge irq */ - pcmp->pcmc_per = 0x100000 >> (16 * _slot_); -#endif /* CONFIG_IDE_8xx_PCCARD */ - - hw->chipset = ide_generic; - - return 0; -} -#endif /* CONFIG_IDE_8xx_PCCARD || CONFIG_IDE_8xx_DIRECT */ - -/* - * m8xx_ide_init_ports() for a direct IDE interface _not_ using - * MPC8xx's internal PCMCIA interface - */ -#if defined(CONFIG_IDE_EXT_DIRECT) -static int __init m8xx_ide_init_ports(hw_regs_t *hw, unsigned long data_port) -{ - unsigned long *p = hw->io_ports_array; - int i; - - u32 ide_phy_base; - u32 ide_phy_end; - static unsigned long ide_base = 0; - unsigned long base; - - *p = 0; - - if (!ide_base) { - - /* TODO: - * - add code to read ORx, BRx - */ - ide_phy_base = CFG_ATA_BASE_ADDR; - ide_phy_end = CFG_ATA_BASE_ADDR + 0x200; - - printk ("IDE phys mem : %08x...%08x (size %08x)\n", - ide_phy_base, ide_phy_end, - ide_phy_end - ide_phy_base); - - if (!request_mem_region(ide_phy_base, 0x200, DRV_NAME)) { - printk(KERN_ERR "%s: resources busy\n", DRV_NAME); - return -EBUSY; - } - - ide_base=(unsigned long)ioremap(ide_phy_base, - ide_phy_end-ide_phy_base); - -#ifdef DEBUG - printk ("IDE virt base: %08lx\n", ide_base); -#endif - } - - base = ide_base + ioport_dsc[data_port].base_off; -#ifdef DEBUG - printk ("base: %08x + %08x = %08x\n", - ide_base, ioport_dsc[data_port].base_off, base); -#endif - - for (i = 0; i < IDE_NR_PORTS; ++i) { -#ifdef DEBUG - printk ("port[%d]: %08x + %08x = %08x\n", - i, - base, - ioport_dsc[data_port].reg_off[i], - i, base + ioport_dsc[data_port].reg_off[i]); -#endif - *p++ = base + ioport_dsc[data_port].reg_off[i]; - } - - /* direct connected IDE drive, i.e. external IRQ */ - hw->irq = ioport_dsc[data_port].irq; - hw->ack_intr = (ide_ack_intr_t *)ide_interrupt_ack; - - /* Enable Harddisk Interrupt, - * and make it edge sensitive - */ - /* (11-18) Set edge detect for irq, no wakeup from low power mode */ - ((immap_t *) IMAP_ADDR)->im_siu_conf.sc_siel |= - (0x80000000 >> ioport_dsc[data_port].irq); - - hw->chipset = ide_generic; - - return 0; -} -#endif /* CONFIG_IDE_8xx_DIRECT */ - - -/* -------------------------------------------------------------------- */ - - -/* PCMCIA Timing */ -#ifndef PCMCIA_SHT -#define PCMCIA_SHT(t) ((t & 0x0F)<<16) /* Strobe Hold Time */ -#define PCMCIA_SST(t) ((t & 0x0F)<<12) /* Strobe Setup Time */ -#define PCMCIA_SL(t) ((t==32) ? 0 : ((t & 0x1F)<<7)) /* Strobe Length */ -#endif - -/* Calculate PIO timings */ -static void m8xx_ide_set_pio_mode(ide_drive_t *drive, const u8 pio) -{ -#if defined(CONFIG_IDE_8xx_PCCARD) || defined(CONFIG_IDE_8xx_DIRECT) - volatile pcmconf8xx_t *pcmp; - ulong timing, mask, reg; - - pcmp = (pcmconf8xx_t *)(&(((immap_t *)IMAP_ADDR)->im_pcmcia)); - - mask = ~(PCMCIA_SHT(0xFF) | PCMCIA_SST(0xFF) | PCMCIA_SL(0xFF)); - - timing = PCMCIA_SHT(hold_time[pio] ) - | PCMCIA_SST(ide_pio_clocks[pio].setup) - | PCMCIA_SL (ide_pio_clocks[pio].active) - ; - -#if 1 - printk ("Setting timing bits 0x%08lx in PCMCIA controller\n", timing); -#endif - if ((reg = pcmp->pcmc_por0 & mask) != 0) - pcmp->pcmc_por0 = reg | timing; - - if ((reg = pcmp->pcmc_por1 & mask) != 0) - pcmp->pcmc_por1 = reg | timing; - - if ((reg = pcmp->pcmc_por2 & mask) != 0) - pcmp->pcmc_por2 = reg | timing; - - if ((reg = pcmp->pcmc_por3 & mask) != 0) - pcmp->pcmc_por3 = reg | timing; - - if ((reg = pcmp->pcmc_por4 & mask) != 0) - pcmp->pcmc_por4 = reg | timing; - - if ((reg = pcmp->pcmc_por5 & mask) != 0) - pcmp->pcmc_por5 = reg | timing; - - if ((reg = pcmp->pcmc_por6 & mask) != 0) - pcmp->pcmc_por6 = reg | timing; - - if ((reg = pcmp->pcmc_por7 & mask) != 0) - pcmp->pcmc_por7 = reg | timing; - -#elif defined(CONFIG_IDE_EXT_DIRECT) - - printk("%s[%d] %s: not implemented yet!\n", - __FILE__, __LINE__, __func__); -#endif /* defined(CONFIG_IDE_8xx_PCCARD) || defined(CONFIG_IDE_8xx_PCMCIA */ -} - -static const struct ide_port_ops m8xx_port_ops = { - .set_pio_mode = m8xx_ide_set_pio_mode, -}; - -static void -ide_interrupt_ack (void *dev) -{ -#ifdef CONFIG_IDE_8xx_PCCARD - u_int pscr, pipr; - -#if (PCMCIA_SOCKETS_NO == 2) - u_int _slot_; -#endif - - /* get interrupt sources */ - - pscr = ((volatile immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pscr; - pipr = ((volatile immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pipr; - - /* - * report only if both card detect signals are the same - * not too nice done, - * we depend on that CD2 is the bit to the left of CD1... - */ - - if(_slot_==-1){ - printk("PCMCIA slot has not been defined! Using A as default\n"); - _slot_=0; - } - - if(((pipr & M8XX_PCMCIA_CD2(_slot_)) >> 1) ^ - (pipr & M8XX_PCMCIA_CD1(_slot_)) ) { - printk ("card detect interrupt\n"); - } - /* clear the interrupt sources */ - ((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pscr = pscr; - -#else /* ! CONFIG_IDE_8xx_PCCARD */ - /* - * Only CONFIG_IDE_8xx_PCCARD is using the interrupt of the - * MPC8xx's PCMCIA controller, so there is nothing to be done here - * for CONFIG_IDE_8xx_DIRECT and CONFIG_IDE_EXT_DIRECT. - * The interrupt is handled somewhere else. -- Steven - */ -#endif /* CONFIG_IDE_8xx_PCCARD */ -} - - - -/* - * CIS Tupel codes - */ -#define CISTPL_NULL 0x00 -#define CISTPL_DEVICE 0x01 -#define CISTPL_LONGLINK_CB 0x02 -#define CISTPL_INDIRECT 0x03 -#define CISTPL_CONFIG_CB 0x04 -#define CISTPL_CFTABLE_ENTRY_CB 0x05 -#define CISTPL_LONGLINK_MFC 0x06 -#define CISTPL_BAR 0x07 -#define CISTPL_PWR_MGMNT 0x08 -#define CISTPL_EXTDEVICE 0x09 -#define CISTPL_CHECKSUM 0x10 -#define CISTPL_LONGLINK_A 0x11 -#define CISTPL_LONGLINK_C 0x12 -#define CISTPL_LINKTARGET 0x13 -#define CISTPL_NO_LINK 0x14 -#define CISTPL_VERS_1 0x15 -#define CISTPL_ALTSTR 0x16 -#define CISTPL_DEVICE_A 0x17 -#define CISTPL_JEDEC_C 0x18 -#define CISTPL_JEDEC_A 0x19 -#define CISTPL_CONFIG 0x1a -#define CISTPL_CFTABLE_ENTRY 0x1b -#define CISTPL_DEVICE_OC 0x1c -#define CISTPL_DEVICE_OA 0x1d -#define CISTPL_DEVICE_GEO 0x1e -#define CISTPL_DEVICE_GEO_A 0x1f -#define CISTPL_MANFID 0x20 -#define CISTPL_FUNCID 0x21 -#define CISTPL_FUNCE 0x22 -#define CISTPL_SWIL 0x23 -#define CISTPL_END 0xff - -/* - * CIS Function ID codes - */ -#define CISTPL_FUNCID_MULTI 0x00 -#define CISTPL_FUNCID_MEMORY 0x01 -#define CISTPL_FUNCID_SERIAL 0x02 -#define CISTPL_FUNCID_PARALLEL 0x03 -#define CISTPL_FUNCID_FIXED 0x04 -#define CISTPL_FUNCID_VIDEO 0x05 -#define CISTPL_FUNCID_NETWORK 0x06 -#define CISTPL_FUNCID_AIMS 0x07 -#define CISTPL_FUNCID_SCSI 0x08 - -/* - * Fixed Disk FUNCE codes - */ -#define CISTPL_IDE_INTERFACE 0x01 - -#define CISTPL_FUNCE_IDE_IFACE 0x01 -#define CISTPL_FUNCE_IDE_MASTER 0x02 -#define CISTPL_FUNCE_IDE_SLAVE 0x03 - -/* First feature byte */ -#define CISTPL_IDE_SILICON 0x04 -#define CISTPL_IDE_UNIQUE 0x08 -#define CISTPL_IDE_DUAL 0x10 - -/* Second feature byte */ -#define CISTPL_IDE_HAS_SLEEP 0x01 -#define CISTPL_IDE_HAS_STANDBY 0x02 -#define CISTPL_IDE_HAS_IDLE 0x04 -#define CISTPL_IDE_LOW_POWER 0x08 -#define CISTPL_IDE_REG_INHIBIT 0x10 -#define CISTPL_IDE_HAS_INDEX 0x20 -#define CISTPL_IDE_IOIS16 0x40 - - -/* -------------------------------------------------------------------- */ - - -#define MAX_TUPEL_SZ 512 -#define MAX_FEATURES 4 - -static int check_ide_device (unsigned long base) -{ - volatile u8 *ident = NULL; - volatile u8 *feature_p[MAX_FEATURES]; - volatile u8 *p, *start; - int n_features = 0; - u8 func_id = ~0; - u8 code, len; - unsigned short config_base = 0; - int found = 0; - int i; - -#ifdef DEBUG - printk ("PCMCIA MEM: %08lX\n", base); -#endif - start = p = (volatile u8 *) base; - - while ((p - start) < MAX_TUPEL_SZ) { - - code = *p; p += 2; - - if (code == 0xFF) { /* End of chain */ - break; - } - - len = *p; p += 2; -#ifdef DEBUG_PCMCIA - { volatile u8 *q = p; - printk ("\nTuple code %02x length %d\n\tData:", - code, len); - - for (i = 0; i < len; ++i) { - printk (" %02x", *q); - q+= 2; - } - } -#endif /* DEBUG_PCMCIA */ - switch (code) { - case CISTPL_VERS_1: - ident = p + 4; - break; - case CISTPL_FUNCID: - func_id = *p; - break; - case CISTPL_FUNCE: - if (n_features < MAX_FEATURES) - feature_p[n_features++] = p; - break; - case CISTPL_CONFIG: - config_base = (*(p+6) << 8) + (*(p+4)); - default: - break; - } - p += 2 * len; - } - - found = identify (ident); - - if (func_id != ((u8)~0)) { - print_funcid (func_id); - - if (func_id == CISTPL_FUNCID_FIXED) - found = 1; - else - return (1); /* no disk drive */ - } - - for (i=0; i id_str) { - if (*t == ' ') - *t = '\0'; - else - break; - } - printk ("Card ID: %s\n", id_str); - - for (card=known_cards; *card; ++card) { - if (strcmp(*card, id_str) == 0) { /* found! */ - return (1); - } - } - - return (0); /* don't know */ -} - -static int __init mpc8xx_ide_probe(void) -{ - hw_regs_t hw; - u8 idx[4] = { 0xff, 0xff, 0xff, 0xff }; - -#ifdef IDE0_BASE_OFFSET - memset(&hw, 0, sizeof(hw)); - if (!m8xx_ide_init_ports(&hw, 0)) { - ide_hwif_t *hwif = ide_find_port(); - - if (hwif) { - ide_init_port_hw(hwif, &hw); - hwif->pio_mask = ATA_PIO4; - hwif->port_ops = &m8xx_port_ops; - - idx[0] = hwif->index; - } - } -#ifdef IDE1_BASE_OFFSET - memset(&hw, 0, sizeof(hw)); - if (!m8xx_ide_init_ports(&hw, 1)) { - ide_hwif_t *mate = ide_find_port(); - - if (mate) { - ide_init_port_hw(mate, &hw); - mate->pio_mask = ATA_PIO4; - mate->port_ops = &m8xx_port_ops; - - idx[1] = mate->index; - } - } -#endif -#endif - - ide_device_add(idx, NULL); - - return 0; -} - -module_init(mpc8xx_ide_probe); - -MODULE_LICENSE("GPL"); -- cgit v0.10.2 From a698400a1556cf9f0376d1a41e536973dd5c4747 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:41 +0200 Subject: cmd640: fix warm-plug support for the secondary interface Register secondary interface also when user requested not to probe devices. While at it: - remove write-only second_port_toggled variable Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/pci/cmd640.c b/drivers/ide/pci/cmd640.c index baa26a2..ab7d727 100644 --- a/drivers/ide/pci/cmd640.c +++ b/drivers/ide/pci/cmd640.c @@ -687,9 +687,6 @@ static int cmd640x_init_one(unsigned long base, unsigned long ctl) */ static int __init cmd640x_init(void) { -#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED - int second_port_toggled = 0; -#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */ int second_port_cmd640 = 0, rc; const char *bus_type, *port2; unsigned int index; @@ -778,46 +775,40 @@ static int __init cmd640x_init(void) put_cmd640_reg(CMDTIM, 0); put_cmd640_reg(BRST, 0x40); - cmd_hwif1 = ide_find_port(); + b = get_cmd640_reg(CNTRL); /* * Try to enable the secondary interface, if not already enabled */ - if (cmd_hwif1 && - cmd_hwif1->drives[0].noprobe && cmd_hwif1->drives[1].noprobe) { - port2 = "not probed"; + if (secondary_port_responding()) { + if ((b & CNTRL_ENA_2ND)) { + second_port_cmd640 = 1; + port2 = "okay"; + } else if (cmd640_vlb) { + second_port_cmd640 = 1; + port2 = "alive"; + } else + port2 = "not cmd640"; } else { - b = get_cmd640_reg(CNTRL); + put_cmd640_reg(CNTRL, b ^ CNTRL_ENA_2ND); /* toggle the bit */ if (secondary_port_responding()) { - if ((b & CNTRL_ENA_2ND)) { - second_port_cmd640 = 1; - port2 = "okay"; - } else if (cmd640_vlb) { - second_port_cmd640 = 1; - port2 = "alive"; - } else - port2 = "not cmd640"; + second_port_cmd640 = 1; + port2 = "enabled"; } else { - put_cmd640_reg(CNTRL, b ^ CNTRL_ENA_2ND); /* toggle the bit */ - if (secondary_port_responding()) { - second_port_cmd640 = 1; -#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED - second_port_toggled = 1; -#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */ - port2 = "enabled"; - } else { - put_cmd640_reg(CNTRL, b); /* restore original setting */ - port2 = "not responding"; - } + put_cmd640_reg(CNTRL, b); /* restore original setting */ + port2 = "not responding"; } } /* * Initialize data for secondary cmd640 port, if enabled */ - if (second_port_cmd640 && cmd_hwif1) { - ide_init_port_hw(cmd_hwif1, &hw[1]); - idx[1] = cmd_hwif1->index; + if (second_port_cmd640) { + cmd_hwif1 = ide_find_port(); + if (cmd_hwif1) { + ide_init_port_hw(cmd_hwif1, &hw[1]); + idx[1] = cmd_hwif1->index; + } } printk(KERN_INFO "cmd640: %sserialized, secondary interface %s\n", second_port_cmd640 ? "" : "not ", port2); -- cgit v0.10.2 From e6d95bd14928926d6658b5e4ace905e8b83ed27a Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:42 +0200 Subject: ide: ->port_init_devs -> ->init_dev Change ->port_init_devs method to take 'ide_drive_t *' as an argument instead of 'ide_hwif_t *' and rename it to ->init_dev. There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 97dda05..c5dc188 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1320,10 +1320,10 @@ static void ide_port_init_devices(ide_hwif_t *hwif) drive->unmask = 1; if (hwif->host_flags & IDE_HFLAG_NO_UNMASK_IRQS) drive->no_unmask = 1; - } - if (port_ops && port_ops->port_init_devs) - port_ops->port_init_devs(hwif); + if (port_ops && port_ops->init_dev) + port_ops->init_dev(drive); + } } static void ide_init_port(ide_hwif_t *hwif, unsigned int port, diff --git a/drivers/ide/legacy/ht6560b.c b/drivers/ide/legacy/ht6560b.c index bd2f579..7bc8fd5 100644 --- a/drivers/ide/legacy/ht6560b.c +++ b/drivers/ide/legacy/ht6560b.c @@ -310,16 +310,16 @@ static void ht6560b_set_pio_mode(ide_drive_t *drive, const u8 pio) #endif } -static void __init ht6560b_port_init_devs(ide_hwif_t *hwif) +static void __init ht6560b_init_dev(ide_drive_t *drive) { + ide_hwif_t *hwif = drive->hwif; /* Setting default configurations for drives. */ int t = (HT_CONFIG_DEFAULT << 8) | HT_TIMING_DEFAULT; if (hwif->channel) t |= (HT_SECONDARY_IF << 8); - hwif->drives[0].drive_data = t; - hwif->drives[1].drive_data = t; + drive->drive_data = t; } static int probe_ht6560b; @@ -328,7 +328,7 @@ module_param_named(probe, probe_ht6560b, bool, 0); MODULE_PARM_DESC(probe, "probe for HT6560B chipset"); static const struct ide_port_ops ht6560b_port_ops = { - .port_init_devs = ht6560b_port_init_devs, + .init_dev = ht6560b_init_dev, .set_pio_mode = ht6560b_set_pio_mode, .selectproc = ht6560b_selectproc, }; diff --git a/drivers/ide/legacy/ide-4drives.c b/drivers/ide/legacy/ide-4drives.c index 5cd6ce5..89c8ff0 100644 --- a/drivers/ide/legacy/ide-4drives.c +++ b/drivers/ide/legacy/ide-4drives.c @@ -11,16 +11,14 @@ static int probe_4drives; module_param_named(probe, probe_4drives, bool, 0); MODULE_PARM_DESC(probe, "probe for generic IDE chipset with 4 drives/port"); -static void ide_4drives_port_init_devs(ide_hwif_t *hwif) +static void ide_4drives_init_dev(ide_drive_t *drive) { - if (hwif->channel) { - hwif->drives[0].select.all ^= 0x20; - hwif->drives[1].select.all ^= 0x20; - } + if (drive->hwif->channel) + drive->select.all ^= 0x20; } static const struct ide_port_ops ide_4drives_port_ops = { - .port_init_devs = ide_4drives_port_init_devs, + .init_dev = ide_4drives_init_dev, }; static const struct ide_port_info ide_4drives_port_info = { diff --git a/drivers/ide/legacy/qd65xx.c b/drivers/ide/legacy/qd65xx.c index 63f6c31..2338f34 100644 --- a/drivers/ide/legacy/qd65xx.c +++ b/drivers/ide/legacy/qd65xx.c @@ -282,17 +282,18 @@ static int __init qd_testreg(int port) return (readreg != QD_TESTVAL); } -static void __init qd6500_port_init_devs(ide_hwif_t *hwif) +static void __init qd6500_init_dev(ide_drive_t *drive) { + ide_hwif_t *hwif = drive->hwif; u8 base = (hwif->config_data & 0xff00) >> 8; u8 config = QD_CONFIG(hwif); - hwif->drives[0].drive_data = QD6500_DEF_DATA; - hwif->drives[1].drive_data = QD6500_DEF_DATA; + drive->drive_data = QD6500_DEF_DATA; } -static void __init qd6580_port_init_devs(ide_hwif_t *hwif) +static void __init qd6580_init_dev(ide_drive_t *drive) { + ide_hwif_t *hwif = drive->hwif; u16 t1, t2; u8 base = (hwif->config_data & 0xff00) >> 8; u8 config = QD_CONFIG(hwif); @@ -303,18 +304,17 @@ static void __init qd6580_port_init_devs(ide_hwif_t *hwif) } else t2 = t1 = hwif->channel ? QD6580_DEF_DATA2 : QD6580_DEF_DATA; - hwif->drives[0].drive_data = t1; - hwif->drives[1].drive_data = t2; + drive->drive_data = drive->select.b.unit ? t2 : t1; } static const struct ide_port_ops qd6500_port_ops = { - .port_init_devs = qd6500_port_init_devs, + .init_dev = qd6500_init_dev, .set_pio_mode = qd6500_set_pio_mode, .selectproc = qd65xx_select, }; static const struct ide_port_ops qd6580_port_ops = { - .port_init_devs = qd6580_port_init_devs, + .init_dev = qd6580_init_dev, .set_pio_mode = qd6580_set_pio_mode, .selectproc = qd65xx_select, }; diff --git a/include/linux/ide.h b/include/linux/ide.h index a6a2ecc..f9cbe93 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -405,8 +405,8 @@ typedef struct ide_drive_s { struct ide_port_info; struct ide_port_ops { - /* host specific initialization of devices on a port */ - void (*port_init_devs)(struct hwif_s *); + /* host specific initialization of a device */ + void (*init_dev)(ide_drive_t *); /* routine to program host for PIO mode */ void (*set_pio_mode)(ide_drive_t *, const u8); /* routine to program host for DMA mode */ -- cgit v0.10.2 From b48c89a9699f451e4e236fa7313461281c00e69b Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:42 +0200 Subject: cmd640: add ->init_dev method Convert the driver to use ->init_dev method instead of open-coding devices init in cmd640x_init(). While at it: - fix printk()-s to use KERN_INFO level instead of the default KERN_ERR - use DRV_NAME define in printk()-s - set proper ->pio_mask also for CONFIG_BLK_DEV_CMD640_ENHANCED=n There should be no functional changes caused by this patch (except fixing printk()-s levels). Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/pci/cmd640.c b/drivers/ide/pci/cmd640.c index ab7d727..1ad1e23 100644 --- a/drivers/ide/pci/cmd640.c +++ b/drivers/ide/pci/cmd640.c @@ -611,11 +611,40 @@ static void cmd640_set_pio_mode(ide_drive_t *drive, const u8 pio) display_clocks(index); } +#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */ + +static void cmd640_init_dev(ide_drive_t *drive) +{ + unsigned int i = drive->hwif->channel * 2 + drive->select.b.unit; + +#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED + /* + * Reset timing to the slowest speed and turn off prefetch. + * This way, the drive identify code has a better chance. + */ + setup_counts[i] = 4; /* max possible */ + active_counts[i] = 16; /* max possible */ + recovery_counts[i] = 16; /* max possible */ + program_drive_counts(drive, i); + set_prefetch_mode(drive, i, 0); + printk(KERN_INFO DRV_NAME ": drive%d timings/prefetch cleared\n", i); +#else + /* + * Set the drive unmask flags to match the prefetch setting. + */ + check_prefetch(drive, i); + printk(KERN_INFO DRV_NAME ": drive%d timings/prefetch(%s) preserved\n", + i, drive->no_io_32bit ? "off" : "on"); +#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */ +} + static const struct ide_port_ops cmd640_port_ops = { + .init_dev = cmd640_init_dev, +#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED .set_pio_mode = cmd640_set_pio_mode, +#endif }; -#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */ static int pci_conf1(void) { @@ -658,10 +687,8 @@ static const struct ide_port_info cmd640_port_info __initdata = { IDE_HFLAG_NO_DMA | IDE_HFLAG_ABUSE_PREFETCH | IDE_HFLAG_ABUSE_FAST_DEVSEL, -#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED .port_ops = &cmd640_port_ops, .pio_mask = ATA_PIO5, -#endif }; static int cmd640x_init_one(unsigned long base, unsigned long ctl) @@ -689,7 +716,6 @@ static int __init cmd640x_init(void) { int second_port_cmd640 = 0, rc; const char *bus_type, *port2; - unsigned int index; u8 b, cfr; u8 idx[4] = { 0xff, 0xff, 0xff, 0xff }; hw_regs_t hw[2]; @@ -813,44 +839,6 @@ static int __init cmd640x_init(void) printk(KERN_INFO "cmd640: %sserialized, secondary interface %s\n", second_port_cmd640 ? "" : "not ", port2); - /* - * Establish initial timings/prefetch for all drives. - * Do not unnecessarily disturb any prior BIOS setup of these. - */ - for (index = 0; index < (2 + (second_port_cmd640 << 1)); index++) { - ide_drive_t *drive; - - if (index > 1) { - if (cmd_hwif1 == NULL) - continue; - drive = &cmd_hwif1->drives[index & 1]; - } else { - if (cmd_hwif0 == NULL) - continue; - drive = &cmd_hwif0->drives[index & 1]; - } - -#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED - /* - * Reset timing to the slowest speed and turn off prefetch. - * This way, the drive identify code has a better chance. - */ - setup_counts [index] = 4; /* max possible */ - active_counts [index] = 16; /* max possible */ - recovery_counts [index] = 16; /* max possible */ - program_drive_counts(drive, index); - set_prefetch_mode(drive, index, 0); - printk("cmd640: drive%d timings/prefetch cleared\n", index); -#else - /* - * Set the drive unmask flags to match the prefetch setting - */ - check_prefetch(drive, index); - printk("cmd640: drive%d timings/prefetch(%s) preserved\n", - index, drive->no_io_32bit ? "off" : "on"); -#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */ - } - #ifdef CMD640_DUMP_REGS cmd640_dump_regs(); #endif -- cgit v0.10.2 From 9a5ae1faaaf43933dee48c223b193d5e1c4b8b0c Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:42 +0200 Subject: rapide: use struct ide_port_info Convert the driver to use struct ide_port_info. There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/arm/rapide.c b/drivers/ide/arm/rapide.c index 1747b23..bb081ad 100644 --- a/drivers/ide/arm/rapide.c +++ b/drivers/ide/arm/rapide.c @@ -11,6 +11,10 @@ #include +static struct const ide_port_info rapide_port_info = { + .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA, +}; + static void rapide_setup_ports(hw_regs_t *hw, void __iomem *base, void __iomem *ctrl, unsigned int sz, int irq) { @@ -53,12 +57,11 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id) ide_init_port_hw(hwif, &hw); - hwif->host_flags = IDE_HFLAG_MMIO; default_hwif_mmiops(hwif); idx[0] = hwif->index; - ide_device_add(idx, NULL); + ide_device_add(idx, &rapide_port_info); ecard_set_drvdata(ec, hwif); goto out; -- cgit v0.10.2 From f81eb80bbb949f9498980c785ef7dd4c994a4909 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:42 +0200 Subject: ide-h8300: use struct ide_port_info Convert the driver to use struct ide_port_info. There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/h8300/ide-h8300.c b/drivers/ide/h8300/ide-h8300.c index dfce105..ff6a98a 100644 --- a/drivers/ide/h8300/ide-h8300.c +++ b/drivers/ide/h8300/ide-h8300.c @@ -176,6 +176,10 @@ static inline void hwif_setup(ide_hwif_t *hwif) hwif->output_data = h8300_output_data; } +static const struct ide_port_info h8300_port_info = { + .host_flags = IDE_HFLAG_NO_IO_32BIT | IDE_HFLAG_NO_DMA, +}; + static int __init h8300_ide_init(void) { hw_regs_t hw; @@ -201,12 +205,11 @@ static int __init h8300_ide_init(void) index = hwif->index; ide_init_port_hw(hwif, &hw); hwif_setup(hwif); - hwif->host_flags = IDE_HFLAG_NO_IO_32BIT; printk(KERN_INFO "ide%d: H8/300 generic IDE interface\n", index); idx[0] = index; - ide_device_add(idx, NULL); + ide_device_add(idx, &h8300_port_info); return 0; -- cgit v0.10.2 From 7b60fa16ca50b0f8cb9d007faee0dff71b397fb8 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:42 +0200 Subject: ide_platform: use struct ide_port_info Convert the driver to use struct ide_port_info. There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/legacy/ide_platform.c b/drivers/ide/legacy/ide_platform.c index d3bc3f2..a249562 100644 --- a/drivers/ide/legacy/ide_platform.c +++ b/drivers/ide/legacy/ide_platform.c @@ -44,6 +44,10 @@ static void __devinit plat_ide_setup_ports(hw_regs_t *hw, hw->chipset = ide_generic; } +static const struct ide_port_info platform_ide_port_info = { + .host_flags = IDE_HFLAG_NO_DMA, +}; + static int __devinit plat_ide_probe(struct platform_device *pdev) { struct resource *res_base, *res_alt, *res_irq; @@ -54,6 +58,7 @@ static int __devinit plat_ide_probe(struct platform_device *pdev) int ret = 0; int mmio = 0; hw_regs_t hw; + struct ide_port_info d = platform_ide_port_info; pdata = pdev->dev.platform_data; @@ -102,13 +107,13 @@ static int __devinit plat_ide_probe(struct platform_device *pdev) ide_init_port_hw(hwif, &hw); if (mmio) { - hwif->host_flags = IDE_HFLAG_MMIO; + d.host_flags |= IDE_HFLAG_MMIO; default_hwif_mmiops(hwif); } idx[0] = hwif->index; - ide_device_add(idx, NULL); + ide_device_add(idx, &d); platform_set_drvdata(pdev, hwif); -- cgit v0.10.2 From eb3aff5530d22eb4be0a99c9d39c9ffde7b9891a Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:42 +0200 Subject: ide: print message on error in ide_find_port_slot() * Add DRV_NAME define to ide-h8300.c. * Fix ide-h8300.c, swarm.c and sgiioc4.c to set .name field in struct ide_port_info to DRV_NAME, then convert these host drivers to use ide_find_port_slot() instead of ide_find_port(). * Print message on error in ide_find_port_slot(). Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/h8300/ide-h8300.c b/drivers/ide/h8300/ide-h8300.c index ff6a98a..c445298 100644 --- a/drivers/ide/h8300/ide-h8300.c +++ b/drivers/ide/h8300/ide-h8300.c @@ -8,6 +8,8 @@ #include #include +#define DRV_NAME "ide-h8300" + #define bswap(d) \ ({ \ u16 r; \ @@ -196,11 +198,9 @@ static int __init h8300_ide_init(void) hw_setup(&hw); - hwif = ide_find_port(); - if (hwif == NULL) { - printk(KERN_ERR "ide-h8300: IDE I/F register failed\n"); + hwif = ide_find_port_slot(&h8300_port_info); + if (hwif == NULL) return -ENOENT; - } index = hwif->index; ide_init_port_hw(hwif, &hw); diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index c5dc188..235ebdb 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1490,6 +1490,9 @@ ide_hwif_t *ide_find_port_slot(const struct ide_port_info *d) } } + printk(KERN_ERR "%s: no free slot for interface\n", + d ? d->name : "ide"); + return NULL; out_found: diff --git a/drivers/ide/mips/swarm.c b/drivers/ide/mips/swarm.c index 57a0138..bae92ac 100644 --- a/drivers/ide/mips/swarm.c +++ b/drivers/ide/mips/swarm.c @@ -62,6 +62,7 @@ static struct resource swarm_ide_resource = { static struct platform_device *swarm_ide_dev; static const struct ide_port_info swarm_port_info = { + .name = DRV_NAME, .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA, }; @@ -81,11 +82,9 @@ static int __devinit swarm_ide_probe(struct device *dev) if (!SIBYTE_HAVE_IDE) return -ENODEV; - hwif = ide_find_port(); - if (hwif == NULL) { - printk(KERN_ERR DRV_NAME ": no free slot for interface\n"); + hwif = ide_find_port_slot(&swarm_port_info); + if (hwif == NULL) return -ENOMEM; - } base = ioremap(A_IO_EXT_BASE, 0x800); offset = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_START_ADDR, IDE_CS)); diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c index 71bff31..789c66d 100644 --- a/drivers/ide/pci/scc_pata.c +++ b/drivers/ide/pci/scc_pata.c @@ -558,12 +558,9 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev, u8 idx[4] = { 0xff, 0xff, 0xff, 0xff }; int i; - hwif = ide_find_port(); - if (hwif == NULL) { - printk(KERN_ERR "%s: too many IDE interfaces, " - "no room in table\n", SCC_PATA_NAME); + hwif = ide_find_port_slot(d); + if (hwif == NULL) return -ENOMEM; - } memset(&hw, 0, sizeof(hw)); for (i = 0; i <= 8; i++) diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c index dd0d0f7..be73acb 100644 --- a/drivers/ide/pci/sgiioc4.c +++ b/drivers/ide/pci/sgiioc4.c @@ -568,6 +568,7 @@ static const struct ide_dma_ops sgiioc4_dma_ops = { }; static const struct ide_port_info sgiioc4_port_info __devinitdata = { + .name = DRV_NAME, .chipset = ide_pci, .init_dma = ide_dma_sgiioc4, .port_ops = &sgiioc4_port_ops, @@ -587,12 +588,9 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev) hw_regs_t hw; struct ide_port_info d = sgiioc4_port_info; - hwif = ide_find_port(); - if (hwif == NULL) { - printk(KERN_ERR "%s: too many IDE interfaces, no room in table\n", - DRV_NAME); + hwif = ide_find_port_slot(&d); + if (hwif == NULL) return -ENOMEM; - } /* Get the CmdBlk and CtrlBlk Base Registers */ bar0 = pci_resource_start(dev, 0); diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c index 166211f..4321d20 100644 --- a/drivers/ide/setup-pci.c +++ b/drivers/ide/setup-pci.c @@ -332,11 +332,8 @@ static ide_hwif_t *ide_hwif_configure(struct pci_dev *dev, } hwif = ide_find_port_slot(d); - if (hwif == NULL) { - printk(KERN_ERR "%s: too many IDE interfaces, no room in " - "table\n", d->name); + if (hwif == NULL) return NULL; - } memset(&hw, 0, sizeof(hw)); hw.irq = irq; -- cgit v0.10.2 From 740c397cc64272917a4c4c283649579d2044a836 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:43 +0200 Subject: ide-h8300: print driver banner message early Print driver banner message early and without interface number. Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/h8300/ide-h8300.c b/drivers/ide/h8300/ide-h8300.c index c445298..20fad6d 100644 --- a/drivers/ide/h8300/ide-h8300.c +++ b/drivers/ide/h8300/ide-h8300.c @@ -189,6 +189,8 @@ static int __init h8300_ide_init(void) int index; u8 idx[4] = { 0xff, 0xff, 0xff, 0xff }; + printk(KERN_INFO DRV_NAME ": H8/300 generic IDE interface\n"); + if (!request_region(CONFIG_H8300_IDE_BASE, H8300_IDE_GAP*8, "ide-h8300")) goto out_busy; if (!request_region(CONFIG_H8300_IDE_ALT, H8300_IDE_GAP, "ide-h8300")) { @@ -205,7 +207,6 @@ static int __init h8300_ide_init(void) index = hwif->index; ide_init_port_hw(hwif, &hw); hwif_setup(hwif); - printk(KERN_INFO "ide%d: H8/300 generic IDE interface\n", index); idx[0] = index; -- cgit v0.10.2 From e193c3e141df4b536ed077b29c83a96768333607 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:43 +0200 Subject: ide-pnp: print driver banner message early Print driver banner message early and without interface number. Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-pnp.c b/drivers/ide/ide-pnp.c index fc1e10c..03f2ef5 100644 --- a/drivers/ide/ide-pnp.c +++ b/drivers/ide/ide-pnp.c @@ -33,6 +33,8 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) ide_hwif_t *hwif; unsigned long base, ctl; + printk(KERN_INFO DRV_NAME ": generic PnP IDE interface\n"); + if (!(pnp_port_valid(dev, 0) && pnp_port_valid(dev, 1) && pnp_irq_valid(dev, 0))) return -1; @@ -64,7 +66,6 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) ide_init_port_hw(hwif, &hw); - printk(KERN_INFO "ide%d: generic PnP IDE interface\n", index); pnp_set_drvdata(dev, hwif); ide_device_add(idx, NULL); -- cgit v0.10.2 From 07fe69d5d0b6e476cecaf75e81c0c6093571087b Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:43 +0200 Subject: ide: allow any command requesting DMA data phase for HDIO_DRIVE_TASKFILE Allow any command requesting DMA data phase for HDIO_DRIVE_TASKFILE ioctl and remove no longer needed task_dma_ok() Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index cf55a48..994a516 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -62,25 +62,6 @@ int taskfile_lib_get_identify (ide_drive_t *drive, u8 *buf) return ide_raw_taskfile(drive, &args, buf, 1); } -static int inline task_dma_ok(ide_task_t *task) -{ - if (blk_fs_request(task->rq) || (task->tf_flags & IDE_TFLAG_FLAGGED)) - return 1; - - switch (task->tf.command) { - case WIN_WRITEDMA_ONCE: - case WIN_WRITEDMA: - case WIN_WRITEDMA_EXT: - case WIN_READDMA_ONCE: - case WIN_READDMA: - case WIN_READDMA_EXT: - case WIN_IDENTIFY_DMA: - return 1; - } - - return 0; -} - static ide_startstop_t task_no_data_intr(ide_drive_t *); static ide_startstop_t set_geometry_intr(ide_drive_t *); static ide_startstop_t recal_intr(ide_drive_t *); @@ -139,8 +120,7 @@ ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task) WAIT_WORSTCASE, NULL); return ide_started; default: - if (task_dma_ok(task) == 0 || drive->using_dma == 0 || - dma_ops->dma_setup(drive)) + if (drive->using_dma == 0 || dma_ops->dma_setup(drive)) return ide_stopped; dma_ops->dma_exec_cmd(drive, tf->command); dma_ops->dma_start(drive); -- cgit v0.10.2 From 605cfe8270cb182f494575c5a608404bb19fdfc5 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:43 +0200 Subject: ide: remove superfluous BUG_ON() from set_geometry_intr() ide_set_handler() bugs on ->handler == NULL so no need to do it in set_geometry_intr(). There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 994a516..0d0bf29 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -163,7 +163,6 @@ static ide_startstop_t set_geometry_intr(ide_drive_t *drive) if (stat & (ERR_STAT|DRQ_STAT)) return ide_error(drive, "set_geometry_intr", stat); - BUG_ON(HWGROUP(drive)->handler != NULL); ide_set_handler(drive, &set_geometry_intr, WAIT_WORSTCASE, NULL); return ide_started; } -- cgit v0.10.2 From 61729415e64a1149d4eb36c3fac26a28728ad1d7 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:43 +0200 Subject: ide: remove needless includes from ide.c Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 9240888..90ae00d 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -50,29 +50,16 @@ #include #include #include -#include -#include #include #include #include #include -#include #include #include #include -#include #include #include -#include -#include -#include #include -#include - -#include -#include -#include -#include /* default maximum number of failures */ -- cgit v0.10.2 From ad15e9fc8913b704978ffdda7d1f31c79ed6814d Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:44 +0200 Subject: ide: remove needless includes from ide-taskfile.c (take 2) v2: On Sunday 15 June 2008, Geert Uytterhoeven wrote: > As ide-taskfile.c uses scatterlists, it should include . (v1 broke IDE build on m68k, thanks to Geert for finding the bug) Cc: Geert Uytterhoeven Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 0d0bf29..1fbdb74 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -8,28 +8,18 @@ * The big the bad and the ugly. */ -#include #include #include #include -#include -#include #include #include -#include #include -#include -#include #include -#include #include #include #include -#include #include -#include -#include #include #include -- cgit v0.10.2 From ff23712e791fd80f3b088d5e1c5733c0944cfe64 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:44 +0200 Subject: ide: remove needless includes from setup-pci.c (take 2) v2: * sparc build fix. (From Stephen Rothwell) Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c index 4321d20..65fc08b 100644 --- a/drivers/ide/setup-pci.c +++ b/drivers/ide/setup-pci.c @@ -6,19 +6,15 @@ * May be copied or modified under the terms of the GNU General Public License */ -#include #include #include #include #include -#include -#include #include #include #include #include -#include /** * ide_setup_pci_baseregs - place a PCI IDE controller native -- cgit v0.10.2 From 47bc7e7425d198ad1f8c4597b0bf28619fcce0fd Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:44 +0200 Subject: rapide: fix rapide_probe() return value Return -ENOENT on ide_find_port() failure. While at it: - Cleanup rapide_probe() a bit. Cc: Russell King Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/arm/rapide.c b/drivers/ide/arm/rapide.c index bb081ad..43057e0 100644 --- a/drivers/ide/arm/rapide.c +++ b/drivers/ide/arm/rapide.c @@ -48,24 +48,26 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id) goto release; } - hwif = ide_find_port(); - if (hwif) { - memset(&hw, 0, sizeof(hw)); - rapide_setup_ports(&hw, base, base + 0x818, 1 << 6, ec->irq); - hw.chipset = ide_generic; - hw.dev = &ec->dev; + memset(&hw, 0, sizeof(hw)); + rapide_setup_ports(&hw, base, base + 0x818, 1 << 6, ec->irq); + hw.chipset = ide_generic; + hw.dev = &ec->dev; - ide_init_port_hw(hwif, &hw); + hwif = ide_find_port(); + if (hwif == NULL) { + ret = -ENOENT; + goto release; + } - default_hwif_mmiops(hwif); + ide_init_port_hw(hwif, &hw); + default_hwif_mmiops(hwif); - idx[0] = hwif->index; + idx[0] = hwif->index; - ide_device_add(idx, &rapide_port_info); + ide_device_add(idx, &rapide_port_info); - ecard_set_drvdata(ec, hwif); - goto out; - } + ecard_set_drvdata(ec, hwif); + goto out; release: ecard_release_resources(ec); -- cgit v0.10.2 From 67717e224181527987cce800fa2ddb5c8c1e9315 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:44 +0200 Subject: icside: always try to probe first interface Try to probe first interface even if ide_hwifs[]'s slot for the second interface cannot be obtained. While at it: - Add DRV_NAME define and use it for request_dma() instead of hwif->name. Cc: Russell King Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/arm/icside.c b/drivers/ide/arm/icside.c index c8d8600..52f58c8 100644 --- a/drivers/ide/arm/icside.c +++ b/drivers/ide/arm/icside.c @@ -21,6 +21,8 @@ #include #include +#define DRV_NAME "icside" + #define ICS_IDENT_OFFSET 0x2280 #define ICS_ARCIN_V5_INTRSTAT 0x0000 @@ -546,27 +548,27 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec) ide_init_port_hw(hwif, &hw[0]); default_hwif_mmiops(hwif); + idx[0] = hwif->index; + mate = ide_find_port(); - if (mate == NULL) - return -ENODEV; + if (mate) { + ide_init_port_hw(mate, &hw[1]); + default_hwif_mmiops(mate); - ide_init_port_hw(mate, &hw[1]); - default_hwif_mmiops(mate); + idx[1] = mate->index; + } state->hwif[0] = hwif; state->hwif[1] = mate; ecard_set_drvdata(ec, state); - if (ec->dma != NO_DMA && !request_dma(ec->dma, hwif->name)) { + if (ec->dma != NO_DMA && !request_dma(ec->dma, DRV_NAME)) { d.init_dma = icside_dma_init; d.port_ops = &icside_v6_port_ops; d.dma_ops = NULL; } - idx[0] = hwif->index; - idx[1] = mate->index; - ide_device_add(idx, &d); return 0; -- cgit v0.10.2 From cb8ea0929c9cb899d61c4e155aace0b34d8cffe3 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:44 +0200 Subject: swarm: call ide_find_port_slot() later Move ide_find_port_slot() call closer to ide_device_add(). This is basically a preparation for the future changes. Cc: Maciej W. Rozycki Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/mips/swarm.c b/drivers/ide/mips/swarm.c index bae92ac..9f1212c 100644 --- a/drivers/ide/mips/swarm.c +++ b/drivers/ide/mips/swarm.c @@ -82,10 +82,6 @@ static int __devinit swarm_ide_probe(struct device *dev) if (!SIBYTE_HAVE_IDE) return -ENODEV; - hwif = ide_find_port_slot(&swarm_port_info); - if (hwif == NULL) - return -ENOMEM; - base = ioremap(A_IO_EXT_BASE, 0x800); offset = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_START_ADDR, IDE_CS)); size = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_MULT_SIZE, IDE_CS)); @@ -112,9 +108,6 @@ static int __devinit swarm_ide_probe(struct device *dev) base = ioremap(offset, size); - /* Setup MMIO ops. */ - default_hwif_mmiops(hwif); - for (i = 0; i <= 7; i++) hw.io_ports_array[i] = (unsigned long)(base + ((0x1f0 + i) << 5)); @@ -123,8 +116,15 @@ static int __devinit swarm_ide_probe(struct device *dev) hw.irq = K_INT_GB_IDE; hw.chipset = ide_generic; + hwif = ide_find_port_slot(&swarm_port_info); + if (hwif == NULL) + goto err; + ide_init_port_hw(hwif, &hw); + /* Setup MMIO ops. */ + default_hwif_mmiops(hwif); + idx[0] = hwif->index; ide_device_add(idx, &swarm_port_info); @@ -132,6 +132,10 @@ static int __devinit swarm_ide_probe(struct device *dev) dev_set_drvdata(dev, hwif); return 0; +err: + release_resource(&swarm_ide_resource); + iounmap(base); + return -ENOMEM; } static struct device_driver swarm_ide_driver = { -- cgit v0.10.2 From ce30e4015954e281f682aa8d158a47885d8e1262 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:44 +0200 Subject: sgiioc4: call ide_find_port_slot() later Move ide_find_port_slot() call closer to ide_device_add(). This is basically a preparation for the future changes. Cc: Jeremy Higdon Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c index be73acb..c79ff5b 100644 --- a/drivers/ide/pci/sgiioc4.c +++ b/drivers/ide/pci/sgiioc4.c @@ -588,10 +588,6 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev) hw_regs_t hw; struct ide_port_info d = sgiioc4_port_info; - hwif = ide_find_port_slot(&d); - if (hwif == NULL) - return -ENOMEM; - /* Get the CmdBlk and CtrlBlk Base Registers */ bar0 = pci_resource_start(dev, 0); virt_base = ioremap(bar0, pci_resource_len(dev, 0)); @@ -621,6 +617,11 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev) hw.irq = dev->irq; hw.chipset = ide_pci; hw.dev = &dev->dev; + + hwif = ide_find_port_slot(&d); + if (hwif == NULL) + goto err; + ide_init_port_hw(hwif, &hw); /* The IOC4 uses MMIO rather than Port IO. */ @@ -637,6 +638,10 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev) return -EIO; return 0; +err: + release_mem_region(cmd_phys_base, IOC4_CMD_CTL_BLK_SIZE); + iounmap(virt_base); + return -ENOMEM; } static unsigned int __devinit -- cgit v0.10.2 From af6765ce9f535f20e6f9975269660fe49594745a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 16 Jul 2008 20:33:45 +0200 Subject: ide-cd: remove wait-for-idle-controller bit in cdrom_start_packet_command This is done in the request issue path anyway. Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index e12d602..eb4b275 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -517,14 +517,9 @@ static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive, int xferlen, ide_handler_t *handler) { - ide_startstop_t startstop; struct cdrom_info *info = drive->driver_data; ide_hwif_t *hwif = drive->hwif; - /* wait for the controller to be idle */ - if (ide_wait_stat(&startstop, drive, 0, BUSY_STAT, WAIT_READY)) - return startstop; - /* FIXME: for Virtual DMA we must check harder */ if (info->dma) info->dma = !hwif->dma_ops->dma_setup(drive); -- cgit v0.10.2 From ae8789f034ffa077105575817ec0cc581fd18d83 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 16 Jul 2008 20:33:45 +0200 Subject: ide-cd: remove ide_cd_drain_data and ide_cd_pad_transfer Use the generic ide_pad_transfer() helper instead. [bart: fixup ide_cd_drain_data() -> ide_pad_transfer() conversion] Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index eb4b275..789b19c 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -599,28 +599,6 @@ static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive, } /* - * Block read functions. - */ -static void ide_cd_pad_transfer(ide_drive_t *drive, xfer_func_t *xf, int len) -{ - while (len > 0) { - int dum = 0; - xf(drive, NULL, &dum, sizeof(dum)); - len -= sizeof(dum); - } -} - -static void ide_cd_drain_data(ide_drive_t *drive, int nsects) -{ - while (nsects > 0) { - static char dum[SECTOR_SIZE]; - - drive->hwif->input_data(drive, NULL, dum, sizeof(dum)); - nsects--; - } -} - -/* * Check the contents of the interrupt reason register from the cdrom * and attempt to recover if there are problems. Returns 0 if everything's * ok; nonzero if the request has been terminated. @@ -635,15 +613,12 @@ static int ide_cd_check_ireason(ide_drive_t *drive, struct request *rq, if (ireason == (!rw << 1)) return 0; else if (ireason == (rw << 1)) { - ide_hwif_t *hwif = drive->hwif; - xfer_func_t *xf; /* whoops... */ printk(KERN_ERR "%s: %s: wrong transfer direction!\n", drive->name, __func__); - xf = rw ? hwif->output_data : hwif->input_data; - ide_cd_pad_transfer(drive, xf, len); + ide_pad_transfer(drive, rw, len); } else if (rw == 0 && ireason == 1) { /* * Some drives (ASUS) seem to tell us that status info is @@ -1006,7 +981,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) - bio_cur_sectors(rq->bio), thislen >> 9); if (nskip > 0) { - ide_cd_drain_data(drive, nskip); + ide_pad_transfer(drive, write, nskip << 9); rq->current_nr_sectors -= nskip; thislen -= (nskip << 9); } @@ -1043,7 +1018,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) * If the buffers are full, pipe the rest into * oblivion. */ - ide_cd_drain_data(drive, thislen >> 9); + ide_pad_transfer(drive, 0, thislen); else { printk(KERN_ERR "%s: confused, missing data\n", drive->name); @@ -1091,7 +1066,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) /* pad, if necessary */ if (!blk_fs_request(rq) && len > 0) - ide_cd_pad_transfer(drive, xferfunc, len); + ide_pad_transfer(drive, write, len); if (blk_pc_request(rq)) { timeout = rq->timeout; -- cgit v0.10.2 From ab9d6e3374d938cf3d941fbed5ba32a19ad263b8 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 16 Jul 2008 20:33:45 +0200 Subject: ide-cd: ide_do_rw_cdrom: add the catch-all bad request case to the if-else block There should be no functionality change resulting from this patch. Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 789b19c..6ade498 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1235,11 +1235,11 @@ static ide_startstop_t ide_do_rw_cdrom(ide_drive_t *drive, struct request *rq, /* right now this can only be a reset... */ cdrom_end_request(drive, 1); return ide_stopped; + } else { + blk_dump_rq_flags(rq, "ide-cd bad flags"); + cdrom_end_request(drive, 0); + return ide_stopped; } - - blk_dump_rq_flags(rq, "ide-cd bad flags"); - cdrom_end_request(drive, 0); - return ide_stopped; } -- cgit v0.10.2 From 8ea1d17b962c35401fe26428e25c4652023e2652 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 16 Jul 2008 20:33:45 +0200 Subject: ide-cd: cdrom_start_seek: remove unused argument block There should be no functionality change resulting from this patch. Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 6ade498..ff98222 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -753,7 +753,7 @@ static ide_startstop_t cdrom_start_seek_continuation(ide_drive_t *drive) return cdrom_transfer_packet_command(drive, rq, &cdrom_seek_intr); } -static ide_startstop_t cdrom_start_seek(ide_drive_t *drive, unsigned int block) +static ide_startstop_t cdrom_start_seek(ide_drive_t *drive) { struct cdrom_info *info = drive->driver_data; @@ -1223,7 +1223,7 @@ static ide_startstop_t ide_do_rw_cdrom(ide_drive_t *drive, struct request *rq, IDE_LARGE_SEEK(info->last_block, block, IDECD_SEEK_THRESHOLD) && drive->dsc_overlap) - action = cdrom_start_seek(drive, block); + action = cdrom_start_seek(drive); else action = cdrom_start_rw(drive, rq); info->last_block = block; -- cgit v0.10.2 From 99384aeafe3a78d8a2e66b09b67aa6a219cd7897 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 16 Jul 2008 20:33:45 +0200 Subject: ide-cd: mv ide_do_rw_cdrom ide_cd_do_request There should be no functionality change resulting from this patch. Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index ff98222..a9be4bf 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1197,7 +1197,7 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) /* * cdrom driver request routine. */ -static ide_startstop_t ide_do_rw_cdrom(ide_drive_t *drive, struct request *rq, +static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, sector_t block) { ide_startstop_t action; @@ -1936,7 +1936,7 @@ static ide_driver_t ide_cdrom_driver = { .version = IDECD_VERSION, .media = ide_cdrom, .supports_dsc_overlap = 1, - .do_request = ide_do_rw_cdrom, + .do_request = ide_cd_do_request, .end_request = ide_end_request, .error = __ide_error, .abort = __ide_abort, -- cgit v0.10.2 From b6ca440a8ff15e12478ea6f026a52970e7a0c54c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 16 Jul 2008 20:33:46 +0200 Subject: ide-cd: simplify request issuing path Call cdrom_start_packet_command() only from the ->do_request() routine. As a nice side effect, this improves code readability a bit. There should be no functionality change resulting from this patch. Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index a9be4bf..b61ce5e 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -753,14 +753,12 @@ static ide_startstop_t cdrom_start_seek_continuation(ide_drive_t *drive) return cdrom_transfer_packet_command(drive, rq, &cdrom_seek_intr); } -static ide_startstop_t cdrom_start_seek(ide_drive_t *drive) +static void cdrom_start_seek(ide_drive_t *drive) { struct cdrom_info *info = drive->driver_data; info->dma = 0; info->start_seek = jiffies; - return cdrom_start_packet_command(drive, 0, - cdrom_start_seek_continuation); } /* @@ -1135,8 +1133,7 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq) if (write) cd->devinfo.media_written = 1; - /* start sending the read/write request to the drive */ - return cdrom_start_packet_command(drive, 32768, cdrom_start_rw_cont); + return ide_started; } static ide_startstop_t cdrom_do_newpc_cont(ide_drive_t *drive) @@ -1149,7 +1146,7 @@ static ide_startstop_t cdrom_do_newpc_cont(ide_drive_t *drive) return cdrom_transfer_packet_command(drive, rq, cdrom_newpc_intr); } -static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) +static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) { struct cdrom_info *info = drive->driver_data; @@ -1188,10 +1185,6 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) ((unsigned long)current->stack & stack_mask))) info->dma = 0; } - - /* start sending the command to the drive */ - return cdrom_start_packet_command(drive, rq->data_len, - cdrom_do_newpc_cont); } /* @@ -1200,8 +1193,9 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, sector_t block) { - ide_startstop_t action; struct cdrom_info *info = drive->driver_data; + ide_handler_t *fn; + int xferlen; if (blk_fs_request(rq)) { if (info->cd_flags & IDE_CD_FLAG_SEEKING) { @@ -1221,16 +1215,23 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, } if (rq_data_dir(rq) == READ && IDE_LARGE_SEEK(info->last_block, block, - IDECD_SEEK_THRESHOLD) && - drive->dsc_overlap) - action = cdrom_start_seek(drive); - else - action = cdrom_start_rw(drive, rq); + IDECD_SEEK_THRESHOLD) && + drive->dsc_overlap) { + xferlen = 0; + fn = cdrom_start_seek_continuation; + cdrom_start_seek(drive); + } else { + xferlen = 32768; + fn = cdrom_start_rw_cont; + if (cdrom_start_rw(drive, rq) == ide_stopped) + return ide_stopped; + } info->last_block = block; - return action; } else if (blk_sense_request(rq) || blk_pc_request(rq) || rq->cmd_type == REQ_TYPE_ATA_PC) { - return cdrom_do_block_pc(drive, rq); + xferlen = rq->data_len; + fn = cdrom_do_newpc_cont; + cdrom_do_block_pc(drive, rq); } else if (blk_special_request(rq)) { /* right now this can only be a reset... */ cdrom_end_request(drive, 1); @@ -1240,6 +1241,8 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, cdrom_end_request(drive, 0); return ide_stopped; } + + return cdrom_start_packet_command(drive, xferlen, fn); } -- cgit v0.10.2 From 4b01fcbbe69bba34a8494fca6376ac0804f0f51d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 16 Jul 2008 20:33:46 +0200 Subject: ide-cd: fold cdrom_start_seek into ide_cd_do_request Do what the compiler does anyway: inline a function that is used only once. This saves us the overhead of a function call and the function is small enough to be embedded in the callsite anyways. Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index b61ce5e..66d82c1 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -753,14 +753,6 @@ static ide_startstop_t cdrom_start_seek_continuation(ide_drive_t *drive) return cdrom_transfer_packet_command(drive, rq, &cdrom_seek_intr); } -static void cdrom_start_seek(ide_drive_t *drive) -{ - struct cdrom_info *info = drive->driver_data; - - info->dma = 0; - info->start_seek = jiffies; -} - /* * Fix up a possibly partially-processed request so that we can start it over * entirely, or even put it back on the request queue. @@ -1219,7 +1211,8 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, drive->dsc_overlap) { xferlen = 0; fn = cdrom_start_seek_continuation; - cdrom_start_seek(drive); + info->dma = 0; + info->start_seek = jiffies; } else { xferlen = 32768; fn = cdrom_start_rw_cont; -- cgit v0.10.2 From e529c6087a845e64a6a36a2d17b8754b20bc7c0d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 16 Jul 2008 20:33:46 +0200 Subject: ide-cd: move request prep from cdrom_start_seek_continuation to rq issue path ... by factoring out the rq preparation code into a separate function called in the request routine. As a nice side effect, this minimizes the IRQ handler execution time. There should be no functionality change resulting from this patch. [bart: s/HWGROUP()/drive->hwif->hwgroup/ and remove extra newlines] Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 66d82c1..275d5ba 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -738,9 +738,8 @@ static ide_startstop_t cdrom_seek_intr(ide_drive_t *drive) return ide_stopped; } -static ide_startstop_t cdrom_start_seek_continuation(ide_drive_t *drive) +static void ide_cd_prepare_seek_request(ide_drive_t *drive, struct request *rq) { - struct request *rq = HWGROUP(drive)->rq; sector_t frame = rq->sector; sector_div(frame, queue_hardsect_size(drive->queue) >> SECTOR_BITS); @@ -750,6 +749,12 @@ static ide_startstop_t cdrom_start_seek_continuation(ide_drive_t *drive) put_unaligned(cpu_to_be32(frame), (unsigned int *) &rq->cmd[2]); rq->timeout = ATAPI_WAIT_PC; +} + +static ide_startstop_t cdrom_start_seek_continuation(ide_drive_t *drive) +{ + struct request *rq = drive->hwif->hwgroup->rq; + return cdrom_transfer_packet_command(drive, rq, &cdrom_seek_intr); } @@ -1211,8 +1216,11 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, drive->dsc_overlap) { xferlen = 0; fn = cdrom_start_seek_continuation; + info->dma = 0; info->start_seek = jiffies; + + ide_cd_prepare_seek_request(drive, rq); } else { xferlen = 32768; fn = cdrom_start_rw_cont; -- cgit v0.10.2 From 90eb808e0fd3cbda8a8b085238930c533f603642 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 16 Jul 2008 20:33:46 +0200 Subject: ide-cd: move request prep from cdrom_start_rw_cont to rq issue path ... by factoring out the rq preparation code into a separate function called in the request routine. As a nice side effect, this minimizes the IRQ handler execution time. There should be no functionality change resulting from this patch. [bart: s/HWGROUP()/drive->hwif->hwgroup/ and remove extra newline] Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 275d5ba..9e593ce 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -666,16 +666,9 @@ static int ide_cd_check_transfer_size(ide_drive_t *drive, int len) static ide_startstop_t cdrom_newpc_intr(ide_drive_t *); -/* - * Routine to send a read/write packet command to the drive. This is usually - * called directly from cdrom_start_{read,write}(). However, for drq_interrupt - * devices, it is called from an interrupt when the drive is ready to accept - * the command. - */ -static ide_startstop_t cdrom_start_rw_cont(ide_drive_t *drive) +static ide_startstop_t ide_cd_prepare_rw_request(ide_drive_t *drive, + struct request *rq) { - struct request *rq = HWGROUP(drive)->rq; - if (rq_data_dir(rq) == READ) { unsigned short sectors_per_frame = queue_hardsect_size(drive->queue) >> SECTOR_BITS; @@ -712,6 +705,19 @@ static ide_startstop_t cdrom_start_rw_cont(ide_drive_t *drive) /* set up the command */ rq->timeout = ATAPI_WAIT_PC; + return ide_started; +} + +/* + * Routine to send a read/write packet command to the drive. This is usually + * called directly from cdrom_start_{read,write}(). However, for drq_interrupt + * devices, it is called from an interrupt when the drive is ready to accept + * the command. + */ +static ide_startstop_t cdrom_start_rw_cont(ide_drive_t *drive) +{ + struct request *rq = drive->hwif->hwgroup->rq; + /* send the command to the drive and return */ return cdrom_transfer_packet_command(drive, rq, cdrom_newpc_intr); } @@ -1224,8 +1230,12 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, } else { xferlen = 32768; fn = cdrom_start_rw_cont; + if (cdrom_start_rw(drive, rq) == ide_stopped) return ide_stopped; + + if (ide_cd_prepare_rw_request(drive, rq) == ide_stopped) + return ide_stopped; } info->last_block = block; } else if (blk_sense_request(rq) || blk_pc_request(rq) || -- cgit v0.10.2 From 7fcebda501681080a242733b8db3f09f5ccb5d3f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 16 Jul 2008 20:33:46 +0200 Subject: ide-cd: move request prep chunk from cdrom_do_newpc_cont to rq issue path As a nice side effect, this minimizes the IRQ handler execution time. There should be no functionality change resulting from this patch. [bart: remove extra newlines from ide_cd_do_request()] Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 9e593ce..2b3c69d 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1143,9 +1143,6 @@ static ide_startstop_t cdrom_do_newpc_cont(ide_drive_t *drive) { struct request *rq = HWGROUP(drive)->rq; - if (!rq->timeout) - rq->timeout = ATAPI_WAIT_PC; - return cdrom_transfer_packet_command(drive, rq, cdrom_newpc_intr); } @@ -1242,6 +1239,10 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, rq->cmd_type == REQ_TYPE_ATA_PC) { xferlen = rq->data_len; fn = cdrom_do_newpc_cont; + + if (!rq->timeout) + rq->timeout = ATAPI_WAIT_PC; + cdrom_do_block_pc(drive, rq); } else if (blk_special_request(rq)) { /* right now this can only be a reset... */ @@ -1256,8 +1257,6 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, return cdrom_start_packet_command(drive, xferlen, fn); } - - /* * Ioctl handling. * -- cgit v0.10.2 From c96a7df8dba5800c03b0f1edd87b2f3d0473a119 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 16 Jul 2008 20:33:46 +0200 Subject: ide-floppy: fold idefloppy_create_test_unit_ready_cmd into idefloppy_open There's no need for this function since it is used only once. [bart: ported it over IDE changes] Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index b368943..df19ede 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -561,12 +561,6 @@ static void idefloppy_create_start_stop_cmd(struct ide_atapi_pc *pc, int start) pc->c[4] = start; } -static void idefloppy_create_test_unit_ready_cmd(struct ide_atapi_pc *pc) -{ - idefloppy_init_pc(pc); - pc->c[0] = GPCMD_TEST_UNIT_READY; -} - static void idefloppy_create_rw_cmd(idefloppy_floppy_t *floppy, struct ide_atapi_pc *pc, struct request *rq, unsigned long sector) @@ -1166,7 +1160,9 @@ static int idefloppy_open(struct inode *inode, struct file *filp) floppy->flags &= ~IDEFLOPPY_FLAG_FORMAT_IN_PROGRESS; /* Just in case */ - idefloppy_create_test_unit_ready_cmd(&pc); + idefloppy_init_pc(&pc); + pc.c[0] = GPCMD_TEST_UNIT_READY; + if (idefloppy_queue_pc_tail(drive, &pc)) { idefloppy_create_start_stop_cmd(&pc, 1); (void) idefloppy_queue_pc_tail(drive, &pc); -- cgit v0.10.2 From 68dc3575e064a5655cbd656fbf32d6ceeb85ac9e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 16 Jul 2008 20:33:47 +0200 Subject: ide-floppy: zero out the whole struct ide_atapi_pc on init This is a precaution just to make sure a new pc is clean when allocated. There should be no functional change introduced by this patch. [bart: ported it over IDE changes] Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index df19ede..00948c3 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -351,10 +351,7 @@ static void ide_floppy_callback(ide_drive_t *drive) static void idefloppy_init_pc(struct ide_atapi_pc *pc) { - memset(pc->c, 0, 12); - pc->retries = 0; - pc->flags = 0; - pc->req_xfer = 0; + memset(pc, 0, sizeof(*pc)); pc->buf = pc->pc_buf; pc->buf_size = IDEFLOPPY_PC_BUFFER_SIZE; pc->callback = ide_floppy_callback; -- cgit v0.10.2 From 85ae98a3dff2da860a4f8f9e4a0de69ad82ce633 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 16 Jul 2008 20:33:47 +0200 Subject: ide: endian annotations in ide-floppy.c Signed-off-by: Harvey Harrison Cc: Al Viro Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 00948c3..c7e4433 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -702,10 +702,10 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive) set_disk_ro(floppy->disk, floppy->wp); page = &pc.buf[8]; - transfer_rate = be16_to_cpu(*(u16 *)&pc.buf[8 + 2]); - sector_size = be16_to_cpu(*(u16 *)&pc.buf[8 + 6]); - cyls = be16_to_cpu(*(u16 *)&pc.buf[8 + 8]); - rpm = be16_to_cpu(*(u16 *)&pc.buf[8 + 28]); + transfer_rate = be16_to_cpup((__be16 *)&pc.buf[8 + 2]); + sector_size = be16_to_cpup((__be16 *)&pc.buf[8 + 6]); + cyls = be16_to_cpup((__be16 *)&pc.buf[8 + 8]); + rpm = be16_to_cpup((__be16 *)&pc.buf[8 + 28]); heads = pc.buf[8 + 4]; sectors = pc.buf[8 + 5]; @@ -780,8 +780,8 @@ static int ide_floppy_get_capacity(ide_drive_t *drive) for (i = 0; i < desc_cnt; i++) { unsigned int desc_start = 4 + i*8; - blocks = be32_to_cpu(*(u32 *)&pc.buf[desc_start]); - length = be16_to_cpu(*(u16 *)&pc.buf[desc_start + 6]); + blocks = be32_to_cpup((__be32 *)&pc.buf[desc_start]); + length = be16_to_cpup((__be16 *)&pc.buf[desc_start + 6]); debug_log("Descriptor %d: %dkB, %d blocks, %d sector size\n", i, blocks * length / 1024, blocks, length); @@ -902,8 +902,8 @@ static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg) if (u_index >= u_array_size) break; /* User-supplied buffer too small */ - blocks = be32_to_cpu(*(u32 *)&pc.buf[desc_start]); - length = be16_to_cpu(*(u16 *)&pc.buf[desc_start + 6]); + blocks = be32_to_cpup((__be32 *)&pc.buf[desc_start]); + length = be16_to_cpup((__be16 *)&pc.buf[desc_start + 6]); if (put_user(blocks, argp)) return(-EFAULT); -- cgit v0.10.2 From ffa793f9bb5b563edb4cacc43a4d6677eec0e36b Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 16 Jul 2008 20:33:47 +0200 Subject: remove BLK_DEV_HD_ONLY After commit 80aa31cb460d12c1e02327b43eceb3eebc6e7090 (ide: remove CONFIG_BLK_DEV_HD_IDE config option (take 2)) the indirection through BLK_DEV_HD_ONLY is no longer required. Signed-off-by: Adrian Bunk Cc: rmk@arm.linux.org.uk Cc: Alan Cox Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 21511d4..39b95f9 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -1000,7 +1000,7 @@ config BLK_DEV_IDEDMA endif -config BLK_DEV_HD_ONLY +config BLK_DEV_HD bool "Old hard disk (MFM/RLL/IDE) driver" depends on !ARM || ARCH_RPC || ARCH_SHARK || BROKEN help @@ -1023,7 +1023,4 @@ config BLK_DEV_HD_ONLY Disk-HOWTO, available from . -config BLK_DEV_HD - def_bool BLK_DEV_HD_ONLY - endif # IDE -- cgit v0.10.2 From 01c22bfc30a3f40fed08cfd2779348edcb6c5e53 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 16 Jul 2008 20:33:47 +0200 Subject: ide/legacy/hd.c: use late_initcall() Since the later move to drivers/block/ will break the link order, the module_init() has to become a late_initcall(). Signed-off-by: Adrian Bunk Cc: rmk@arm.linux.org.uk Cc: Alan Cox Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile index 8605536..39e99ac 100644 --- a/drivers/ide/Makefile +++ b/drivers/ide/Makefile @@ -62,7 +62,6 @@ endif obj-$(CONFIG_BLK_DEV_IDE) += arm/ mips/ -# old hd driver must be last ifeq ($(CONFIG_BLK_DEV_HD), y) hd-core-y += legacy/hd.o obj-y += hd-core.o diff --git a/drivers/ide/legacy/hd.c b/drivers/ide/legacy/hd.c index abdedf5..00b6956 100644 --- a/drivers/ide/legacy/hd.c +++ b/drivers/ide/legacy/hd.c @@ -812,4 +812,4 @@ static int __init parse_hd_setup(char *line) } __setup("hd=", parse_hd_setup); -module_init(hd_init); +late_initcall(hd_init); -- cgit v0.10.2 From 453ea3ed0b3e8ad67d4ee9d2fccf3d95a3e1f709 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 16 Jul 2008 20:33:47 +0200 Subject: move ide/legacy/hd.c to drivers/block/ This patch moves hd.c to drivers/block/ Signed-off-by: Adrian Bunk Cc: rmk@arm.linux.org.uk Cc: Alan Cox Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 0d1d213..c390974 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -433,4 +433,28 @@ config VIRTIO_BLK This is the virtual block driver for virtio. It can be used with lguest or QEMU based VMMs (like KVM or Xen). Say Y or M. +config BLK_DEV_HD + bool "Old hard disk (MFM/RLL/IDE) driver" + depends on HAVE_IDE + depends on !ARM || ARCH_RPC || ARCH_SHARK || BROKEN + help + There are two drivers for MFM/RLL/IDE hard disks. Most people use + the newer enhanced driver, but this old one is still around for two + reasons. Some older systems have strange timing problems and seem to + work only with the old driver (which itself does not work with some + newer systems). The other reason is that the old driver is smaller, + since it lacks the enhanced functionality of the new one. This makes + it a good choice for systems with very tight memory restrictions, or + for systems with only older MFM/RLL/ESDI drives. Choosing the old + driver can save 13 KB or so of kernel memory. + + If you want to use this driver together with the new one you have + to use "hda=noprobe hdb=noprobe" kernel parameters to prevent the new + driver from probing the primary interface. + + If you are unsure, then just choose the Enhanced IDE/MFM/RLL driver + instead of this one. For more detailed information, read the + Disk-HOWTO, available from + . + endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 5e58430..204332b 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -29,5 +29,6 @@ obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o obj-$(CONFIG_VIODASD) += viodasd.o obj-$(CONFIG_BLK_DEV_SX8) += sx8.o obj-$(CONFIG_BLK_DEV_UB) += ub.o +obj-$(CONFIG_BLK_DEV_HD) += hd.o obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o diff --git a/drivers/block/hd.c b/drivers/block/hd.c new file mode 100644 index 0000000..00b6956 --- /dev/null +++ b/drivers/block/hd.c @@ -0,0 +1,815 @@ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * + * This is the low-level hd interrupt support. It traverses the + * request-list, using interrupts to jump between functions. As + * all the functions are called within interrupts, we may not + * sleep. Special care is recommended. + * + * modified by Drew Eckhardt to check nr of hd's from the CMOS. + * + * Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug + * in the early extended-partition checks and added DM partitions + * + * IRQ-unmask, drive-id, multiple-mode, support for ">16 heads", + * and general streamlining by Mark Lord. + * + * Removed 99% of above. Use Mark's ide driver for those options. + * This is now a lightweight ST-506 driver. (Paul Gortmaker) + * + * Modified 1995 Russell King for ARM processor. + * + * Bugfix: max_sectors must be <= 255 or the wheels tend to come + * off in a hurry once you queue things up - Paul G. 02/2001 + */ + +/* Uncomment the following if you want verbose error reports. */ +/* #define VERBOSE_ERRORS */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* CMOS defines */ +#include +#include +#include + +#define REALLY_SLOW_IO +#include +#include +#include + +#ifdef __arm__ +#undef HD_IRQ +#endif +#include +#ifdef __arm__ +#define HD_IRQ IRQ_HARDDISK +#endif + +/* Hd controller regster ports */ + +#define HD_DATA 0x1f0 /* _CTL when writing */ +#define HD_ERROR 0x1f1 /* see err-bits */ +#define HD_NSECTOR 0x1f2 /* nr of sectors to read/write */ +#define HD_SECTOR 0x1f3 /* starting sector */ +#define HD_LCYL 0x1f4 /* starting cylinder */ +#define HD_HCYL 0x1f5 /* high byte of starting cyl */ +#define HD_CURRENT 0x1f6 /* 101dhhhh , d=drive, hhhh=head */ +#define HD_STATUS 0x1f7 /* see status-bits */ +#define HD_FEATURE HD_ERROR /* same io address, read=error, write=feature */ +#define HD_PRECOMP HD_FEATURE /* obsolete use of this port - predates IDE */ +#define HD_COMMAND HD_STATUS /* same io address, read=status, write=cmd */ + +#define HD_CMD 0x3f6 /* used for resets */ +#define HD_ALTSTATUS 0x3f6 /* same as HD_STATUS but doesn't clear irq */ + +/* Bits of HD_STATUS */ +#define ERR_STAT 0x01 +#define INDEX_STAT 0x02 +#define ECC_STAT 0x04 /* Corrected error */ +#define DRQ_STAT 0x08 +#define SEEK_STAT 0x10 +#define SERVICE_STAT SEEK_STAT +#define WRERR_STAT 0x20 +#define READY_STAT 0x40 +#define BUSY_STAT 0x80 + +/* Bits for HD_ERROR */ +#define MARK_ERR 0x01 /* Bad address mark */ +#define TRK0_ERR 0x02 /* couldn't find track 0 */ +#define ABRT_ERR 0x04 /* Command aborted */ +#define MCR_ERR 0x08 /* media change request */ +#define ID_ERR 0x10 /* ID field not found */ +#define MC_ERR 0x20 /* media changed */ +#define ECC_ERR 0x40 /* Uncorrectable ECC error */ +#define BBD_ERR 0x80 /* pre-EIDE meaning: block marked bad */ +#define ICRC_ERR 0x80 /* new meaning: CRC error during transfer */ + +static DEFINE_SPINLOCK(hd_lock); +static struct request_queue *hd_queue; + +#define MAJOR_NR HD_MAJOR +#define QUEUE (hd_queue) +#define CURRENT elv_next_request(hd_queue) + +#define TIMEOUT_VALUE (6*HZ) +#define HD_DELAY 0 + +#define MAX_ERRORS 16 /* Max read/write errors/sector */ +#define RESET_FREQ 8 /* Reset controller every 8th retry */ +#define RECAL_FREQ 4 /* Recalibrate every 4th retry */ +#define MAX_HD 2 + +#define STAT_OK (READY_STAT|SEEK_STAT) +#define OK_STATUS(s) (((s)&(STAT_OK|(BUSY_STAT|WRERR_STAT|ERR_STAT)))==STAT_OK) + +static void recal_intr(void); +static void bad_rw_intr(void); + +static int reset; +static int hd_error; + +/* + * This struct defines the HD's and their types. + */ +struct hd_i_struct { + unsigned int head, sect, cyl, wpcom, lzone, ctl; + int unit; + int recalibrate; + int special_op; +}; + +#ifdef HD_TYPE +static struct hd_i_struct hd_info[] = { HD_TYPE }; +static int NR_HD = ARRAY_SIZE(hd_info); +#else +static struct hd_i_struct hd_info[MAX_HD]; +static int NR_HD; +#endif + +static struct gendisk *hd_gendisk[MAX_HD]; + +static struct timer_list device_timer; + +#define TIMEOUT_VALUE (6*HZ) + +#define SET_TIMER \ + do { \ + mod_timer(&device_timer, jiffies + TIMEOUT_VALUE); \ + } while (0) + +static void (*do_hd)(void) = NULL; +#define SET_HANDLER(x) \ +if ((do_hd = (x)) != NULL) \ + SET_TIMER; \ +else \ + del_timer(&device_timer); + + +#if (HD_DELAY > 0) + +#include + +unsigned long last_req; + +unsigned long read_timer(void) +{ + unsigned long t, flags; + int i; + + spin_lock_irqsave(&i8253_lock, flags); + t = jiffies * 11932; + outb_p(0, 0x43); + i = inb_p(0x40); + i |= inb(0x40) << 8; + spin_unlock_irqrestore(&i8253_lock, flags); + return(t - i); +} +#endif + +static void __init hd_setup(char *str, int *ints) +{ + int hdind = 0; + + if (ints[0] != 3) + return; + if (hd_info[0].head != 0) + hdind = 1; + hd_info[hdind].head = ints[2]; + hd_info[hdind].sect = ints[3]; + hd_info[hdind].cyl = ints[1]; + hd_info[hdind].wpcom = 0; + hd_info[hdind].lzone = ints[1]; + hd_info[hdind].ctl = (ints[2] > 8 ? 8 : 0); + NR_HD = hdind+1; +} + +static void dump_status(const char *msg, unsigned int stat) +{ + char *name = "hd?"; + if (CURRENT) + name = CURRENT->rq_disk->disk_name; + +#ifdef VERBOSE_ERRORS + printk("%s: %s: status=0x%02x { ", name, msg, stat & 0xff); + if (stat & BUSY_STAT) printk("Busy "); + if (stat & READY_STAT) printk("DriveReady "); + if (stat & WRERR_STAT) printk("WriteFault "); + if (stat & SEEK_STAT) printk("SeekComplete "); + if (stat & DRQ_STAT) printk("DataRequest "); + if (stat & ECC_STAT) printk("CorrectedError "); + if (stat & INDEX_STAT) printk("Index "); + if (stat & ERR_STAT) printk("Error "); + printk("}\n"); + if ((stat & ERR_STAT) == 0) { + hd_error = 0; + } else { + hd_error = inb(HD_ERROR); + printk("%s: %s: error=0x%02x { ", name, msg, hd_error & 0xff); + if (hd_error & BBD_ERR) printk("BadSector "); + if (hd_error & ECC_ERR) printk("UncorrectableError "); + if (hd_error & ID_ERR) printk("SectorIdNotFound "); + if (hd_error & ABRT_ERR) printk("DriveStatusError "); + if (hd_error & TRK0_ERR) printk("TrackZeroNotFound "); + if (hd_error & MARK_ERR) printk("AddrMarkNotFound "); + printk("}"); + if (hd_error & (BBD_ERR|ECC_ERR|ID_ERR|MARK_ERR)) { + printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL), + inb(HD_CURRENT) & 0xf, inb(HD_SECTOR)); + if (CURRENT) + printk(", sector=%ld", CURRENT->sector); + } + printk("\n"); + } +#else + printk("%s: %s: status=0x%02x.\n", name, msg, stat & 0xff); + if ((stat & ERR_STAT) == 0) { + hd_error = 0; + } else { + hd_error = inb(HD_ERROR); + printk("%s: %s: error=0x%02x.\n", name, msg, hd_error & 0xff); + } +#endif +} + +static void check_status(void) +{ + int i = inb_p(HD_STATUS); + + if (!OK_STATUS(i)) { + dump_status("check_status", i); + bad_rw_intr(); + } +} + +static int controller_busy(void) +{ + int retries = 100000; + unsigned char status; + + do { + status = inb_p(HD_STATUS); + } while ((status & BUSY_STAT) && --retries); + return status; +} + +static int status_ok(void) +{ + unsigned char status = inb_p(HD_STATUS); + + if (status & BUSY_STAT) + return 1; /* Ancient, but does it make sense??? */ + if (status & WRERR_STAT) + return 0; + if (!(status & READY_STAT)) + return 0; + if (!(status & SEEK_STAT)) + return 0; + return 1; +} + +static int controller_ready(unsigned int drive, unsigned int head) +{ + int retry = 100; + + do { + if (controller_busy() & BUSY_STAT) + return 0; + outb_p(0xA0 | (drive<<4) | head, HD_CURRENT); + if (status_ok()) + return 1; + } while (--retry); + return 0; +} + +static void hd_out(struct hd_i_struct *disk, + unsigned int nsect, + unsigned int sect, + unsigned int head, + unsigned int cyl, + unsigned int cmd, + void (*intr_addr)(void)) +{ + unsigned short port; + +#if (HD_DELAY > 0) + while (read_timer() - last_req < HD_DELAY) + /* nothing */; +#endif + if (reset) + return; + if (!controller_ready(disk->unit, head)) { + reset = 1; + return; + } + SET_HANDLER(intr_addr); + outb_p(disk->ctl, HD_CMD); + port = HD_DATA; + outb_p(disk->wpcom >> 2, ++port); + outb_p(nsect, ++port); + outb_p(sect, ++port); + outb_p(cyl, ++port); + outb_p(cyl >> 8, ++port); + outb_p(0xA0 | (disk->unit << 4) | head, ++port); + outb_p(cmd, ++port); +} + +static void hd_request (void); + +static int drive_busy(void) +{ + unsigned int i; + unsigned char c; + + for (i = 0; i < 500000 ; i++) { + c = inb_p(HD_STATUS); + if ((c & (BUSY_STAT | READY_STAT | SEEK_STAT)) == STAT_OK) + return 0; + } + dump_status("reset timed out", c); + return 1; +} + +static void reset_controller(void) +{ + int i; + + outb_p(4, HD_CMD); + for (i = 0; i < 1000; i++) barrier(); + outb_p(hd_info[0].ctl & 0x0f, HD_CMD); + for (i = 0; i < 1000; i++) barrier(); + if (drive_busy()) + printk("hd: controller still busy\n"); + else if ((hd_error = inb(HD_ERROR)) != 1) + printk("hd: controller reset failed: %02x\n", hd_error); +} + +static void reset_hd(void) +{ + static int i; + +repeat: + if (reset) { + reset = 0; + i = -1; + reset_controller(); + } else { + check_status(); + if (reset) + goto repeat; + } + if (++i < NR_HD) { + struct hd_i_struct *disk = &hd_info[i]; + disk->special_op = disk->recalibrate = 1; + hd_out(disk, disk->sect, disk->sect, disk->head-1, + disk->cyl, WIN_SPECIFY, &reset_hd); + if (reset) + goto repeat; + } else + hd_request(); +} + +/* + * Ok, don't know what to do with the unexpected interrupts: on some machines + * doing a reset and a retry seems to result in an eternal loop. Right now I + * ignore it, and just set the timeout. + * + * On laptops (and "green" PCs), an unexpected interrupt occurs whenever the + * drive enters "idle", "standby", or "sleep" mode, so if the status looks + * "good", we just ignore the interrupt completely. + */ +static void unexpected_hd_interrupt(void) +{ + unsigned int stat = inb_p(HD_STATUS); + + if (stat & (BUSY_STAT|DRQ_STAT|ECC_STAT|ERR_STAT)) { + dump_status("unexpected interrupt", stat); + SET_TIMER; + } +} + +/* + * bad_rw_intr() now tries to be a bit smarter and does things + * according to the error returned by the controller. + * -Mika Liljeberg (liljeber@cs.Helsinki.FI) + */ +static void bad_rw_intr(void) +{ + struct request *req = CURRENT; + if (req != NULL) { + struct hd_i_struct *disk = req->rq_disk->private_data; + if (++req->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) { + end_request(req, 0); + disk->special_op = disk->recalibrate = 1; + } else if (req->errors % RESET_FREQ == 0) + reset = 1; + else if ((hd_error & TRK0_ERR) || req->errors % RECAL_FREQ == 0) + disk->special_op = disk->recalibrate = 1; + /* Otherwise just retry */ + } +} + +static inline int wait_DRQ(void) +{ + int retries; + int stat; + + for (retries = 0; retries < 100000; retries++) { + stat = inb_p(HD_STATUS); + if (stat & DRQ_STAT) + return 0; + } + dump_status("wait_DRQ", stat); + return -1; +} + +static void read_intr(void) +{ + struct request *req; + int i, retries = 100000; + + do { + i = (unsigned) inb_p(HD_STATUS); + if (i & BUSY_STAT) + continue; + if (!OK_STATUS(i)) + break; + if (i & DRQ_STAT) + goto ok_to_read; + } while (--retries > 0); + dump_status("read_intr", i); + bad_rw_intr(); + hd_request(); + return; +ok_to_read: + req = CURRENT; + insw(HD_DATA, req->buffer, 256); + req->sector++; + req->buffer += 512; + req->errors = 0; + i = --req->nr_sectors; + --req->current_nr_sectors; +#ifdef DEBUG + printk("%s: read: sector %ld, remaining = %ld, buffer=%p\n", + req->rq_disk->disk_name, req->sector, req->nr_sectors, + req->buffer+512); +#endif + if (req->current_nr_sectors <= 0) + end_request(req, 1); + if (i > 0) { + SET_HANDLER(&read_intr); + return; + } + (void) inb_p(HD_STATUS); +#if (HD_DELAY > 0) + last_req = read_timer(); +#endif + if (elv_next_request(QUEUE)) + hd_request(); + return; +} + +static void write_intr(void) +{ + struct request *req = CURRENT; + int i; + int retries = 100000; + + do { + i = (unsigned) inb_p(HD_STATUS); + if (i & BUSY_STAT) + continue; + if (!OK_STATUS(i)) + break; + if ((req->nr_sectors <= 1) || (i & DRQ_STAT)) + goto ok_to_write; + } while (--retries > 0); + dump_status("write_intr", i); + bad_rw_intr(); + hd_request(); + return; +ok_to_write: + req->sector++; + i = --req->nr_sectors; + --req->current_nr_sectors; + req->buffer += 512; + if (!i || (req->bio && req->current_nr_sectors <= 0)) + end_request(req, 1); + if (i > 0) { + SET_HANDLER(&write_intr); + outsw(HD_DATA, req->buffer, 256); + local_irq_enable(); + } else { +#if (HD_DELAY > 0) + last_req = read_timer(); +#endif + hd_request(); + } + return; +} + +static void recal_intr(void) +{ + check_status(); +#if (HD_DELAY > 0) + last_req = read_timer(); +#endif + hd_request(); +} + +/* + * This is another of the error-routines I don't know what to do with. The + * best idea seems to just set reset, and start all over again. + */ +static void hd_times_out(unsigned long dummy) +{ + char *name; + + do_hd = NULL; + + if (!CURRENT) + return; + + disable_irq(HD_IRQ); + local_irq_enable(); + reset = 1; + name = CURRENT->rq_disk->disk_name; + printk("%s: timeout\n", name); + if (++CURRENT->errors >= MAX_ERRORS) { +#ifdef DEBUG + printk("%s: too many errors\n", name); +#endif + end_request(CURRENT, 0); + } + local_irq_disable(); + hd_request(); + enable_irq(HD_IRQ); +} + +static int do_special_op(struct hd_i_struct *disk, struct request *req) +{ + if (disk->recalibrate) { + disk->recalibrate = 0; + hd_out(disk, disk->sect, 0, 0, 0, WIN_RESTORE, &recal_intr); + return reset; + } + if (disk->head > 16) { + printk("%s: cannot handle device with more than 16 heads - giving up\n", req->rq_disk->disk_name); + end_request(req, 0); + } + disk->special_op = 0; + return 1; +} + +/* + * The driver enables interrupts as much as possible. In order to do this, + * (a) the device-interrupt is disabled before entering hd_request(), + * and (b) the timeout-interrupt is disabled before the sti(). + * + * Interrupts are still masked (by default) whenever we are exchanging + * data/cmds with a drive, because some drives seem to have very poor + * tolerance for latency during I/O. The IDE driver has support to unmask + * interrupts for non-broken hardware, so use that driver if required. + */ +static void hd_request(void) +{ + unsigned int block, nsect, sec, track, head, cyl; + struct hd_i_struct *disk; + struct request *req; + + if (do_hd) + return; +repeat: + del_timer(&device_timer); + local_irq_enable(); + + req = CURRENT; + if (!req) { + do_hd = NULL; + return; + } + + if (reset) { + local_irq_disable(); + reset_hd(); + return; + } + disk = req->rq_disk->private_data; + block = req->sector; + nsect = req->nr_sectors; + if (block >= get_capacity(req->rq_disk) || + ((block+nsect) > get_capacity(req->rq_disk))) { + printk("%s: bad access: block=%d, count=%d\n", + req->rq_disk->disk_name, block, nsect); + end_request(req, 0); + goto repeat; + } + + if (disk->special_op) { + if (do_special_op(disk, req)) + goto repeat; + return; + } + sec = block % disk->sect + 1; + track = block / disk->sect; + head = track % disk->head; + cyl = track / disk->head; +#ifdef DEBUG + printk("%s: %sing: CHS=%d/%d/%d, sectors=%d, buffer=%p\n", + req->rq_disk->disk_name, + req_data_dir(req) == READ ? "read" : "writ", + cyl, head, sec, nsect, req->buffer); +#endif + if (blk_fs_request(req)) { + switch (rq_data_dir(req)) { + case READ: + hd_out(disk, nsect, sec, head, cyl, WIN_READ, + &read_intr); + if (reset) + goto repeat; + break; + case WRITE: + hd_out(disk, nsect, sec, head, cyl, WIN_WRITE, + &write_intr); + if (reset) + goto repeat; + if (wait_DRQ()) { + bad_rw_intr(); + goto repeat; + } + outsw(HD_DATA, req->buffer, 256); + break; + default: + printk("unknown hd-command\n"); + end_request(req, 0); + break; + } + } +} + +static void do_hd_request(struct request_queue *q) +{ + disable_irq(HD_IRQ); + hd_request(); + enable_irq(HD_IRQ); +} + +static int hd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct hd_i_struct *disk = bdev->bd_disk->private_data; + + geo->heads = disk->head; + geo->sectors = disk->sect; + geo->cylinders = disk->cyl; + return 0; +} + +/* + * Releasing a block device means we sync() it, so that it can safely + * be forgotten about... + */ + +static irqreturn_t hd_interrupt(int irq, void *dev_id) +{ + void (*handler)(void) = do_hd; + + do_hd = NULL; + del_timer(&device_timer); + if (!handler) + handler = unexpected_hd_interrupt; + handler(); + local_irq_enable(); + return IRQ_HANDLED; +} + +static struct block_device_operations hd_fops = { + .getgeo = hd_getgeo, +}; + +/* + * This is the hard disk IRQ description. The IRQF_DISABLED in sa_flags + * means we run the IRQ-handler with interrupts disabled: this is bad for + * interrupt latency, but anything else has led to problems on some + * machines. + * + * We enable interrupts in some of the routines after making sure it's + * safe. + */ + +static int __init hd_init(void) +{ + int drive; + + if (register_blkdev(MAJOR_NR, "hd")) + return -1; + + hd_queue = blk_init_queue(do_hd_request, &hd_lock); + if (!hd_queue) { + unregister_blkdev(MAJOR_NR, "hd"); + return -ENOMEM; + } + + blk_queue_max_sectors(hd_queue, 255); + init_timer(&device_timer); + device_timer.function = hd_times_out; + blk_queue_hardsect_size(hd_queue, 512); + + if (!NR_HD) { + /* + * We don't know anything about the drive. This means + * that you *MUST* specify the drive parameters to the + * kernel yourself. + * + * If we were on an i386, we used to read this info from + * the BIOS or CMOS. This doesn't work all that well, + * since this assumes that this is a primary or secondary + * drive, and if we're using this legacy driver, it's + * probably an auxilliary controller added to recover + * legacy data off an ST-506 drive. Either way, it's + * definitely safest to have the user explicitly specify + * the information. + */ + printk("hd: no drives specified - use hd=cyl,head,sectors" + " on kernel command line\n"); + goto out; + } + + for (drive = 0 ; drive < NR_HD ; drive++) { + struct gendisk *disk = alloc_disk(64); + struct hd_i_struct *p = &hd_info[drive]; + if (!disk) + goto Enomem; + disk->major = MAJOR_NR; + disk->first_minor = drive << 6; + disk->fops = &hd_fops; + sprintf(disk->disk_name, "hd%c", 'a'+drive); + disk->private_data = p; + set_capacity(disk, p->head * p->sect * p->cyl); + disk->queue = hd_queue; + p->unit = drive; + hd_gendisk[drive] = disk; + printk("%s: %luMB, CHS=%d/%d/%d\n", + disk->disk_name, (unsigned long)get_capacity(disk)/2048, + p->cyl, p->head, p->sect); + } + + if (request_irq(HD_IRQ, hd_interrupt, IRQF_DISABLED, "hd", NULL)) { + printk("hd: unable to get IRQ%d for the hard disk driver\n", + HD_IRQ); + goto out1; + } + if (!request_region(HD_DATA, 8, "hd")) { + printk(KERN_WARNING "hd: port 0x%x busy\n", HD_DATA); + goto out2; + } + if (!request_region(HD_CMD, 1, "hd(cmd)")) { + printk(KERN_WARNING "hd: port 0x%x busy\n", HD_CMD); + goto out3; + } + + /* Let them fly */ + for (drive = 0; drive < NR_HD; drive++) + add_disk(hd_gendisk[drive]); + + return 0; + +out3: + release_region(HD_DATA, 8); +out2: + free_irq(HD_IRQ, NULL); +out1: + for (drive = 0; drive < NR_HD; drive++) + put_disk(hd_gendisk[drive]); + NR_HD = 0; +out: + del_timer(&device_timer); + unregister_blkdev(MAJOR_NR, "hd"); + blk_cleanup_queue(hd_queue); + return -1; +Enomem: + while (drive--) + put_disk(hd_gendisk[drive]); + goto out; +} + +static int __init parse_hd_setup(char *line) +{ + int ints[6]; + + (void) get_options(line, ARRAY_SIZE(ints), ints); + hd_setup(NULL, ints); + + return 1; +} +__setup("hd=", parse_hd_setup); + +late_initcall(hd_init); diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 39b95f9..15b09b8 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -1000,27 +1000,4 @@ config BLK_DEV_IDEDMA endif -config BLK_DEV_HD - bool "Old hard disk (MFM/RLL/IDE) driver" - depends on !ARM || ARCH_RPC || ARCH_SHARK || BROKEN - help - There are two drivers for MFM/RLL/IDE hard disks. Most people use - the newer enhanced driver, but this old one is still around for two - reasons. Some older systems have strange timing problems and seem to - work only with the old driver (which itself does not work with some - newer systems). The other reason is that the old driver is smaller, - since it lacks the enhanced functionality of the new one. This makes - it a good choice for systems with very tight memory restrictions, or - for systems with only older MFM/RLL/ESDI drives. Choosing the old - driver can save 13 KB or so of kernel memory. - - If you want to use this driver together with the new one you have - to use "hda=noprobe hdb=noprobe" kernel parameters to prevent the new - driver from probing the primary interface. - - If you are unsure, then just choose the Enhanced IDE/MFM/RLL driver - instead of this one. For more detailed information, read the - Disk-HOWTO, available from - . - endif # IDE diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile index 39e99ac..5d414e3 100644 --- a/drivers/ide/Makefile +++ b/drivers/ide/Makefile @@ -61,8 +61,3 @@ ifeq ($(CONFIG_BLK_DEV_PLATFORM), y) endif obj-$(CONFIG_BLK_DEV_IDE) += arm/ mips/ - -ifeq ($(CONFIG_BLK_DEV_HD), y) - hd-core-y += legacy/hd.o - obj-y += hd-core.o -endif diff --git a/drivers/ide/legacy/hd.c b/drivers/ide/legacy/hd.c deleted file mode 100644 index 00b6956..0000000 --- a/drivers/ide/legacy/hd.c +++ /dev/null @@ -1,815 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * - * This is the low-level hd interrupt support. It traverses the - * request-list, using interrupts to jump between functions. As - * all the functions are called within interrupts, we may not - * sleep. Special care is recommended. - * - * modified by Drew Eckhardt to check nr of hd's from the CMOS. - * - * Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug - * in the early extended-partition checks and added DM partitions - * - * IRQ-unmask, drive-id, multiple-mode, support for ">16 heads", - * and general streamlining by Mark Lord. - * - * Removed 99% of above. Use Mark's ide driver for those options. - * This is now a lightweight ST-506 driver. (Paul Gortmaker) - * - * Modified 1995 Russell King for ARM processor. - * - * Bugfix: max_sectors must be <= 255 or the wheels tend to come - * off in a hurry once you queue things up - Paul G. 02/2001 - */ - -/* Uncomment the following if you want verbose error reports. */ -/* #define VERBOSE_ERRORS */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* CMOS defines */ -#include -#include -#include - -#define REALLY_SLOW_IO -#include -#include -#include - -#ifdef __arm__ -#undef HD_IRQ -#endif -#include -#ifdef __arm__ -#define HD_IRQ IRQ_HARDDISK -#endif - -/* Hd controller regster ports */ - -#define HD_DATA 0x1f0 /* _CTL when writing */ -#define HD_ERROR 0x1f1 /* see err-bits */ -#define HD_NSECTOR 0x1f2 /* nr of sectors to read/write */ -#define HD_SECTOR 0x1f3 /* starting sector */ -#define HD_LCYL 0x1f4 /* starting cylinder */ -#define HD_HCYL 0x1f5 /* high byte of starting cyl */ -#define HD_CURRENT 0x1f6 /* 101dhhhh , d=drive, hhhh=head */ -#define HD_STATUS 0x1f7 /* see status-bits */ -#define HD_FEATURE HD_ERROR /* same io address, read=error, write=feature */ -#define HD_PRECOMP HD_FEATURE /* obsolete use of this port - predates IDE */ -#define HD_COMMAND HD_STATUS /* same io address, read=status, write=cmd */ - -#define HD_CMD 0x3f6 /* used for resets */ -#define HD_ALTSTATUS 0x3f6 /* same as HD_STATUS but doesn't clear irq */ - -/* Bits of HD_STATUS */ -#define ERR_STAT 0x01 -#define INDEX_STAT 0x02 -#define ECC_STAT 0x04 /* Corrected error */ -#define DRQ_STAT 0x08 -#define SEEK_STAT 0x10 -#define SERVICE_STAT SEEK_STAT -#define WRERR_STAT 0x20 -#define READY_STAT 0x40 -#define BUSY_STAT 0x80 - -/* Bits for HD_ERROR */ -#define MARK_ERR 0x01 /* Bad address mark */ -#define TRK0_ERR 0x02 /* couldn't find track 0 */ -#define ABRT_ERR 0x04 /* Command aborted */ -#define MCR_ERR 0x08 /* media change request */ -#define ID_ERR 0x10 /* ID field not found */ -#define MC_ERR 0x20 /* media changed */ -#define ECC_ERR 0x40 /* Uncorrectable ECC error */ -#define BBD_ERR 0x80 /* pre-EIDE meaning: block marked bad */ -#define ICRC_ERR 0x80 /* new meaning: CRC error during transfer */ - -static DEFINE_SPINLOCK(hd_lock); -static struct request_queue *hd_queue; - -#define MAJOR_NR HD_MAJOR -#define QUEUE (hd_queue) -#define CURRENT elv_next_request(hd_queue) - -#define TIMEOUT_VALUE (6*HZ) -#define HD_DELAY 0 - -#define MAX_ERRORS 16 /* Max read/write errors/sector */ -#define RESET_FREQ 8 /* Reset controller every 8th retry */ -#define RECAL_FREQ 4 /* Recalibrate every 4th retry */ -#define MAX_HD 2 - -#define STAT_OK (READY_STAT|SEEK_STAT) -#define OK_STATUS(s) (((s)&(STAT_OK|(BUSY_STAT|WRERR_STAT|ERR_STAT)))==STAT_OK) - -static void recal_intr(void); -static void bad_rw_intr(void); - -static int reset; -static int hd_error; - -/* - * This struct defines the HD's and their types. - */ -struct hd_i_struct { - unsigned int head, sect, cyl, wpcom, lzone, ctl; - int unit; - int recalibrate; - int special_op; -}; - -#ifdef HD_TYPE -static struct hd_i_struct hd_info[] = { HD_TYPE }; -static int NR_HD = ARRAY_SIZE(hd_info); -#else -static struct hd_i_struct hd_info[MAX_HD]; -static int NR_HD; -#endif - -static struct gendisk *hd_gendisk[MAX_HD]; - -static struct timer_list device_timer; - -#define TIMEOUT_VALUE (6*HZ) - -#define SET_TIMER \ - do { \ - mod_timer(&device_timer, jiffies + TIMEOUT_VALUE); \ - } while (0) - -static void (*do_hd)(void) = NULL; -#define SET_HANDLER(x) \ -if ((do_hd = (x)) != NULL) \ - SET_TIMER; \ -else \ - del_timer(&device_timer); - - -#if (HD_DELAY > 0) - -#include - -unsigned long last_req; - -unsigned long read_timer(void) -{ - unsigned long t, flags; - int i; - - spin_lock_irqsave(&i8253_lock, flags); - t = jiffies * 11932; - outb_p(0, 0x43); - i = inb_p(0x40); - i |= inb(0x40) << 8; - spin_unlock_irqrestore(&i8253_lock, flags); - return(t - i); -} -#endif - -static void __init hd_setup(char *str, int *ints) -{ - int hdind = 0; - - if (ints[0] != 3) - return; - if (hd_info[0].head != 0) - hdind = 1; - hd_info[hdind].head = ints[2]; - hd_info[hdind].sect = ints[3]; - hd_info[hdind].cyl = ints[1]; - hd_info[hdind].wpcom = 0; - hd_info[hdind].lzone = ints[1]; - hd_info[hdind].ctl = (ints[2] > 8 ? 8 : 0); - NR_HD = hdind+1; -} - -static void dump_status(const char *msg, unsigned int stat) -{ - char *name = "hd?"; - if (CURRENT) - name = CURRENT->rq_disk->disk_name; - -#ifdef VERBOSE_ERRORS - printk("%s: %s: status=0x%02x { ", name, msg, stat & 0xff); - if (stat & BUSY_STAT) printk("Busy "); - if (stat & READY_STAT) printk("DriveReady "); - if (stat & WRERR_STAT) printk("WriteFault "); - if (stat & SEEK_STAT) printk("SeekComplete "); - if (stat & DRQ_STAT) printk("DataRequest "); - if (stat & ECC_STAT) printk("CorrectedError "); - if (stat & INDEX_STAT) printk("Index "); - if (stat & ERR_STAT) printk("Error "); - printk("}\n"); - if ((stat & ERR_STAT) == 0) { - hd_error = 0; - } else { - hd_error = inb(HD_ERROR); - printk("%s: %s: error=0x%02x { ", name, msg, hd_error & 0xff); - if (hd_error & BBD_ERR) printk("BadSector "); - if (hd_error & ECC_ERR) printk("UncorrectableError "); - if (hd_error & ID_ERR) printk("SectorIdNotFound "); - if (hd_error & ABRT_ERR) printk("DriveStatusError "); - if (hd_error & TRK0_ERR) printk("TrackZeroNotFound "); - if (hd_error & MARK_ERR) printk("AddrMarkNotFound "); - printk("}"); - if (hd_error & (BBD_ERR|ECC_ERR|ID_ERR|MARK_ERR)) { - printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL), - inb(HD_CURRENT) & 0xf, inb(HD_SECTOR)); - if (CURRENT) - printk(", sector=%ld", CURRENT->sector); - } - printk("\n"); - } -#else - printk("%s: %s: status=0x%02x.\n", name, msg, stat & 0xff); - if ((stat & ERR_STAT) == 0) { - hd_error = 0; - } else { - hd_error = inb(HD_ERROR); - printk("%s: %s: error=0x%02x.\n", name, msg, hd_error & 0xff); - } -#endif -} - -static void check_status(void) -{ - int i = inb_p(HD_STATUS); - - if (!OK_STATUS(i)) { - dump_status("check_status", i); - bad_rw_intr(); - } -} - -static int controller_busy(void) -{ - int retries = 100000; - unsigned char status; - - do { - status = inb_p(HD_STATUS); - } while ((status & BUSY_STAT) && --retries); - return status; -} - -static int status_ok(void) -{ - unsigned char status = inb_p(HD_STATUS); - - if (status & BUSY_STAT) - return 1; /* Ancient, but does it make sense??? */ - if (status & WRERR_STAT) - return 0; - if (!(status & READY_STAT)) - return 0; - if (!(status & SEEK_STAT)) - return 0; - return 1; -} - -static int controller_ready(unsigned int drive, unsigned int head) -{ - int retry = 100; - - do { - if (controller_busy() & BUSY_STAT) - return 0; - outb_p(0xA0 | (drive<<4) | head, HD_CURRENT); - if (status_ok()) - return 1; - } while (--retry); - return 0; -} - -static void hd_out(struct hd_i_struct *disk, - unsigned int nsect, - unsigned int sect, - unsigned int head, - unsigned int cyl, - unsigned int cmd, - void (*intr_addr)(void)) -{ - unsigned short port; - -#if (HD_DELAY > 0) - while (read_timer() - last_req < HD_DELAY) - /* nothing */; -#endif - if (reset) - return; - if (!controller_ready(disk->unit, head)) { - reset = 1; - return; - } - SET_HANDLER(intr_addr); - outb_p(disk->ctl, HD_CMD); - port = HD_DATA; - outb_p(disk->wpcom >> 2, ++port); - outb_p(nsect, ++port); - outb_p(sect, ++port); - outb_p(cyl, ++port); - outb_p(cyl >> 8, ++port); - outb_p(0xA0 | (disk->unit << 4) | head, ++port); - outb_p(cmd, ++port); -} - -static void hd_request (void); - -static int drive_busy(void) -{ - unsigned int i; - unsigned char c; - - for (i = 0; i < 500000 ; i++) { - c = inb_p(HD_STATUS); - if ((c & (BUSY_STAT | READY_STAT | SEEK_STAT)) == STAT_OK) - return 0; - } - dump_status("reset timed out", c); - return 1; -} - -static void reset_controller(void) -{ - int i; - - outb_p(4, HD_CMD); - for (i = 0; i < 1000; i++) barrier(); - outb_p(hd_info[0].ctl & 0x0f, HD_CMD); - for (i = 0; i < 1000; i++) barrier(); - if (drive_busy()) - printk("hd: controller still busy\n"); - else if ((hd_error = inb(HD_ERROR)) != 1) - printk("hd: controller reset failed: %02x\n", hd_error); -} - -static void reset_hd(void) -{ - static int i; - -repeat: - if (reset) { - reset = 0; - i = -1; - reset_controller(); - } else { - check_status(); - if (reset) - goto repeat; - } - if (++i < NR_HD) { - struct hd_i_struct *disk = &hd_info[i]; - disk->special_op = disk->recalibrate = 1; - hd_out(disk, disk->sect, disk->sect, disk->head-1, - disk->cyl, WIN_SPECIFY, &reset_hd); - if (reset) - goto repeat; - } else - hd_request(); -} - -/* - * Ok, don't know what to do with the unexpected interrupts: on some machines - * doing a reset and a retry seems to result in an eternal loop. Right now I - * ignore it, and just set the timeout. - * - * On laptops (and "green" PCs), an unexpected interrupt occurs whenever the - * drive enters "idle", "standby", or "sleep" mode, so if the status looks - * "good", we just ignore the interrupt completely. - */ -static void unexpected_hd_interrupt(void) -{ - unsigned int stat = inb_p(HD_STATUS); - - if (stat & (BUSY_STAT|DRQ_STAT|ECC_STAT|ERR_STAT)) { - dump_status("unexpected interrupt", stat); - SET_TIMER; - } -} - -/* - * bad_rw_intr() now tries to be a bit smarter and does things - * according to the error returned by the controller. - * -Mika Liljeberg (liljeber@cs.Helsinki.FI) - */ -static void bad_rw_intr(void) -{ - struct request *req = CURRENT; - if (req != NULL) { - struct hd_i_struct *disk = req->rq_disk->private_data; - if (++req->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) { - end_request(req, 0); - disk->special_op = disk->recalibrate = 1; - } else if (req->errors % RESET_FREQ == 0) - reset = 1; - else if ((hd_error & TRK0_ERR) || req->errors % RECAL_FREQ == 0) - disk->special_op = disk->recalibrate = 1; - /* Otherwise just retry */ - } -} - -static inline int wait_DRQ(void) -{ - int retries; - int stat; - - for (retries = 0; retries < 100000; retries++) { - stat = inb_p(HD_STATUS); - if (stat & DRQ_STAT) - return 0; - } - dump_status("wait_DRQ", stat); - return -1; -} - -static void read_intr(void) -{ - struct request *req; - int i, retries = 100000; - - do { - i = (unsigned) inb_p(HD_STATUS); - if (i & BUSY_STAT) - continue; - if (!OK_STATUS(i)) - break; - if (i & DRQ_STAT) - goto ok_to_read; - } while (--retries > 0); - dump_status("read_intr", i); - bad_rw_intr(); - hd_request(); - return; -ok_to_read: - req = CURRENT; - insw(HD_DATA, req->buffer, 256); - req->sector++; - req->buffer += 512; - req->errors = 0; - i = --req->nr_sectors; - --req->current_nr_sectors; -#ifdef DEBUG - printk("%s: read: sector %ld, remaining = %ld, buffer=%p\n", - req->rq_disk->disk_name, req->sector, req->nr_sectors, - req->buffer+512); -#endif - if (req->current_nr_sectors <= 0) - end_request(req, 1); - if (i > 0) { - SET_HANDLER(&read_intr); - return; - } - (void) inb_p(HD_STATUS); -#if (HD_DELAY > 0) - last_req = read_timer(); -#endif - if (elv_next_request(QUEUE)) - hd_request(); - return; -} - -static void write_intr(void) -{ - struct request *req = CURRENT; - int i; - int retries = 100000; - - do { - i = (unsigned) inb_p(HD_STATUS); - if (i & BUSY_STAT) - continue; - if (!OK_STATUS(i)) - break; - if ((req->nr_sectors <= 1) || (i & DRQ_STAT)) - goto ok_to_write; - } while (--retries > 0); - dump_status("write_intr", i); - bad_rw_intr(); - hd_request(); - return; -ok_to_write: - req->sector++; - i = --req->nr_sectors; - --req->current_nr_sectors; - req->buffer += 512; - if (!i || (req->bio && req->current_nr_sectors <= 0)) - end_request(req, 1); - if (i > 0) { - SET_HANDLER(&write_intr); - outsw(HD_DATA, req->buffer, 256); - local_irq_enable(); - } else { -#if (HD_DELAY > 0) - last_req = read_timer(); -#endif - hd_request(); - } - return; -} - -static void recal_intr(void) -{ - check_status(); -#if (HD_DELAY > 0) - last_req = read_timer(); -#endif - hd_request(); -} - -/* - * This is another of the error-routines I don't know what to do with. The - * best idea seems to just set reset, and start all over again. - */ -static void hd_times_out(unsigned long dummy) -{ - char *name; - - do_hd = NULL; - - if (!CURRENT) - return; - - disable_irq(HD_IRQ); - local_irq_enable(); - reset = 1; - name = CURRENT->rq_disk->disk_name; - printk("%s: timeout\n", name); - if (++CURRENT->errors >= MAX_ERRORS) { -#ifdef DEBUG - printk("%s: too many errors\n", name); -#endif - end_request(CURRENT, 0); - } - local_irq_disable(); - hd_request(); - enable_irq(HD_IRQ); -} - -static int do_special_op(struct hd_i_struct *disk, struct request *req) -{ - if (disk->recalibrate) { - disk->recalibrate = 0; - hd_out(disk, disk->sect, 0, 0, 0, WIN_RESTORE, &recal_intr); - return reset; - } - if (disk->head > 16) { - printk("%s: cannot handle device with more than 16 heads - giving up\n", req->rq_disk->disk_name); - end_request(req, 0); - } - disk->special_op = 0; - return 1; -} - -/* - * The driver enables interrupts as much as possible. In order to do this, - * (a) the device-interrupt is disabled before entering hd_request(), - * and (b) the timeout-interrupt is disabled before the sti(). - * - * Interrupts are still masked (by default) whenever we are exchanging - * data/cmds with a drive, because some drives seem to have very poor - * tolerance for latency during I/O. The IDE driver has support to unmask - * interrupts for non-broken hardware, so use that driver if required. - */ -static void hd_request(void) -{ - unsigned int block, nsect, sec, track, head, cyl; - struct hd_i_struct *disk; - struct request *req; - - if (do_hd) - return; -repeat: - del_timer(&device_timer); - local_irq_enable(); - - req = CURRENT; - if (!req) { - do_hd = NULL; - return; - } - - if (reset) { - local_irq_disable(); - reset_hd(); - return; - } - disk = req->rq_disk->private_data; - block = req->sector; - nsect = req->nr_sectors; - if (block >= get_capacity(req->rq_disk) || - ((block+nsect) > get_capacity(req->rq_disk))) { - printk("%s: bad access: block=%d, count=%d\n", - req->rq_disk->disk_name, block, nsect); - end_request(req, 0); - goto repeat; - } - - if (disk->special_op) { - if (do_special_op(disk, req)) - goto repeat; - return; - } - sec = block % disk->sect + 1; - track = block / disk->sect; - head = track % disk->head; - cyl = track / disk->head; -#ifdef DEBUG - printk("%s: %sing: CHS=%d/%d/%d, sectors=%d, buffer=%p\n", - req->rq_disk->disk_name, - req_data_dir(req) == READ ? "read" : "writ", - cyl, head, sec, nsect, req->buffer); -#endif - if (blk_fs_request(req)) { - switch (rq_data_dir(req)) { - case READ: - hd_out(disk, nsect, sec, head, cyl, WIN_READ, - &read_intr); - if (reset) - goto repeat; - break; - case WRITE: - hd_out(disk, nsect, sec, head, cyl, WIN_WRITE, - &write_intr); - if (reset) - goto repeat; - if (wait_DRQ()) { - bad_rw_intr(); - goto repeat; - } - outsw(HD_DATA, req->buffer, 256); - break; - default: - printk("unknown hd-command\n"); - end_request(req, 0); - break; - } - } -} - -static void do_hd_request(struct request_queue *q) -{ - disable_irq(HD_IRQ); - hd_request(); - enable_irq(HD_IRQ); -} - -static int hd_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - struct hd_i_struct *disk = bdev->bd_disk->private_data; - - geo->heads = disk->head; - geo->sectors = disk->sect; - geo->cylinders = disk->cyl; - return 0; -} - -/* - * Releasing a block device means we sync() it, so that it can safely - * be forgotten about... - */ - -static irqreturn_t hd_interrupt(int irq, void *dev_id) -{ - void (*handler)(void) = do_hd; - - do_hd = NULL; - del_timer(&device_timer); - if (!handler) - handler = unexpected_hd_interrupt; - handler(); - local_irq_enable(); - return IRQ_HANDLED; -} - -static struct block_device_operations hd_fops = { - .getgeo = hd_getgeo, -}; - -/* - * This is the hard disk IRQ description. The IRQF_DISABLED in sa_flags - * means we run the IRQ-handler with interrupts disabled: this is bad for - * interrupt latency, but anything else has led to problems on some - * machines. - * - * We enable interrupts in some of the routines after making sure it's - * safe. - */ - -static int __init hd_init(void) -{ - int drive; - - if (register_blkdev(MAJOR_NR, "hd")) - return -1; - - hd_queue = blk_init_queue(do_hd_request, &hd_lock); - if (!hd_queue) { - unregister_blkdev(MAJOR_NR, "hd"); - return -ENOMEM; - } - - blk_queue_max_sectors(hd_queue, 255); - init_timer(&device_timer); - device_timer.function = hd_times_out; - blk_queue_hardsect_size(hd_queue, 512); - - if (!NR_HD) { - /* - * We don't know anything about the drive. This means - * that you *MUST* specify the drive parameters to the - * kernel yourself. - * - * If we were on an i386, we used to read this info from - * the BIOS or CMOS. This doesn't work all that well, - * since this assumes that this is a primary or secondary - * drive, and if we're using this legacy driver, it's - * probably an auxilliary controller added to recover - * legacy data off an ST-506 drive. Either way, it's - * definitely safest to have the user explicitly specify - * the information. - */ - printk("hd: no drives specified - use hd=cyl,head,sectors" - " on kernel command line\n"); - goto out; - } - - for (drive = 0 ; drive < NR_HD ; drive++) { - struct gendisk *disk = alloc_disk(64); - struct hd_i_struct *p = &hd_info[drive]; - if (!disk) - goto Enomem; - disk->major = MAJOR_NR; - disk->first_minor = drive << 6; - disk->fops = &hd_fops; - sprintf(disk->disk_name, "hd%c", 'a'+drive); - disk->private_data = p; - set_capacity(disk, p->head * p->sect * p->cyl); - disk->queue = hd_queue; - p->unit = drive; - hd_gendisk[drive] = disk; - printk("%s: %luMB, CHS=%d/%d/%d\n", - disk->disk_name, (unsigned long)get_capacity(disk)/2048, - p->cyl, p->head, p->sect); - } - - if (request_irq(HD_IRQ, hd_interrupt, IRQF_DISABLED, "hd", NULL)) { - printk("hd: unable to get IRQ%d for the hard disk driver\n", - HD_IRQ); - goto out1; - } - if (!request_region(HD_DATA, 8, "hd")) { - printk(KERN_WARNING "hd: port 0x%x busy\n", HD_DATA); - goto out2; - } - if (!request_region(HD_CMD, 1, "hd(cmd)")) { - printk(KERN_WARNING "hd: port 0x%x busy\n", HD_CMD); - goto out3; - } - - /* Let them fly */ - for (drive = 0; drive < NR_HD; drive++) - add_disk(hd_gendisk[drive]); - - return 0; - -out3: - release_region(HD_DATA, 8); -out2: - free_irq(HD_IRQ, NULL); -out1: - for (drive = 0; drive < NR_HD; drive++) - put_disk(hd_gendisk[drive]); - NR_HD = 0; -out: - del_timer(&device_timer); - unregister_blkdev(MAJOR_NR, "hd"); - blk_cleanup_queue(hd_queue); - return -1; -Enomem: - while (drive--) - put_disk(hd_gendisk[drive]); - goto out; -} - -static int __init parse_hd_setup(char *line) -{ - int ints[6]; - - (void) get_options(line, ARRAY_SIZE(ints), ints); - hd_setup(NULL, ints); - - return 1; -} -__setup("hd=", parse_hd_setup); - -late_initcall(hd_init); -- cgit v0.10.2 From f327c1c33f4882c70f29ff3a9ff7c55c1951d1f6 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 16 Jul 2008 20:33:47 +0200 Subject: update the BLK_DEV_HD help text Many people will see this option the first time now that it is in drivers/block/ Make it clear that virtually noone needs it. Signed-off-by: Adrian Bunk Cc: rmk@arm.linux.org.uk Cc: Alan Cox Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index c390974..61ad8d6 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -434,27 +434,15 @@ config VIRTIO_BLK lguest or QEMU based VMMs (like KVM or Xen). Say Y or M. config BLK_DEV_HD - bool "Old hard disk (MFM/RLL/IDE) driver" + bool "Very old hard disk (MFM/RLL/IDE) driver" depends on HAVE_IDE depends on !ARM || ARCH_RPC || ARCH_SHARK || BROKEN help - There are two drivers for MFM/RLL/IDE hard disks. Most people use - the newer enhanced driver, but this old one is still around for two - reasons. Some older systems have strange timing problems and seem to - work only with the old driver (which itself does not work with some - newer systems). The other reason is that the old driver is smaller, - since it lacks the enhanced functionality of the new one. This makes - it a good choice for systems with very tight memory restrictions, or - for systems with only older MFM/RLL/ESDI drives. Choosing the old - driver can save 13 KB or so of kernel memory. - - If you want to use this driver together with the new one you have - to use "hda=noprobe hdb=noprobe" kernel parameters to prevent the new - driver from probing the primary interface. - - If you are unsure, then just choose the Enhanced IDE/MFM/RLL driver - instead of this one. For more detailed information, read the - Disk-HOWTO, available from - . + This is a very old hard disk driver that lacks the enhanced + functionality of the newer ones. + + It is required for systems with ancient MFM/RLL/ESDI drives. + + If unsure, say N. endif # BLK_DEV -- cgit v0.10.2 From 72a3d651b2fe341a8ae2ca164c395aa3007350cd Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 16 Jul 2008 20:33:48 +0200 Subject: hd.c: remove the #include The code that needed this #include was removed one year ago. Signed-off-by: Adrian Bunk Cc: rmk@arm.linux.org.uk Cc: Alan Cox Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/block/hd.c b/drivers/block/hd.c index 00b6956..682243b 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -37,7 +37,6 @@ #include #include #include -#include /* CMOS defines */ #include #include #include -- cgit v0.10.2 From 79e36a9f54aaf4a52eb2d9520953aa3960e99294 Mon Sep 17 00:00:00 2001 From: Elias Oltmanns Date: Wed, 16 Jul 2008 20:33:48 +0200 Subject: IDE: Fix HDIO_DRIVE_RESET handling Currently, the code path executing an HDIO_DRIVE_RESET ioctl is broken in various ways. Most importantly, it is treated as an out of band request in an illegal way which may very likely lead to system lock ups. Use the drive's request queue to avoid this problem (and fix a locking issue for free along the way). Signed-off-by: Elias Oltmanns Cc: "Alan Cox" Cc: "Randy Dunlap" Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 2805774..2b33c12 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -766,6 +766,18 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, return ide_stopped; } +static ide_startstop_t ide_special_rq(ide_drive_t *drive, struct request *rq) +{ + switch (rq->cmd[0]) { + case REQ_DRIVE_RESET: + return ide_do_reset(drive); + default: + blk_dump_rq_flags(rq, "ide_special_rq - bad request"); + ide_end_request(drive, 0, 0); + return ide_stopped; + } +} + static void ide_check_pm_state(ide_drive_t *drive, struct request *rq) { struct request_pm_state *pm = rq->data; @@ -869,7 +881,16 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) pm->pm_step == ide_pm_state_completed) ide_complete_pm_request(drive, rq); return startstop; - } + } else if (!rq->rq_disk && blk_special_request(rq)) + /* + * TODO: Once all ULDs have been modified to + * check for specific op codes rather than + * blindly accepting any special request, the + * check for ->rq_disk above may be replaced + * by a more suitable mechanism or even + * dropped entirely. + */ + return ide_special_rq(drive, rq); drv = *(ide_driver_t **)rq->rq_disk->private_data; return drv->do_request(drive, rq, block); diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 80ad4f2..96f63eb 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -905,6 +905,14 @@ void ide_execute_pkt_cmd(ide_drive_t *drive) } EXPORT_SYMBOL_GPL(ide_execute_pkt_cmd); +static inline void ide_complete_drive_reset(ide_drive_t *drive) +{ + struct request *rq = drive->hwif->hwgroup->rq; + + if (rq && blk_special_request(rq) && rq->cmd[0] == REQ_DRIVE_RESET) + ide_end_request(drive, 1, 0); +} + /* needed below */ static ide_startstop_t do_reset1 (ide_drive_t *, int); @@ -940,7 +948,7 @@ static ide_startstop_t atapi_reset_pollfunc (ide_drive_t *drive) } /* done polling */ hwgroup->polling = 0; - hwgroup->resetting = 0; + ide_complete_drive_reset(drive); return ide_stopped; } @@ -961,7 +969,7 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive) if (port_ops->reset_poll(drive)) { printk(KERN_ERR "%s: host reset_poll failure for %s.\n", hwif->name, drive->name); - return ide_stopped; + goto out; } } @@ -1004,7 +1012,8 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive) } } hwgroup->polling = 0; /* done polling */ - hwgroup->resetting = 0; /* done reset attempt */ +out: + ide_complete_drive_reset(drive); return ide_stopped; } @@ -1090,7 +1099,6 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi) /* For an ATAPI device, first try an ATAPI SRST. */ if (drive->media != ide_disk && !do_not_try_atapi) { - hwgroup->resetting = 1; pre_reset(drive); SELECT_DRIVE(drive); udelay (20); @@ -1112,10 +1120,10 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi) if (io_ports->ctl_addr == 0) { spin_unlock_irqrestore(&ide_lock, flags); + ide_complete_drive_reset(drive); return ide_stopped; } - hwgroup->resetting = 1; /* * Note that we also set nIEN while resetting the device, * to mask unwanted interrupts from the interface during the reset. diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 90ae00d..1ec983b 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -529,6 +529,19 @@ static int generic_ide_resume(struct device *dev) return err; } +static void generic_drive_reset(ide_drive_t *drive) +{ + struct request *rq; + + rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd_len = 1; + rq->cmd[0] = REQ_DRIVE_RESET; + rq->cmd_flags |= REQ_SOFTBARRIER; + blk_execute_rq(drive->queue, NULL, rq, 1); + blk_put_request(rq); +} + int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device *bdev, unsigned int cmd, unsigned long arg) { @@ -603,33 +616,9 @@ int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device if (!capable(CAP_SYS_ADMIN)) return -EACCES; - /* - * Abort the current command on the - * group if there is one, taking - * care not to allow anything else - * to be queued and to die on the - * spot if we miss one somehow - */ - - spin_lock_irqsave(&ide_lock, flags); - - if (HWGROUP(drive)->resetting) { - spin_unlock_irqrestore(&ide_lock, flags); - return -EBUSY; - } - - ide_abort(drive, "drive reset"); - - BUG_ON(HWGROUP(drive)->handler); - - /* Ensure nothing gets queued after we - drop the lock. Reset will clear the busy */ - - HWGROUP(drive)->busy = 1; - spin_unlock_irqrestore(&ide_lock, flags); - (void) ide_do_reset(drive); - + generic_drive_reset(drive); return 0; + case HDIO_GET_BUSSTATE: if (!capable(CAP_SYS_ADMIN)) return -EACCES; diff --git a/include/linux/ide.h b/include/linux/ide.h index f9cbe93..021710c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -139,6 +139,12 @@ struct ide_io_ports { #define WAIT_MIN_SLEEP (2*HZ/100) /* 20msec - minimum sleep time */ /* + * Op codes for special requests to be handled by ide_special_rq(). + * Values should be in the range of 0x20 to 0x3f. + */ +#define REQ_DRIVE_RESET 0x20 + +/* * Check for an interrupt and acknowledge the interrupt status */ struct hwif_s; -- cgit v0.10.2 From 3ef5eb424ebf0cd981192a416358fd707a9f959b Mon Sep 17 00:00:00 2001 From: Elias Oltmanns Date: Wed, 16 Jul 2008 20:33:48 +0200 Subject: IDE: Remove unused code Remove some code which has been made obsolete and hasn't worked properly before anyway. Part of the infrastructure may be reintroduced in a follow up patch to implement a working command aborting facility. Signed-off-by: Elias Oltmanns Cc: "Alan Cox" Cc: "Randy Dunlap" Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 2b3c69d..6e29dd5 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1952,7 +1952,6 @@ static ide_driver_t ide_cdrom_driver = { .do_request = ide_cd_do_request, .end_request = ide_end_request, .error = __ide_error, - .abort = __ide_abort, #ifdef CONFIG_IDE_PROC_FS .proc = idecd_proc, #endif diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 5f49a4a..3a2e802 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -985,7 +985,6 @@ static ide_driver_t idedisk_driver = { .do_request = ide_do_rw_disk, .end_request = ide_end_request, .error = __ide_error, - .abort = __ide_abort, #ifdef CONFIG_IDE_PROC_FS .proc = idedisk_proc, #endif diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index c7e4433..011d720 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -1129,7 +1129,6 @@ static ide_driver_t idefloppy_driver = { .do_request = idefloppy_do_request, .end_request = idefloppy_end_request, .error = __ide_error, - .abort = __ide_abort, #ifdef CONFIG_IDE_PROC_FS .proc = idefloppy_proc, #endif diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 2b33c12..661b75a 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -504,55 +504,6 @@ ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, u8 stat) EXPORT_SYMBOL_GPL(ide_error); -ide_startstop_t __ide_abort(ide_drive_t *drive, struct request *rq) -{ - if (drive->media != ide_disk) - rq->errors |= ERROR_RESET; - - ide_kill_rq(drive, rq); - - return ide_stopped; -} - -EXPORT_SYMBOL_GPL(__ide_abort); - -/** - * ide_abort - abort pending IDE operations - * @drive: drive the error occurred on - * @msg: message to report - * - * ide_abort kills and cleans up when we are about to do a - * host initiated reset on active commands. Longer term we - * want handlers to have sensible abort handling themselves - * - * This differs fundamentally from ide_error because in - * this case the command is doing just fine when we - * blow it away. - */ - -ide_startstop_t ide_abort(ide_drive_t *drive, const char *msg) -{ - struct request *rq; - - if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL) - return ide_stopped; - - /* retry only "normal" I/O: */ - if (!blk_fs_request(rq)) { - rq->errors = 1; - ide_end_drive_cmd(drive, BUSY_STAT, 0); - return ide_stopped; - } - - if (rq->rq_disk) { - ide_driver_t *drv; - - drv = *(ide_driver_t **)rq->rq_disk->private_data; - return drv->abort(drive, rq); - } else - return __ide_abort(drive, rq); -} - static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf) { tf->nsect = drive->sect; diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index f9cf167..b711ab9 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -2591,7 +2591,6 @@ static ide_driver_t idetape_driver = { .do_request = idetape_do_request, .end_request = idetape_end_request, .error = __ide_error, - .abort = __ide_abort, #ifdef CONFIG_IDE_PROC_FS .proc = idetape_proc, #endif diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index 683bce3..f843c13 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c @@ -258,19 +258,6 @@ idescsi_atapi_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err) return ide_stopped; } -static ide_startstop_t -idescsi_atapi_abort(ide_drive_t *drive, struct request *rq) -{ - debug_log("%s called for %lu\n", __func__, - ((struct ide_atapi_pc *) rq->special)->scsi_cmd->serial_number); - - rq->errors |= ERROR_MAX; - - idescsi_end_request(drive, 0, 0); - - return ide_stopped; -} - static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs) { idescsi_scsi_t *scsi = drive_to_idescsi(drive); @@ -524,7 +511,6 @@ static ide_driver_t idescsi_driver = { .do_request = idescsi_do_request, .end_request = idescsi_end_request, .error = idescsi_atapi_error, - .abort = idescsi_atapi_abort, #ifdef CONFIG_IDE_PROC_FS .proc = idescsi_proc, #endif diff --git a/include/linux/ide.h b/include/linux/ide.h index 021710c..4726126 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -571,8 +571,6 @@ typedef struct hwgroup_s { unsigned int sleeping : 1; /* BOOL: polling active & poll_timeout field valid */ unsigned int polling : 1; - /* BOOL: in a polling reset situation. Must not trigger another reset yet */ - unsigned int resetting : 1; /* current drive */ ide_drive_t *drive; @@ -792,7 +790,6 @@ struct ide_driver_s { ide_startstop_t (*do_request)(ide_drive_t *, struct request *, sector_t); int (*end_request)(ide_drive_t *, int, int); ide_startstop_t (*error)(ide_drive_t *, struct request *rq, u8, u8); - ide_startstop_t (*abort)(ide_drive_t *, struct request *rq); struct device_driver gen_driver; int (*probe)(ide_drive_t *); void (*remove)(ide_drive_t *); @@ -834,10 +831,6 @@ ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8); ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat); -ide_startstop_t __ide_abort(ide_drive_t *, struct request *); - -extern ide_startstop_t ide_abort(ide_drive_t *, const char *); - extern void ide_fix_driveid(struct hd_driveid *); extern void ide_fixstring(u8 *, const int, const int); -- cgit v0.10.2 From bb7ee9b1ec15358af870a81b0c6a03af29417f99 Mon Sep 17 00:00:00 2001 From: Elias Oltmanns Date: Wed, 16 Jul 2008 20:33:48 +0200 Subject: Update documentation of HDIO_DRIVE_RESET ioctl Alter the entry for HDIO_DRIVE_RESET in Documentation/ioctl/hdio.txt to reflect a functional change in the driver. Besides, the entry has been inaccurate before. Signed-off-by: Elias Oltmanns Cc: "Alan Cox" Cc: "Randy Dunlap" Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/Documentation/ioctl/hdio.txt b/Documentation/ioctl/hdio.txt index c19efde..44d283d 100644 --- a/Documentation/ioctl/hdio.txt +++ b/Documentation/ioctl/hdio.txt @@ -511,9 +511,8 @@ HDIO_DRIVE_RESET execute a device reset notes: - Abort any current command, prevent anything else from being - queued, execute a reset on the device, and issue BLKRRPART - ioctl on the block device. + Execute a reset on the device as soon as the current IO + operation has completed. Executes an ATAPI soft reset if applicable, otherwise executes an ATA soft reset on the controller. -- cgit v0.10.2 From 64a8f00ff19508b3962c8a932375dbae88bee4d6 Mon Sep 17 00:00:00 2001 From: Elias Oltmanns Date: Wed, 16 Jul 2008 20:33:48 +0200 Subject: IDE: Report errors during drive reset back to user space Make sure that each error condition during the execution of an HDIO_DRIVE_RESET ioctl is actually reported to the calling process. Also, unify the exit path of reset_pollfunc() when returning ide_stopped since the need of ->port_ops->reset_poll() to be treated specially has vanished (way back, it seems). Signed-off-by: Elias Oltmanns Cc: "Alan Cox" Cc: "Randy Dunlap" Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/Documentation/ioctl/hdio.txt b/Documentation/ioctl/hdio.txt index 44d283d..91a6ecb 100644 --- a/Documentation/ioctl/hdio.txt +++ b/Documentation/ioctl/hdio.txt @@ -508,6 +508,8 @@ HDIO_DRIVE_RESET execute a device reset error returns: EACCES Access denied: requires CAP_SYS_ADMIN + ENXIO No such device: phy dead or ctl_addr == 0 + EIO I/O error: reset timed out or hardware error notes: diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 96f63eb..44aaec2 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -905,12 +905,12 @@ void ide_execute_pkt_cmd(ide_drive_t *drive) } EXPORT_SYMBOL_GPL(ide_execute_pkt_cmd); -static inline void ide_complete_drive_reset(ide_drive_t *drive) +static inline void ide_complete_drive_reset(ide_drive_t *drive, int err) { struct request *rq = drive->hwif->hwgroup->rq; if (rq && blk_special_request(rq) && rq->cmd[0] == REQ_DRIVE_RESET) - ide_end_request(drive, 1, 0); + ide_end_request(drive, err ? err : 1, 0); } /* needed below */ @@ -948,7 +948,7 @@ static ide_startstop_t atapi_reset_pollfunc (ide_drive_t *drive) } /* done polling */ hwgroup->polling = 0; - ide_complete_drive_reset(drive); + ide_complete_drive_reset(drive, 0); return ide_stopped; } @@ -964,9 +964,11 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive) ide_hwif_t *hwif = HWIF(drive); const struct ide_port_ops *port_ops = hwif->port_ops; u8 tmp; + int err = 0; if (port_ops && port_ops->reset_poll) { - if (port_ops->reset_poll(drive)) { + err = port_ops->reset_poll(drive); + if (err) { printk(KERN_ERR "%s: host reset_poll failure for %s.\n", hwif->name, drive->name); goto out; @@ -983,6 +985,7 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive) } printk("%s: reset timed-out, status=0x%02x\n", hwif->name, tmp); drive->failures++; + err = -EIO; } else { printk("%s: reset: ", hwif->name); tmp = ide_read_error(drive); @@ -1009,11 +1012,12 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive) if (tmp & 0x80) printk("; slave: failed"); printk("\n"); + err = -EIO; } } - hwgroup->polling = 0; /* done polling */ out: - ide_complete_drive_reset(drive); + hwgroup->polling = 0; /* done polling */ + ide_complete_drive_reset(drive, err); return ide_stopped; } @@ -1120,7 +1124,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi) if (io_ports->ctl_addr == 0) { spin_unlock_irqrestore(&ide_lock, flags); - ide_complete_drive_reset(drive); + ide_complete_drive_reset(drive, -ENXIO); return ide_stopped; } diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 1ec983b..d4a6b10 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -529,17 +529,20 @@ static int generic_ide_resume(struct device *dev) return err; } -static void generic_drive_reset(ide_drive_t *drive) +static int generic_drive_reset(ide_drive_t *drive) { struct request *rq; + int ret = 0; rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_SPECIAL; rq->cmd_len = 1; rq->cmd[0] = REQ_DRIVE_RESET; rq->cmd_flags |= REQ_SOFTBARRIER; - blk_execute_rq(drive->queue, NULL, rq, 1); + if (blk_execute_rq(drive->queue, NULL, rq, 1)) + ret = rq->errors; blk_put_request(rq); + return ret; } int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device *bdev, @@ -616,8 +619,7 @@ int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device if (!capable(CAP_SYS_ADMIN)) return -EACCES; - generic_drive_reset(drive); - return 0; + return generic_drive_reset(drive); case HDIO_GET_BUSSTATE: if (!capable(CAP_SYS_ADMIN)) diff --git a/drivers/ide/pci/siimage.c b/drivers/ide/pci/siimage.c index b75e9bb..6e9d765 100644 --- a/drivers/ide/pci/siimage.c +++ b/drivers/ide/pci/siimage.c @@ -421,8 +421,7 @@ static int sil_sata_reset_poll(ide_drive_t *drive) if ((sata_stat & 0x03) != 0x03) { printk(KERN_WARNING "%s: reset phy dead, status=0x%08x\n", hwif->name, sata_stat); - HWGROUP(drive)->polling = 0; - return ide_started; + return -ENXIO; } } -- cgit v0.10.2 From bef5b54bd7bf8117c75cb943d64549134c6d9a1f Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 16 Jul 2008 13:02:24 +0100 Subject: Fix MIPS cross-compile problem Crosscompiling on a Fedora 9 machine running gcc 4.3.0 as its host compiler and gcc 3.4.6 for the mips-linux target results in the following build error: $ make malta_defconfig $ make cc1: error: unrecognized command line option "-fno-stack-protector" scripts/kconfig/conf -s arch/mips/Kconfig cc1: error: unrecognized command line option "-fno-stack-protector" The arch Makefile is included too late so the host compiler is feature tested, not the crosscompiler as intended and thus the Makefile applies adds -fno-stack-protector to crosscompiler's flags which fails for gcc 3.4.6. The bug was introduced by e06b8b98da071f7dd78fb7822991694288047df0 in 2.6.25; 35bb5b1e0e84cfa1a8906f7e6a77f391ff315791 did add more flags testing before the arch Makefile inclusion. Signed-off-by: Ralf Baechle Signed-off-by: Linus Torvalds diff --git a/Makefile b/Makefile index 8a2a275..1564577 100644 --- a/Makefile +++ b/Makefile @@ -508,6 +508,8 @@ else KBUILD_CFLAGS += -O2 endif +include $(srctree)/arch/$(SRCARCH)/Makefile + ifneq (CONFIG_FRAME_WARN,0) KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN}) endif @@ -516,8 +518,6 @@ endif # Arch Makefiles may override this setting KBUILD_CFLAGS += $(call cc-option, -fno-stack-protector) -include $(srctree)/arch/$(SRCARCH)/Makefile - ifdef CONFIG_FRAME_POINTER KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls else -- cgit v0.10.2 From e3621ee633810be1079d0fa65ac2c904f53b73fa Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Wed, 16 Jul 2008 08:39:12 -0500 Subject: powerpc/ep8248e: Fix compile problem if !CONFIG_FS_ENET If we don't enable FS_ENET we get build issues: arch/powerpc/platforms/built-in.o: In function `ep8248e_mdio_probe': arch/powerpc/platforms/82xx/ep8248e.c:129: undefined reference to `alloc_mdio_bitbang' arch/powerpc/platforms/82xx/ep8248e.c:143: undefined reference to `mdiobus_register' Signed-off-by: Kumar Gala Signed-off-by: Linus Torvalds diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c index d5770fd..373e993 100644 --- a/arch/powerpc/platforms/82xx/ep8248e.c +++ b/arch/powerpc/platforms/82xx/ep8248e.c @@ -59,6 +59,7 @@ static void __init ep8248e_pic_init(void) of_node_put(np); } +#ifdef CONFIG_FS_ENET_MDIO_FCC static void ep8248e_set_mdc(struct mdiobb_ctrl *ctrl, int level) { if (level) @@ -164,6 +165,7 @@ static struct of_platform_driver ep8248e_mdio_driver = { .probe = ep8248e_mdio_probe, .remove = ep8248e_mdio_remove, }; +#endif struct cpm_pin { int port, pin, flags; @@ -296,7 +298,9 @@ static __initdata struct of_device_id of_bus_ids[] = { static int __init declare_of_platform_devices(void) { of_platform_bus_probe(NULL, of_bus_ids, NULL); +#ifdef CONFIG_FS_ENET_MDIO_FCC of_register_platform_driver(&ep8248e_mdio_driver); +#endif return 0; } -- cgit v0.10.2 From 3c3622dcb64c76c9abd2e468f802db9ba523421c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 16 Jul 2008 08:52:00 -0500 Subject: Fix compile issues in fs/compat_ioctl.c when CONFIG_BLOCK is disabled Fix fs/compat_ioctl.c to handle CONFIG_BLOCK=n, CONFIG_SCSI=n to avoid build errors: In file included from include/scsi/scsi.h:12, from fs/compat_ioctl.c:71: include/scsi/scsi_cmnd.h:27:25: warning: "BLK_MAX_CDB" is not defined include/scsi/scsi_cmnd.h:28:3: error: #error MAX_COMMAND_SIZE can not be bigger than BLK_MAX_CDB In file included from include/scsi/scsi.h:12, from fs/compat_ioctl.c:71: include/scsi/scsi_cmnd.h: In function 'scsi_bidi_cmnd': include/scsi/scsi_cmnd.h:182: error: implicit declaration of function 'blk_bidi_rq' include/scsi/scsi_cmnd.h:183: error: dereferencing pointer to incomplete type include/scsi/scsi_cmnd.h: In function 'scsi_in': include/scsi/scsi_cmnd.h:189: error: dereferencing pointer to incomplete type Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 97dba0d..c54eaab 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -69,9 +69,11 @@ #include #include +#ifdef CONFIG_BLOCK #include #include #include +#endif #include #include @@ -2024,6 +2026,7 @@ COMPATIBLE_IOCTL(GIO_UNISCRNMAP) COMPATIBLE_IOCTL(PIO_UNISCRNMAP) COMPATIBLE_IOCTL(PIO_FONTRESET) COMPATIBLE_IOCTL(PIO_UNIMAPCLR) +#ifdef CONFIG_BLOCK /* Big S */ COMPATIBLE_IOCTL(SCSI_IOCTL_GET_IDLUN) COMPATIBLE_IOCTL(SCSI_IOCTL_DOORLOCK) @@ -2033,6 +2036,7 @@ COMPATIBLE_IOCTL(SCSI_IOCTL_GET_BUS_NUMBER) COMPATIBLE_IOCTL(SCSI_IOCTL_SEND_COMMAND) COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST) COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI) +#endif /* Big T */ COMPATIBLE_IOCTL(TUNSETNOCSUM) COMPATIBLE_IOCTL(TUNSETDEBUG) @@ -2103,6 +2107,7 @@ COMPATIBLE_IOCTL(SIOCGIFVLAN) COMPATIBLE_IOCTL(SIOCSIFVLAN) COMPATIBLE_IOCTL(SIOCBRADDBR) COMPATIBLE_IOCTL(SIOCBRDELBR) +#ifdef CONFIG_BLOCK /* SG stuff */ COMPATIBLE_IOCTL(SG_SET_TIMEOUT) COMPATIBLE_IOCTL(SG_GET_TIMEOUT) @@ -2127,6 +2132,7 @@ COMPATIBLE_IOCTL(SG_SCSI_RESET) COMPATIBLE_IOCTL(SG_GET_REQUEST_TABLE) COMPATIBLE_IOCTL(SG_SET_KEEP_ORPHAN) COMPATIBLE_IOCTL(SG_GET_KEEP_ORPHAN) +#endif /* PPP stuff */ COMPATIBLE_IOCTL(PPPIOCGFLAGS) COMPATIBLE_IOCTL(PPPIOCSFLAGS) -- cgit v0.10.2 From 26d46867b7d27f68a446b073dac7817721ae4c8f Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 29 Apr 2008 02:35:48 -0400 Subject: fix a deadlock issue when poking "eject" file "/sys/devices/LNXSYSTM:00/.../eject" is used to evaluate _EJx method and eject a device in user space. But system hangs when poking the "eject" file because that the device hot-removal code invoke the driver .remove method which will try to remove the "eject" file as a result. Queues the hot-removal function for deferred execution in this patch. http://bugzilla.kernel.org/show_bug.cgi?id=9772 Signed-off-by: Zhang Rui Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 6d85289..9a84ed2 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include /* for acpi_ex_eisa_id_to_string() */ @@ -92,17 +93,37 @@ acpi_device_modalias_show(struct device *dev, struct device_attribute *attr, cha } static DEVICE_ATTR(modalias, 0444, acpi_device_modalias_show, NULL); -static int acpi_eject_operation(acpi_handle handle, int lockable) +static int acpi_bus_hot_remove_device(void *context) { + struct acpi_device *device; + acpi_handle handle = context; struct acpi_object_list arg_list; union acpi_object arg; acpi_status status = AE_OK; - /* - * TBD: evaluate _PS3? - */ + if (acpi_bus_get_device(handle, &device)) + return 0; - if (lockable) { + if (!device) + return 0; + + ACPI_DEBUG_PRINT((ACPI_DB_INFO, + "Hot-removing device %s...\n", device->dev.bus_id)); + + + if (acpi_bus_trim(device, 1)) { + ACPI_DEBUG_PRINT((ACPI_DB_ERROR, + "Removing device failed\n")); + return -1; + } + + /* power off device */ + status = acpi_evaluate_object(handle, "_PS3", NULL, NULL); + if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) + ACPI_DEBUG_PRINT((ACPI_DB_WARN, + "Power-off device failed\n")); + + if (device->flags.lockable) { arg_list.count = 1; arg_list.pointer = &arg; arg.type = ACPI_TYPE_INTEGER; @@ -118,24 +139,19 @@ static int acpi_eject_operation(acpi_handle handle, int lockable) /* * TBD: _EJD support. */ - status = acpi_evaluate_object(handle, "_EJ0", &arg_list, NULL); - if (ACPI_FAILURE(status)) { - return (-ENODEV); - } + if (ACPI_FAILURE(status)) + return -ENODEV; - return (0); + return 0; } static ssize_t acpi_eject_store(struct device *d, struct device_attribute *attr, const char *buf, size_t count) { - int result; int ret = count; - int islockable; acpi_status status; - acpi_handle handle; acpi_object_type type = 0; struct acpi_device *acpi_device = to_acpi_device(d); @@ -154,17 +170,9 @@ acpi_eject_store(struct device *d, struct device_attribute *attr, goto err; } - islockable = acpi_device->flags.lockable; - handle = acpi_device->handle; - - result = acpi_bus_trim(acpi_device, 1); - - if (!result) - result = acpi_eject_operation(handle, islockable); - - if (result) { - ret = -EBUSY; - } + /* remove the device in another thread to fix the deadlock issue */ + ret = kernel_thread(acpi_bus_hot_remove_device, + acpi_device->handle, SIGCHLD); err: return ret; } -- cgit v0.10.2 From b62b8ef906cdf7115af579ce7378886ce3e0ce00 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 29 Apr 2008 02:35:56 -0400 Subject: force offline the processor during hot-removal The ACPI device node for the cpu has already been unregistered when acpi_processor_handle_eject is called. Thus we should offline the cpu and continue, rather than a failure here. http://bugzilla.kernel.org/show_bug.cgi?id=9772 Signed-off-by: Zhang Rui Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 9dd0fa9..1c0008e 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -1014,9 +1014,9 @@ static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu) static int acpi_processor_handle_eject(struct acpi_processor *pr) { - if (cpu_online(pr->id)) { - return (-EINVAL); - } + if (cpu_online(pr->id)) + cpu_down(pr->id); + arch_unregister_cpu(pr->id); acpi_unmap_lsapic(pr->id); return (0); diff --git a/kernel/cpu.c b/kernel/cpu.c index b11f06d..cfb1d43 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -299,6 +299,7 @@ int __ref cpu_down(unsigned int cpu) cpu_maps_update_done(); return err; } +EXPORT_SYMBOL(cpu_down); #endif /*CONFIG_HOTPLUG_CPU*/ /* Requires cpu_add_remove_lock to be held */ -- cgit v0.10.2 From 9f1eb99c757939b0b1783f926130993e9c298bae Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 29 Apr 2008 02:36:07 -0400 Subject: create sysfs link from acpi device to sysdev for cpu Sys I/F under acpi device node and sysdev device node are both needed for cpu hot-removal. User space need this link so that they know they are poking the sys I/F for the same cpu. http://bugzilla.kernel.org/show_bug.cgi?id=9772 Signed-off-by: Zhang Rui Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 1c0008e..feddc8e 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -621,7 +621,7 @@ static int __cpuinit acpi_processor_start(struct acpi_device *device) int result = 0; acpi_status status = AE_OK; struct acpi_processor *pr; - + struct sys_device *sysdev; pr = acpi_driver_data(device); @@ -652,6 +652,10 @@ static int __cpuinit acpi_processor_start(struct acpi_device *device) if (result) goto end; + sysdev = get_cpu_sysdev(pr->id); + if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) + return -EFAULT; + status = acpi_install_notify_handler(pr->handle, ACPI_DEVICE_NOTIFY, acpi_processor_notify, pr); @@ -810,6 +814,8 @@ static int acpi_processor_remove(struct acpi_device *device, int type) status = acpi_remove_notify_handler(pr->handle, ACPI_DEVICE_NOTIFY, acpi_processor_notify); + sysfs_remove_link(&device->dev.kobj, "sysdev"); + acpi_processor_remove_fs(device); if (pr->cdev) { -- cgit v0.10.2 From 5411552c707f4b7387ad63141ef3a559e7488091 Mon Sep 17 00:00:00 2001 From: Cezary Jackiewicz Date: Mon, 9 Jun 2008 16:22:22 -0700 Subject: misc,acpi,backlight: compal Laptop Extras This is driver for Compal Laptop: FL90/IFL90, based on MSI driver. This driver exports a few files in /sys/devices/platform/compal-laptop/: lcd_level - screen brightness: contains a single integer in the range 0..7 (rw) wlan - wlan subsystem state: contains 0 or 1 (rw) bluetooth - bluetooth subsystem state: contains 0 or 1 (rw) raw - raw value taken from embedded controller register (ro) In addition to these platform device attributes the driver registers itself in the Linux backlight control subsystem and is available to userspace under /sys/class/backlight/compal-laptop/. Signed-off-by: Cezary Jackiewicz Signed-off-by: Andi Kleen Cc: Richard Purdie Cc: Henrique de Moraes Holschuh Cc: Len Brown Cc: Alexey Starikovskiy Signed-off-by: Andrew Morton Signed-off-by: Len Brown diff --git a/MAINTAINERS b/MAINTAINERS index ee1c56a..2bfde03 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1160,6 +1160,11 @@ M: scott@spiteful.org L: pcihpd-discuss@lists.sourceforge.net S: Supported +COMPAL LAPTOP SUPPORT +P: Cezary Jackiewicz +M: cezary.jackiewicz@gmail.com +S: Maintained + COMPUTONE INTELLIPORT MULTIPORT CARD P: Michael H. Warfield M: mhw@wittsend.com diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 636af28..7fce0da 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -219,6 +219,23 @@ config MSI_LAPTOP If you have an MSI S270 laptop, say Y or M here. +config COMPAL_LAPTOP + tristate "Compal Laptop Extras" + depends on X86 + depends on ACPI_EC + depends on BACKLIGHT_CLASS_DEVICE + ---help--- + This is a driver for laptops built by Compal: + + Compal FL90/IFL90 + Compal FL91/IFL91 + Compal FL92/JFL92 + Compal FT00/IFT00 + + It adds support for Bluetooth, WLAN and LCD brightness control. + + If you have an Compal FL9x/IFL9x/FT00 laptop, say Y or M here. + config SONY_LAPTOP tristate "Sony Laptop Extras" depends on X86 && ACPI diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 1952875..a6dac6a 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -5,10 +5,11 @@ obj- := misc.o # Dummy rule to force built-in.o to be made obj-$(CONFIG_IBM_ASM) += ibmasm/ obj-$(CONFIG_HDPU_FEATURES) += hdpuftrs/ -obj-$(CONFIG_MSI_LAPTOP) += msi-laptop.o -obj-$(CONFIG_ACER_WMI) += acer-wmi.o obj-$(CONFIG_ASUS_LAPTOP) += asus-laptop.o obj-$(CONFIG_EEEPC_LAPTOP) += eeepc-laptop.o +obj-$(CONFIG_MSI_LAPTOP) += msi-laptop.o +obj-$(CONFIG_COMPAL_LAPTOP) += compal-laptop.o +obj-$(CONFIG_ACER_WMI) += acer-wmi.o obj-$(CONFIG_ATMEL_PWM) += atmel_pwm.o obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o diff --git a/drivers/misc/compal-laptop.c b/drivers/misc/compal-laptop.c new file mode 100644 index 0000000..0c1f587 --- /dev/null +++ b/drivers/misc/compal-laptop.c @@ -0,0 +1,437 @@ +/*-*-linux-c-*-*/ + +/* + Copyright (C) 2008 Cezary Jackiewicz + + based on MSI driver + + Copyright (C) 2006 Lennart Poettering + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + */ + +/* + * comapl-laptop.c - Compal laptop support. + * + * This driver exports a few files in /sys/devices/platform/compal-laptop/: + * + * lcd_level - Screen brightness: contains a single integer in the + * range 0..7. (rw) + * + * wlan - wlan subsystem state: contains 0 or 1 (rw) + * + * bluetooth - Bluetooth subsystem state: contains 0 or 1 (rw) + * + * raw - raw value taken from embedded controller register (ro) + * + * In addition to these platform device attributes the driver + * registers itself in the Linux backlight control subsystem and is + * available to userspace under /sys/class/backlight/compal-laptop/. + * + * This driver might work on other laptops produced by Compal. If you + * want to try it you can pass force=1 as argument to the module which + * will force it to load even when the DMI data doesn't identify the + * laptop as IFL90. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define COMPAL_DRIVER_VERSION "0.2.5" + +#define COMPAL_LCD_LEVEL_MAX 8 + +#define COMPAL_EC_COMMAND_WIRELESS 0xBB +#define COMPAL_EC_COMMAND_LCD_LEVEL 0xB9 + +#define KILLSWITCH_MASK 0x10 +#define WLAN_MASK 0x01 +#define BT_MASK 0x02 + +static int force; +module_param(force, bool, 0); +MODULE_PARM_DESC(force, "Force driver load, ignore DMI data"); + +/* Hardware access */ + +static int set_lcd_level(int level) +{ + if (level < 0 || level >= COMPAL_LCD_LEVEL_MAX) + return -EINVAL; + + ec_write(COMPAL_EC_COMMAND_LCD_LEVEL, level); + + return 0; +} + +static int get_lcd_level(void) +{ + u8 result; + + ec_read(COMPAL_EC_COMMAND_LCD_LEVEL, &result); + + return (int) result; +} + +static int set_wlan_state(int state) +{ + u8 result, value; + + ec_read(COMPAL_EC_COMMAND_WIRELESS, &result); + + if ((result & KILLSWITCH_MASK) == 0) + return -EINVAL; + else { + if (state) + value = (u8) (result | WLAN_MASK); + else + value = (u8) (result & ~WLAN_MASK); + ec_write(COMPAL_EC_COMMAND_WIRELESS, value); + } + + return 0; +} + +static int set_bluetooth_state(int state) +{ + u8 result, value; + + ec_read(COMPAL_EC_COMMAND_WIRELESS, &result); + + if ((result & KILLSWITCH_MASK) == 0) + return -EINVAL; + else { + if (state) + value = (u8) (result | BT_MASK); + else + value = (u8) (result & ~BT_MASK); + ec_write(COMPAL_EC_COMMAND_WIRELESS, value); + } + + return 0; +} + +static int get_wireless_state(int *wlan, int *bluetooth) +{ + u8 result; + + ec_read(COMPAL_EC_COMMAND_WIRELESS, &result); + + if (wlan) { + if ((result & KILLSWITCH_MASK) == 0) + *wlan = 0; + else + *wlan = result & WLAN_MASK; + } + + if (bluetooth) { + if ((result & KILLSWITCH_MASK) == 0) + *bluetooth = 0; + else + *bluetooth = (result & BT_MASK) >> 1; + } + + return 0; +} + +/* Backlight device stuff */ + +static int bl_get_brightness(struct backlight_device *b) +{ + return get_lcd_level(); +} + + +static int bl_update_status(struct backlight_device *b) +{ + return set_lcd_level(b->props.brightness); +} + +static struct backlight_ops compalbl_ops = { + .get_brightness = bl_get_brightness, + .update_status = bl_update_status, +}; + +static struct backlight_device *compalbl_device; + +/* Platform device */ + +static ssize_t show_wlan(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret, enabled; + + ret = get_wireless_state(&enabled, NULL); + if (ret < 0) + return ret; + + return sprintf(buf, "%i\n", enabled); +} + +static ssize_t show_raw(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u8 result; + + ec_read(COMPAL_EC_COMMAND_WIRELESS, &result); + + return sprintf(buf, "%i\n", result); +} + +static ssize_t show_bluetooth(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret, enabled; + + ret = get_wireless_state(NULL, &enabled); + if (ret < 0) + return ret; + + return sprintf(buf, "%i\n", enabled); +} + +static ssize_t show_lcd_level(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret; + + ret = get_lcd_level(); + if (ret < 0) + return ret; + + return sprintf(buf, "%i\n", ret); +} + +static ssize_t store_lcd_level(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + int level, ret; + + if (sscanf(buf, "%i", &level) != 1 || + (level < 0 || level >= COMPAL_LCD_LEVEL_MAX)) + return -EINVAL; + + ret = set_lcd_level(level); + if (ret < 0) + return ret; + + return count; +} + +static ssize_t store_wlan_state(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + int state, ret; + + if (sscanf(buf, "%i", &state) != 1 || (state < 0 || state > 1)) + return -EINVAL; + + ret = set_wlan_state(state); + if (ret < 0) + return ret; + + return count; +} + +static ssize_t store_bluetooth_state(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + int state, ret; + + if (sscanf(buf, "%i", &state) != 1 || (state < 0 || state > 1)) + return -EINVAL; + + ret = set_bluetooth_state(state); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(lcd_level, 0644, show_lcd_level, store_lcd_level); +static DEVICE_ATTR(bluetooth, 0644, show_bluetooth, store_bluetooth_state); +static DEVICE_ATTR(wlan, 0644, show_wlan, store_wlan_state); +static DEVICE_ATTR(raw, 0444, show_raw, NULL); + +static struct attribute *compal_attributes[] = { + &dev_attr_lcd_level.attr, + &dev_attr_bluetooth.attr, + &dev_attr_wlan.attr, + &dev_attr_raw.attr, + NULL +}; + +static struct attribute_group compal_attribute_group = { + .attrs = compal_attributes +}; + +static struct platform_driver compal_driver = { + .driver = { + .name = "compal-laptop", + .owner = THIS_MODULE, + } +}; + +static struct platform_device *compal_device; + +/* Initialization */ + +static int dmi_check_cb(const struct dmi_system_id *id) +{ + printk(KERN_INFO "compal-laptop: Identified laptop model '%s'.\n", + id->ident); + + return 0; +} + +static struct dmi_system_id __initdata compal_dmi_table[] = { + { + .ident = "FL90/IFL90", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "IFL90"), + DMI_MATCH(DMI_BOARD_VERSION, "IFT00"), + }, + .callback = dmi_check_cb + }, + { + .ident = "FL90/IFL90", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "IFL90"), + DMI_MATCH(DMI_BOARD_VERSION, "REFERENCE"), + }, + .callback = dmi_check_cb + }, + { + .ident = "FL91/IFL91", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "IFL91"), + DMI_MATCH(DMI_BOARD_VERSION, "IFT00"), + }, + .callback = dmi_check_cb + }, + { + .ident = "FL92/JFL92", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "JFL92"), + DMI_MATCH(DMI_BOARD_VERSION, "IFT00"), + }, + .callback = dmi_check_cb + }, + { + .ident = "FT00/IFT00", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "IFT00"), + DMI_MATCH(DMI_BOARD_VERSION, "IFT00"), + }, + .callback = dmi_check_cb + }, + { } +}; + +static int __init compal_init(void) +{ + int ret; + + if (acpi_disabled) + return -ENODEV; + + if (!force && !dmi_check_system(compal_dmi_table)) + return -ENODEV; + + /* Register backlight stuff */ + + compalbl_device = backlight_device_register("compal-laptop", NULL, NULL, + &compalbl_ops); + if (IS_ERR(compalbl_device)) + return PTR_ERR(compalbl_device); + + compalbl_device->props.max_brightness = COMPAL_LCD_LEVEL_MAX-1; + + ret = platform_driver_register(&compal_driver); + if (ret) + goto fail_backlight; + + /* Register platform stuff */ + + compal_device = platform_device_alloc("compal-laptop", -1); + if (!compal_device) { + ret = -ENOMEM; + goto fail_platform_driver; + } + + ret = platform_device_add(compal_device); + if (ret) + goto fail_platform_device1; + + ret = sysfs_create_group(&compal_device->dev.kobj, + &compal_attribute_group); + if (ret) + goto fail_platform_device2; + + printk(KERN_INFO "compal-laptop: driver "COMPAL_DRIVER_VERSION + " successfully loaded.\n"); + + return 0; + +fail_platform_device2: + + platform_device_del(compal_device); + +fail_platform_device1: + + platform_device_put(compal_device); + +fail_platform_driver: + + platform_driver_unregister(&compal_driver); + +fail_backlight: + + backlight_device_unregister(compalbl_device); + + return ret; +} + +static void __exit compal_cleanup(void) +{ + + sysfs_remove_group(&compal_device->dev.kobj, &compal_attribute_group); + platform_device_unregister(compal_device); + platform_driver_unregister(&compal_driver); + backlight_device_unregister(compalbl_device); + + printk(KERN_INFO "compal-laptop: driver unloaded.\n"); +} + +module_init(compal_init); +module_exit(compal_cleanup); + +MODULE_AUTHOR("Cezary Jackiewicz"); +MODULE_DESCRIPTION("Compal Laptop Support"); +MODULE_VERSION(COMPAL_DRIVER_VERSION); +MODULE_LICENSE("GPL"); + +MODULE_ALIAS("dmi:*:rnIFL90:rvrIFT00:*"); +MODULE_ALIAS("dmi:*:rnIFL90:rvrREFERENCE:*"); +MODULE_ALIAS("dmi:*:rnIFL91:rvrIFT00:*"); +MODULE_ALIAS("dmi:*:rnJFL92:rvrIFT00:*"); +MODULE_ALIAS("dmi:*:rnIFT00:rvrIFT00:*"); -- cgit v0.10.2 From 706546d02384b64e083bd9130c56eaa599c66038 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 9 Jun 2008 16:22:23 -0700 Subject: ACPI: change processors from array to per_cpu variable Change processors from an array sized by NR_CPUS to a per_cpu variable. Signed-off-by: Mike Travis Signed-off-by: Andrew Morton Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index feddc8e..9a803f8 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -118,7 +118,7 @@ static const struct file_operations acpi_processor_info_fops = { .release = single_release, }; -struct acpi_processor *processors[NR_CPUS]; +DEFINE_PER_CPU(struct acpi_processor *, processors); struct acpi_processor_errata errata __read_mostly; /* -------------------------------------------------------------------------- @@ -614,7 +614,7 @@ static int acpi_processor_get_info(struct acpi_processor *pr, unsigned has_uid) return 0; } -static void *processor_device_array[NR_CPUS]; +static DEFINE_PER_CPU(void *, processor_device_array); static int __cpuinit acpi_processor_start(struct acpi_device *device) { @@ -638,15 +638,15 @@ static int __cpuinit acpi_processor_start(struct acpi_device *device) * ACPI id of processors can be reported wrongly by the BIOS. * Don't trust it blindly */ - if (processor_device_array[pr->id] != NULL && - processor_device_array[pr->id] != device) { + if (per_cpu(processor_device_array, pr->id) != NULL && + per_cpu(processor_device_array, pr->id) != device) { printk(KERN_WARNING "BIOS reported wrong ACPI id " "for the processor\n"); return -ENODEV; } - processor_device_array[pr->id] = device; + per_cpu(processor_device_array, pr->id) = device; - processors[pr->id] = pr; + per_cpu(processors, pr->id) = pr; result = acpi_processor_add_fs(device); if (result) @@ -753,7 +753,7 @@ static int acpi_cpu_soft_notify(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct acpi_processor *pr = processors[cpu]; + struct acpi_processor *pr = per_cpu(processors, cpu); if (action == CPU_ONLINE && pr) { acpi_processor_ppc_has_changed(pr); @@ -825,8 +825,8 @@ static int acpi_processor_remove(struct acpi_device *device, int type) pr->cdev = NULL; } - processors[pr->id] = NULL; - processor_device_array[pr->id] = NULL; + per_cpu(processors, pr->id) = NULL; + per_cpu(processor_device_array, pr->id) = NULL; kfree(pr); return 0; @@ -1074,8 +1074,6 @@ static int __init acpi_processor_init(void) { int result = 0; - - memset(&processors, 0, sizeof(processors)); memset(&errata, 0, sizeof(errata)); #ifdef CONFIG_SMP diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 4976e5d..0fc310e 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -401,7 +401,7 @@ static void acpi_processor_idle(void) */ local_irq_disable(); - pr = processors[smp_processor_id()]; + pr = __get_cpu_var(processors); if (!pr) { local_irq_enable(); return; @@ -1431,7 +1431,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, struct acpi_processor *pr; struct acpi_processor_cx *cx = cpuidle_get_statedata(state); - pr = processors[smp_processor_id()]; + pr = __get_cpu_var(processors); if (unlikely(!pr)) return 0; @@ -1471,7 +1471,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, u32 t1, t2; int sleep_ticks = 0; - pr = processors[smp_processor_id()]; + pr = __get_cpu_var(processors); if (unlikely(!pr)) return 0; @@ -1549,7 +1549,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, u32 t1, t2; int sleep_ticks = 0; - pr = processors[smp_processor_id()]; + pr = __get_cpu_var(processors); if (unlikely(!pr)) return 0; diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index d80b2d1..b474996 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -89,7 +89,7 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb, if (event != CPUFREQ_INCOMPATIBLE) goto out; - pr = processors[policy->cpu]; + pr = per_cpu(processors, policy->cpu); if (!pr || !pr->performance) goto out; @@ -572,7 +572,7 @@ int acpi_processor_preregister_performance( /* Call _PSD for all CPUs */ for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr) { /* Look only at processors in ACPI namespace */ continue; @@ -603,7 +603,7 @@ int acpi_processor_preregister_performance( * domain info. */ for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr) continue; @@ -624,7 +624,7 @@ int acpi_processor_preregister_performance( cpus_clear(covered_cpus); for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr) continue; @@ -651,7 +651,7 @@ int acpi_processor_preregister_performance( if (i == j) continue; - match_pr = processors[j]; + match_pr = per_cpu(processors, j); if (!match_pr) continue; @@ -680,7 +680,7 @@ int acpi_processor_preregister_performance( if (i == j) continue; - match_pr = processors[j]; + match_pr = per_cpu(processors, j); if (!match_pr) continue; @@ -697,7 +697,7 @@ int acpi_processor_preregister_performance( err_ret: for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr || !pr->performance) continue; @@ -728,7 +728,7 @@ acpi_processor_register_performance(struct acpi_processor_performance mutex_lock(&performance_mutex); - pr = processors[cpu]; + pr = per_cpu(processors, cpu); if (!pr) { mutex_unlock(&performance_mutex); return -ENODEV; @@ -766,7 +766,7 @@ acpi_processor_unregister_performance(struct acpi_processor_performance mutex_lock(&performance_mutex); - pr = processors[cpu]; + pr = per_cpu(processors, cpu); if (!pr) { mutex_unlock(&performance_mutex); return; diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index bb06738..8728782 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -71,7 +71,7 @@ static int acpi_processor_update_tsd_coord(void) * coordination between all CPUs. */ for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr) continue; @@ -93,7 +93,7 @@ static int acpi_processor_update_tsd_coord(void) cpus_clear(covered_cpus); for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr) continue; @@ -119,7 +119,7 @@ static int acpi_processor_update_tsd_coord(void) if (i == j) continue; - match_pr = processors[j]; + match_pr = per_cpu(processors, j); if (!match_pr) continue; @@ -152,7 +152,7 @@ static int acpi_processor_update_tsd_coord(void) if (i == j) continue; - match_pr = processors[j]; + match_pr = per_cpu(processors, j); if (!match_pr) continue; @@ -172,7 +172,7 @@ static int acpi_processor_update_tsd_coord(void) err_ret: for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr) continue; @@ -214,7 +214,7 @@ static int acpi_processor_throttling_notifier(unsigned long event, void *data) struct acpi_processor_throttling *p_throttling; cpu = p_tstate->cpu; - pr = processors[cpu]; + pr = per_cpu(processors, cpu); if (!pr) { ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Invalid pr pointer\n")); return 0; @@ -1035,7 +1035,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) * cpus. */ for_each_cpu_mask(i, online_throttling_cpus) { - match_pr = processors[i]; + match_pr = per_cpu(processors, i); /* * If the pointer is invalid, we will report the * error message and continue. diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 06ebb6e..3795590 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -255,7 +255,7 @@ extern void acpi_processor_unregister_performance(struct int acpi_processor_notify_smm(struct module *calling_module); /* for communication between multiple parts of the processor kernel module */ -extern struct acpi_processor *processors[NR_CPUS]; +DECLARE_PER_CPU(struct acpi_processor *, processors); extern struct acpi_processor_errata errata; void arch_acpi_processor_init_pdc(struct acpi_processor *pr); -- cgit v0.10.2 From 20b937343e55c16e37b1a4ad2176760b4a11002c Mon Sep 17 00:00:00 2001 From: Jonathan Woithe Date: Wed, 11 Jun 2008 10:14:56 +0930 Subject: Fujitsu-laptop update Add additional capabilities to the Fujitsu-laptop driver. * Brightness hotkey actions are sent to userspace. This can be disabled using a module parameter if it causes issues with models which handle these keys transparently in the BIOS. * Actions of additional hotkeys found on some Fujitsu models (eg: the suspend key and the dedicated "power on passphrase" keys) are broadcast to userspace. * An alternative brightness control method used by some Fujitsu models (for example, the S6410) is now supported, enabling software brightness controls on models using this method. * DMI-based module aliases are configured for the S6410 and S7020. * The current LCD brightness after booting should now be reflected in the standard backlight interface sysfs file (previously it was always set to 0). The platform brightness sysfs interface has always been fine. Thanks go to Peter Gruber who provided a significant portion of this code and tested various iterations of the patch on his S6410. Signed-off-by: Peter Gruber Signed-off-by: Jonathan Woithe Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/MAINTAINERS b/MAINTAINERS index 2bfde03..ec5c8ee 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1792,6 +1792,12 @@ P: David Howells M: dhowells@redhat.com S: Maintained +FUJITSU LAPTOP EXTRAS +P: Jonathan Woithe +M: jwoithe@physics.adelaide.edu.au +L: linux-acpi@vger.kernel.org +S: Maintained + FUSE: FILESYSTEM IN USERSPACE P: Miklos Szeredi M: miklos@szeredi.hu diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 7fce0da..0551d95 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -185,11 +185,22 @@ config FUJITSU_LAPTOP * P2xxx/P5xxx/S6xxx/S7xxx series Lifebooks * Possibly other Fujitsu laptop models + * Tested with S6410 and S7020 - It adds support for LCD brightness control. + It adds support for LCD brightness control and some hotkeys. If you have a Fujitsu laptop, say Y or M here. +config FUJITSU_LAPTOP_DEBUG + bool "Verbose debug mode for Fujitsu Laptop Extras" + depends on FUJITSU_LAPTOP + default n + ---help--- + Enables extra debug output from the fujitsu extras driver, at the + expense of a slight increase in driver size. + + If you are not sure, say N here. + config TC1100_WMI tristate "HP Compaq TC1100 Tablet WMI Extras (EXPERIMENTAL)" depends on X86 && !X86_64 diff --git a/drivers/misc/fujitsu-laptop.c b/drivers/misc/fujitsu-laptop.c index 6d14e8f..7a1ef6c 100644 --- a/drivers/misc/fujitsu-laptop.c +++ b/drivers/misc/fujitsu-laptop.c @@ -1,12 +1,14 @@ /*-*-linux-c-*-*/ /* - Copyright (C) 2007 Jonathan Woithe + Copyright (C) 2007,2008 Jonathan Woithe + Copyright (C) 2008 Peter Gruber Based on earlier work: Copyright (C) 2003 Shane Spencer Adrian Yee - Templated from msi-laptop.c which is copyright by its respective authors. + Templated from msi-laptop.c and thinkpad_acpi.c which is copyright + by its respective authors. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -39,8 +41,17 @@ * registers itself in the Linux backlight control subsystem and is * available to userspace under /sys/class/backlight/fujitsu-laptop/. * - * This driver has been tested on a Fujitsu Lifebook S7020. It should - * work on most P-series and S-series Lifebooks, but YMMV. + * Hotkeys present on certain Fujitsu laptops (eg: the S6xxx series) are + * also supported by this driver. + * + * This driver has been tested on a Fujitsu Lifebook S6410 and S7020. It + * should work on most P-series and S-series Lifebooks, but YMMV. + * + * The module parameter use_alt_lcd_levels switches between different ACPI + * brightness controls which are used by different Fujitsu laptops. In most + * cases the correct method is automatically detected. "use_alt_lcd_levels=1" + * is applicable for a Fujitsu Lifebook S6410 if autodetection fails. + * */ #include @@ -49,30 +60,105 @@ #include #include #include +#include +#include +#include #include -#define FUJITSU_DRIVER_VERSION "0.3" +#define FUJITSU_DRIVER_VERSION "0.4.2" #define FUJITSU_LCD_N_LEVELS 8 #define ACPI_FUJITSU_CLASS "fujitsu" #define ACPI_FUJITSU_HID "FUJ02B1" -#define ACPI_FUJITSU_DRIVER_NAME "Fujitsu laptop FUJ02B1 ACPI extras driver" +#define ACPI_FUJITSU_DRIVER_NAME "Fujitsu laptop FUJ02B1 ACPI brightness driver" #define ACPI_FUJITSU_DEVICE_NAME "Fujitsu FUJ02B1" - +#define ACPI_FUJITSU_HOTKEY_HID "FUJ02E3" +#define ACPI_FUJITSU_HOTKEY_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver" +#define ACPI_FUJITSU_HOTKEY_DEVICE_NAME "Fujitsu FUJ02E3" + +#define ACPI_FUJITSU_NOTIFY_CODE1 0x80 + +#define ACPI_VIDEO_NOTIFY_INC_BRIGHTNESS 0x86 +#define ACPI_VIDEO_NOTIFY_DEC_BRIGHTNESS 0x87 + +/* Hotkey details */ +#define LOCK_KEY 0x410 /* codes for the keys in the GIRB register */ +#define DISPLAY_KEY 0x411 /* keys are mapped to KEY_SCREENLOCK (the key with the key symbol) */ +#define ENERGY_KEY 0x412 /* KEY_MEDIA (the key with the laptop symbol, KEY_EMAIL (E key)) */ +#define REST_KEY 0x413 /* KEY_SUSPEND (R key) */ + +#define MAX_HOTKEY_RINGBUFFER_SIZE 100 +#define RINGBUFFERSIZE 40 + +/* Debugging */ +#define FUJLAPTOP_LOG ACPI_FUJITSU_HID ": " +#define FUJLAPTOP_ERR KERN_ERR FUJLAPTOP_LOG +#define FUJLAPTOP_NOTICE KERN_NOTICE FUJLAPTOP_LOG +#define FUJLAPTOP_INFO KERN_INFO FUJLAPTOP_LOG +#define FUJLAPTOP_DEBUG KERN_DEBUG FUJLAPTOP_LOG + +#define FUJLAPTOP_DBG_ALL 0xffff +#define FUJLAPTOP_DBG_ERROR 0x0001 +#define FUJLAPTOP_DBG_WARN 0x0002 +#define FUJLAPTOP_DBG_INFO 0x0004 +#define FUJLAPTOP_DBG_TRACE 0x0008 + +#define dbg_printk(a_dbg_level, format, arg...) \ + do { if (dbg_level & a_dbg_level) \ + printk(FUJLAPTOP_DEBUG "%s: " format, __func__ , ## arg); \ + } while (0) +#ifdef CONFIG_FUJITSU_LAPTOP_DEBUG +#define vdbg_printk(a_dbg_level, format, arg...) \ + dbg_printk(a_dbg_level, format, ## arg) +#else +#define vdbg_printk(a_dbg_level, format, arg...) +#endif + +/* Device controlling the backlight and associated keys */ struct fujitsu_t { acpi_handle acpi_handle; + struct acpi_device *dev; + struct input_dev *input; + char phys[32]; struct backlight_device *bl_device; struct platform_device *pf_device; - unsigned long fuj02b1_state; + unsigned int max_brightness; unsigned int brightness_changed; unsigned int brightness_level; }; static struct fujitsu_t *fujitsu; +static int use_alt_lcd_levels = -1; +static int disable_brightness_keys = -1; +static int disable_brightness_adjust = -1; + +/* Device used to access other hotkeys on the laptop */ +struct fujitsu_hotkey_t { + acpi_handle acpi_handle; + struct acpi_device *dev; + struct input_dev *input; + char phys[32]; + struct platform_device *pf_device; + struct kfifo *fifo; + spinlock_t fifo_lock; + + unsigned int irb; /* info about the pressed buttons */ +}; -/* Hardware access */ +static struct fujitsu_hotkey_t *fujitsu_hotkey; + +static void acpi_fujitsu_hotkey_notify(acpi_handle handle, u32 event, + void *data); + +#ifdef CONFIG_FUJITSU_LAPTOP_DEBUG +static u32 dbg_level = 0x03; +#endif + +static void acpi_fujitsu_notify(acpi_handle handle, u32 event, void *data); + +/* Hardware access for LCD brightness control */ static int set_lcd_level(int level) { @@ -81,7 +167,10 @@ static int set_lcd_level(int level) struct acpi_object_list arg_list = { 1, &arg0 }; acpi_handle handle = NULL; - if (level < 0 || level >= FUJITSU_LCD_N_LEVELS) + vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBLL [%d]\n", + level); + + if (level < 0 || level >= fujitsu->max_brightness) return -EINVAL; if (!fujitsu) @@ -89,7 +178,38 @@ static int set_lcd_level(int level) status = acpi_get_handle(fujitsu->acpi_handle, "SBLL", &handle); if (ACPI_FAILURE(status)) { - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "SBLL not present\n")); + vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBLL not present\n"); + return -ENODEV; + } + + arg0.integer.value = level; + + status = acpi_evaluate_object(handle, NULL, &arg_list, NULL); + if (ACPI_FAILURE(status)) + return -ENODEV; + + return 0; +} + +static int set_lcd_level_alt(int level) +{ + acpi_status status = AE_OK; + union acpi_object arg0 = { ACPI_TYPE_INTEGER }; + struct acpi_object_list arg_list = { 1, &arg0 }; + acpi_handle handle = NULL; + + vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBL2 [%d]\n", + level); + + if (level < 0 || level >= fujitsu->max_brightness) + return -EINVAL; + + if (!fujitsu) + return -EINVAL; + + status = acpi_get_handle(fujitsu->acpi_handle, "SBL2", &handle); + if (ACPI_FAILURE(status)) { + vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBL2 not present\n"); return -ENODEV; } @@ -107,13 +227,52 @@ static int get_lcd_level(void) unsigned long state = 0; acpi_status status = AE_OK; - // Get the Brightness + vdbg_printk(FUJLAPTOP_DBG_TRACE, "get lcd level via GBLL\n"); + status = acpi_evaluate_integer(fujitsu->acpi_handle, "GBLL", NULL, &state); if (status < 0) return status; - fujitsu->fuj02b1_state = state; + fujitsu->brightness_level = state & 0x0fffffff; + + if (state & 0x80000000) + fujitsu->brightness_changed = 1; + else + fujitsu->brightness_changed = 0; + + return fujitsu->brightness_level; +} + +static int get_max_brightness(void) +{ + unsigned long state = 0; + acpi_status status = AE_OK; + + vdbg_printk(FUJLAPTOP_DBG_TRACE, "get max lcd level via RBLL\n"); + + status = + acpi_evaluate_integer(fujitsu->acpi_handle, "RBLL", NULL, &state); + if (status < 0) + return status; + + fujitsu->max_brightness = state; + + return fujitsu->max_brightness; +} + +static int get_lcd_level_alt(void) +{ + unsigned long state = 0; + acpi_status status = AE_OK; + + vdbg_printk(FUJLAPTOP_DBG_TRACE, "get lcd level via GBLS\n"); + + status = + acpi_evaluate_integer(fujitsu->acpi_handle, "GBLS", NULL, &state); + if (status < 0) + return status; + fujitsu->brightness_level = state & 0x0fffffff; if (state & 0x80000000) @@ -128,12 +287,18 @@ static int get_lcd_level(void) static int bl_get_brightness(struct backlight_device *b) { - return get_lcd_level(); + if (use_alt_lcd_levels) + return get_lcd_level_alt(); + else + return get_lcd_level(); } static int bl_update_status(struct backlight_device *b) { - return set_lcd_level(b->props.brightness); + if (use_alt_lcd_levels) + return set_lcd_level_alt(b->props.brightness); + else + return set_lcd_level(b->props.brightness); } static struct backlight_ops fujitsubl_ops = { @@ -141,7 +306,35 @@ static struct backlight_ops fujitsubl_ops = { .update_status = bl_update_status, }; -/* Platform device */ +/* Platform LCD brightness device */ + +static ssize_t +show_max_brightness(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + int ret; + + ret = get_max_brightness(); + if (ret < 0) + return ret; + + return sprintf(buf, "%i\n", ret); +} + +static ssize_t +show_brightness_changed(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + int ret; + + ret = fujitsu->brightness_changed; + if (ret < 0) + return ret; + + return sprintf(buf, "%i\n", ret); +} static ssize_t show_lcd_level(struct device *dev, struct device_attribute *attr, char *buf) @@ -149,7 +342,10 @@ static ssize_t show_lcd_level(struct device *dev, int ret; - ret = get_lcd_level(); + if (use_alt_lcd_levels) + ret = get_lcd_level_alt(); + else + ret = get_lcd_level(); if (ret < 0) return ret; @@ -164,19 +360,61 @@ static ssize_t store_lcd_level(struct device *dev, int level, ret; if (sscanf(buf, "%i", &level) != 1 - || (level < 0 || level >= FUJITSU_LCD_N_LEVELS)) + || (level < 0 || level >= fujitsu->max_brightness)) return -EINVAL; - ret = set_lcd_level(level); + if (use_alt_lcd_levels) + ret = set_lcd_level_alt(level); + else + ret = set_lcd_level(level); + if (ret < 0) + return ret; + + if (use_alt_lcd_levels) + ret = get_lcd_level_alt(); + else + ret = get_lcd_level(); if (ret < 0) return ret; return count; } +/* Hardware access for hotkey device */ + +static int get_irb(void) +{ + unsigned long state = 0; + acpi_status status = AE_OK; + + vdbg_printk(FUJLAPTOP_DBG_TRACE, "Get irb\n"); + + status = + acpi_evaluate_integer(fujitsu_hotkey->acpi_handle, "GIRB", NULL, + &state); + if (status < 0) + return status; + + fujitsu_hotkey->irb = state; + + return fujitsu_hotkey->irb; +} + +static ssize_t +ignore_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return count; +} + +static DEVICE_ATTR(max_brightness, 0444, show_max_brightness, ignore_store); +static DEVICE_ATTR(brightness_changed, 0444, show_brightness_changed, + ignore_store); static DEVICE_ATTR(lcd_level, 0644, show_lcd_level, store_lcd_level); static struct attribute *fujitsupf_attributes[] = { + &dev_attr_brightness_changed.attr, + &dev_attr_max_brightness.attr, &dev_attr_lcd_level.attr, NULL }; @@ -192,14 +430,52 @@ static struct platform_driver fujitsupf_driver = { } }; -/* ACPI device */ +static int dmi_check_cb_s6410(const struct dmi_system_id *id) +{ + acpi_handle handle; + int have_blnf; + printk(KERN_INFO "fujitsu-laptop: Identified laptop model '%s'.\n", + id->ident); + have_blnf = ACPI_SUCCESS + (acpi_get_handle(NULL, "\\_SB.PCI0.GFX0.LCD.BLNF", &handle)); + if (use_alt_lcd_levels == -1) { + vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detecting usealt\n"); + use_alt_lcd_levels = 1; + } + if (disable_brightness_keys == -1) { + vdbg_printk(FUJLAPTOP_DBG_TRACE, + "auto-detecting disable_keys\n"); + disable_brightness_keys = have_blnf ? 1 : 0; + } + if (disable_brightness_adjust == -1) { + vdbg_printk(FUJLAPTOP_DBG_TRACE, + "auto-detecting disable_adjust\n"); + disable_brightness_adjust = have_blnf ? 0 : 1; + } + return 0; +} + +static struct dmi_system_id __initdata fujitsu_dmi_table[] = { + { + .ident = "Fujitsu Siemens", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK S6410"), + }, + .callback = dmi_check_cb_s6410}, + {} +}; + +/* ACPI device for LCD brightness control */ static int acpi_fujitsu_add(struct acpi_device *device) { + acpi_status status; + acpi_handle handle; int result = 0; int state = 0; - - ACPI_FUNCTION_TRACE("acpi_fujitsu_add"); + struct input_dev *input; + int error; if (!device) return -EINVAL; @@ -209,10 +485,42 @@ static int acpi_fujitsu_add(struct acpi_device *device) sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS); acpi_driver_data(device) = fujitsu; + status = acpi_install_notify_handler(device->handle, + ACPI_DEVICE_NOTIFY, + acpi_fujitsu_notify, fujitsu); + + if (ACPI_FAILURE(status)) { + printk(KERN_ERR "Error installing notify handler\n"); + error = -ENODEV; + goto err_stop; + } + + fujitsu->input = input = input_allocate_device(); + if (!input) { + error = -ENOMEM; + goto err_uninstall_notify; + } + + snprintf(fujitsu->phys, sizeof(fujitsu->phys), + "%s/video/input0", acpi_device_hid(device)); + + input->name = acpi_device_name(device); + input->phys = fujitsu->phys; + input->id.bustype = BUS_HOST; + input->id.product = 0x06; + input->dev.parent = &device->dev; + input->evbit[0] = BIT(EV_KEY); + set_bit(KEY_BRIGHTNESSUP, input->keybit); + set_bit(KEY_BRIGHTNESSDOWN, input->keybit); + set_bit(KEY_UNKNOWN, input->keybit); + + error = input_register_device(input); + if (error) + goto err_free_input_dev; + result = acpi_bus_get_power(fujitsu->acpi_handle, &state); if (result) { - ACPI_DEBUG_PRINT((ACPI_DB_ERROR, - "Error reading power state\n")); + printk(KERN_ERR "Error reading power state\n"); goto end; } @@ -220,22 +528,373 @@ static int acpi_fujitsu_add(struct acpi_device *device) acpi_device_name(device), acpi_device_bid(device), !device->power.state ? "on" : "off"); - end: + fujitsu->dev = device; + + if (ACPI_SUCCESS + (acpi_get_handle(device->handle, METHOD_NAME__INI, &handle))) { + vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n"); + if (ACPI_FAILURE + (acpi_evaluate_object + (device->handle, METHOD_NAME__INI, NULL, NULL))) + printk(KERN_ERR "_INI Method failed\n"); + } + + /* do config (detect defaults) */ + dmi_check_system(fujitsu_dmi_table); + use_alt_lcd_levels = use_alt_lcd_levels == 1 ? 1 : 0; + disable_brightness_keys = disable_brightness_keys == 1 ? 1 : 0; + disable_brightness_adjust = disable_brightness_adjust == 1 ? 1 : 0; + vdbg_printk(FUJLAPTOP_DBG_INFO, + "config: [alt interface: %d], [key disable: %d], [adjust disable: %d]\n", + use_alt_lcd_levels, disable_brightness_keys, + disable_brightness_adjust); + + if (get_max_brightness() <= 0) + fujitsu->max_brightness = FUJITSU_LCD_N_LEVELS; + if (use_alt_lcd_levels) + get_lcd_level_alt(); + else + get_lcd_level(); + + return result; + +end: +err_free_input_dev: + input_free_device(input); +err_uninstall_notify: + acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY, + acpi_fujitsu_notify); +err_stop: return result; } static int acpi_fujitsu_remove(struct acpi_device *device, int type) { - ACPI_FUNCTION_TRACE("acpi_fujitsu_remove"); + acpi_status status; + struct fujitsu_t *fujitsu = NULL; if (!device || !acpi_driver_data(device)) return -EINVAL; + + fujitsu = acpi_driver_data(device); + + status = acpi_remove_notify_handler(fujitsu->acpi_handle, + ACPI_DEVICE_NOTIFY, + acpi_fujitsu_notify); + + if (!device || !acpi_driver_data(device)) + return -EINVAL; + fujitsu->acpi_handle = NULL; return 0; } +/* Brightness notify */ + +static void acpi_fujitsu_notify(acpi_handle handle, u32 event, void *data) +{ + struct input_dev *input; + int keycode; + int oldb, newb; + + input = fujitsu->input; + + switch (event) { + case ACPI_FUJITSU_NOTIFY_CODE1: + keycode = 0; + oldb = fujitsu->brightness_level; + get_lcd_level(); /* the alt version always yields changed */ + newb = fujitsu->brightness_level; + + vdbg_printk(FUJLAPTOP_DBG_TRACE, + "brightness button event [%i -> %i (%i)]\n", + oldb, newb, fujitsu->brightness_changed); + + if (oldb == newb && fujitsu->brightness_changed) { + keycode = 0; + if (disable_brightness_keys != 1) { + if (oldb == 0) { + acpi_bus_generate_proc_event(fujitsu-> + dev, + ACPI_VIDEO_NOTIFY_DEC_BRIGHTNESS, + 0); + keycode = KEY_BRIGHTNESSDOWN; + } else if (oldb == + (fujitsu->max_brightness) - 1) { + acpi_bus_generate_proc_event(fujitsu-> + dev, + ACPI_VIDEO_NOTIFY_INC_BRIGHTNESS, + 0); + keycode = KEY_BRIGHTNESSUP; + } + } + } else if (oldb < newb) { + if (disable_brightness_adjust != 1) { + if (use_alt_lcd_levels) + set_lcd_level_alt(newb); + else + set_lcd_level(newb); + } + if (disable_brightness_keys != 1) { + acpi_bus_generate_proc_event(fujitsu->dev, + ACPI_VIDEO_NOTIFY_INC_BRIGHTNESS, + 0); + keycode = KEY_BRIGHTNESSUP; + } + } else if (oldb > newb) { + if (disable_brightness_adjust != 1) { + if (use_alt_lcd_levels) + set_lcd_level_alt(newb); + else + set_lcd_level(newb); + } + if (disable_brightness_keys != 1) { + acpi_bus_generate_proc_event(fujitsu->dev, + ACPI_VIDEO_NOTIFY_DEC_BRIGHTNESS, + 0); + keycode = KEY_BRIGHTNESSDOWN; + } + } else { + keycode = KEY_UNKNOWN; + } + break; + default: + keycode = KEY_UNKNOWN; + vdbg_printk(FUJLAPTOP_DBG_WARN, + "unsupported event [0x%x]\n", event); + break; + } + + if (keycode != 0) { + input_report_key(input, keycode, 1); + input_sync(input); + input_report_key(input, keycode, 0); + input_sync(input); + } + + return; +} + +/* ACPI device for hotkey handling */ + +static int acpi_fujitsu_hotkey_add(struct acpi_device *device) +{ + acpi_status status; + acpi_handle handle; + int result = 0; + int state = 0; + struct input_dev *input; + int error; + int i; + + if (!device) + return -EINVAL; + + fujitsu_hotkey->acpi_handle = device->handle; + sprintf(acpi_device_name(device), "%s", + ACPI_FUJITSU_HOTKEY_DEVICE_NAME); + sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS); + acpi_driver_data(device) = fujitsu_hotkey; + + status = acpi_install_notify_handler(device->handle, + ACPI_DEVICE_NOTIFY, + acpi_fujitsu_hotkey_notify, + fujitsu_hotkey); + + if (ACPI_FAILURE(status)) { + printk(KERN_ERR "Error installing notify handler\n"); + error = -ENODEV; + goto err_stop; + } + + /* kfifo */ + spin_lock_init(&fujitsu_hotkey->fifo_lock); + fujitsu_hotkey->fifo = + kfifo_alloc(RINGBUFFERSIZE * sizeof(int), GFP_KERNEL, + &fujitsu_hotkey->fifo_lock); + if (IS_ERR(fujitsu_hotkey->fifo)) { + printk(KERN_ERR "kfifo_alloc failed\n"); + error = PTR_ERR(fujitsu_hotkey->fifo); + goto err_stop; + } + + fujitsu_hotkey->input = input = input_allocate_device(); + if (!input) { + error = -ENOMEM; + goto err_uninstall_notify; + } + + snprintf(fujitsu_hotkey->phys, sizeof(fujitsu_hotkey->phys), + "%s/video/input0", acpi_device_hid(device)); + + input->name = acpi_device_name(device); + input->phys = fujitsu_hotkey->phys; + input->id.bustype = BUS_HOST; + input->id.product = 0x06; + input->dev.parent = &device->dev; + input->evbit[0] = BIT(EV_KEY); + set_bit(KEY_SCREENLOCK, input->keybit); + set_bit(KEY_MEDIA, input->keybit); + set_bit(KEY_EMAIL, input->keybit); + set_bit(KEY_SUSPEND, input->keybit); + set_bit(KEY_UNKNOWN, input->keybit); + + error = input_register_device(input); + if (error) + goto err_free_input_dev; + + result = acpi_bus_get_power(fujitsu_hotkey->acpi_handle, &state); + if (result) { + printk(KERN_ERR "Error reading power state\n"); + goto end; + } + + printk(KERN_INFO PREFIX "%s [%s] (%s)\n", + acpi_device_name(device), acpi_device_bid(device), + !device->power.state ? "on" : "off"); + + fujitsu_hotkey->dev = device; + + if (ACPI_SUCCESS + (acpi_get_handle(device->handle, METHOD_NAME__INI, &handle))) { + vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n"); + if (ACPI_FAILURE + (acpi_evaluate_object + (device->handle, METHOD_NAME__INI, NULL, NULL))) + printk(KERN_ERR "_INI Method failed\n"); + } + + i = 0; /* Discard hotkey ringbuffer */ + while (get_irb() != 0 && (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) ; + vdbg_printk(FUJLAPTOP_DBG_INFO, "Discarded %i ringbuffer entries\n", i); + + return result; + +end: +err_free_input_dev: + input_free_device(input); +err_uninstall_notify: + acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY, + acpi_fujitsu_hotkey_notify); + kfifo_free(fujitsu_hotkey->fifo); +err_stop: + + return result; +} + +static int acpi_fujitsu_hotkey_remove(struct acpi_device *device, int type) +{ + acpi_status status; + struct fujitsu_hotkey_t *fujitsu_hotkey = NULL; + + if (!device || !acpi_driver_data(device)) + return -EINVAL; + + fujitsu_hotkey = acpi_driver_data(device); + + status = acpi_remove_notify_handler(fujitsu_hotkey->acpi_handle, + ACPI_DEVICE_NOTIFY, + acpi_fujitsu_hotkey_notify); + + fujitsu_hotkey->acpi_handle = NULL; + + kfifo_free(fujitsu_hotkey->fifo); + + return 0; +} + +static void acpi_fujitsu_hotkey_notify(acpi_handle handle, u32 event, + void *data) +{ + struct input_dev *input; + int keycode, keycode_r; + unsigned int irb = 1; + int i, status; + + input = fujitsu_hotkey->input; + + vdbg_printk(FUJLAPTOP_DBG_TRACE, "Hotkey event\n"); + + switch (event) { + case ACPI_FUJITSU_NOTIFY_CODE1: + i = 0; + while ((irb = get_irb()) != 0 + && (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) { + vdbg_printk(FUJLAPTOP_DBG_TRACE, "GIRB result [%x]\n", + irb); + + switch (irb & 0x4ff) { + case LOCK_KEY: + keycode = KEY_SCREENLOCK; + break; + case DISPLAY_KEY: + keycode = KEY_MEDIA; + break; + case ENERGY_KEY: + keycode = KEY_EMAIL; + break; + case REST_KEY: + keycode = KEY_SUSPEND; + break; + case 0: + keycode = 0; + break; + default: + vdbg_printk(FUJLAPTOP_DBG_WARN, + "Unknown GIRB result [%x]\n", irb); + keycode = -1; + break; + } + if (keycode > 0) { + vdbg_printk(FUJLAPTOP_DBG_TRACE, + "Push keycode into ringbuffer [%d]\n", + keycode); + status = kfifo_put(fujitsu_hotkey->fifo, + (unsigned char *)&keycode, + sizeof(keycode)); + if (status != sizeof(keycode)) { + vdbg_printk(FUJLAPTOP_DBG_WARN, + "Could not push keycode [0x%x]\n", + keycode); + } else { + input_report_key(input, keycode, 1); + input_sync(input); + } + } else if (keycode == 0) { + while ((status = + kfifo_get + (fujitsu_hotkey->fifo, (unsigned char *) + &keycode_r, + sizeof + (keycode_r))) == sizeof(keycode_r)) { + input_report_key(input, keycode_r, 0); + input_sync(input); + vdbg_printk(FUJLAPTOP_DBG_TRACE, + "Pop keycode from ringbuffer [%d]\n", + keycode_r); + } + } + } + + break; + default: + keycode = KEY_UNKNOWN; + vdbg_printk(FUJLAPTOP_DBG_WARN, + "Unsupported event [0x%x]\n", event); + input_report_key(input, keycode, 1); + input_sync(input); + input_report_key(input, keycode, 0); + input_sync(input); + break; + } + + return; +} + +/* Initialization */ + static const struct acpi_device_id fujitsu_device_ids[] = { {ACPI_FUJITSU_HID, 0}, {"", 0}, @@ -251,11 +910,24 @@ static struct acpi_driver acpi_fujitsu_driver = { }, }; -/* Initialization */ +static const struct acpi_device_id fujitsu_hotkey_device_ids[] = { + {ACPI_FUJITSU_HOTKEY_HID, 0}, + {"", 0}, +}; + +static struct acpi_driver acpi_fujitsu_hotkey_driver = { + .name = ACPI_FUJITSU_HOTKEY_DRIVER_NAME, + .class = ACPI_FUJITSU_CLASS, + .ids = fujitsu_hotkey_device_ids, + .ops = { + .add = acpi_fujitsu_hotkey_add, + .remove = acpi_fujitsu_hotkey_remove, + }, +}; static int __init fujitsu_init(void) { - int ret, result; + int ret, result, max_brightness; if (acpi_disabled) return -ENODEV; @@ -271,19 +943,6 @@ static int __init fujitsu_init(void) goto fail_acpi; } - /* Register backlight stuff */ - - fujitsu->bl_device = - backlight_device_register("fujitsu-laptop", NULL, NULL, - &fujitsubl_ops); - if (IS_ERR(fujitsu->bl_device)) - return PTR_ERR(fujitsu->bl_device); - - fujitsu->bl_device->props.max_brightness = FUJITSU_LCD_N_LEVELS - 1; - ret = platform_driver_register(&fujitsupf_driver); - if (ret) - goto fail_backlight; - /* Register platform stuff */ fujitsu->pf_device = platform_device_alloc("fujitsu-laptop", -1); @@ -302,28 +961,68 @@ static int __init fujitsu_init(void) if (ret) goto fail_platform_device2; + /* Register backlight stuff */ + + fujitsu->bl_device = + backlight_device_register("fujitsu-laptop", NULL, NULL, + &fujitsubl_ops); + if (IS_ERR(fujitsu->bl_device)) + return PTR_ERR(fujitsu->bl_device); + + max_brightness = fujitsu->max_brightness; + + fujitsu->bl_device->props.max_brightness = max_brightness - 1; + fujitsu->bl_device->props.brightness = fujitsu->brightness_level; + + ret = platform_driver_register(&fujitsupf_driver); + if (ret) + goto fail_backlight; + + /* Register hotkey driver */ + + fujitsu_hotkey = kmalloc(sizeof(struct fujitsu_hotkey_t), GFP_KERNEL); + if (!fujitsu_hotkey) { + ret = -ENOMEM; + goto fail_hotkey; + } + memset(fujitsu_hotkey, 0, sizeof(struct fujitsu_hotkey_t)); + + result = acpi_bus_register_driver(&acpi_fujitsu_hotkey_driver); + if (result < 0) { + ret = -ENODEV; + goto fail_hotkey1; + } + printk(KERN_INFO "fujitsu-laptop: driver " FUJITSU_DRIVER_VERSION " successfully loaded.\n"); return 0; - fail_platform_device2: +fail_hotkey1: - platform_device_del(fujitsu->pf_device); - - fail_platform_device1: - - platform_device_put(fujitsu->pf_device); + kfree(fujitsu_hotkey); - fail_platform_driver: +fail_hotkey: platform_driver_unregister(&fujitsupf_driver); - fail_backlight: +fail_backlight: backlight_device_unregister(fujitsu->bl_device); - fail_acpi: +fail_platform_device2: + + platform_device_del(fujitsu->pf_device); + +fail_platform_device1: + + platform_device_put(fujitsu->pf_device); + +fail_platform_driver: + + acpi_bus_unregister_driver(&acpi_fujitsu_driver); + +fail_acpi: kfree(fujitsu); @@ -342,19 +1041,43 @@ static void __exit fujitsu_cleanup(void) kfree(fujitsu); + acpi_bus_unregister_driver(&acpi_fujitsu_hotkey_driver); + + kfree(fujitsu_hotkey); + printk(KERN_INFO "fujitsu-laptop: driver unloaded.\n"); } module_init(fujitsu_init); module_exit(fujitsu_cleanup); -MODULE_AUTHOR("Jonathan Woithe"); +module_param(use_alt_lcd_levels, uint, 0644); +MODULE_PARM_DESC(use_alt_lcd_levels, + "Use alternative interface for lcd_levels (needed for Lifebook s6410)."); +module_param(disable_brightness_keys, uint, 0644); +MODULE_PARM_DESC(disable_brightness_keys, + "Disable brightness keys (eg. if they are already handled by the generic ACPI_VIDEO device)."); +module_param(disable_brightness_adjust, uint, 0644); +MODULE_PARM_DESC(disable_brightness_adjust, "Disable brightness adjustment ."); +#ifdef CONFIG_FUJITSU_LAPTOP_DEBUG +module_param_named(debug, dbg_level, uint, 0644); +MODULE_PARM_DESC(debug, "Sets debug level bit-mask"); +#endif + +MODULE_AUTHOR("Jonathan Woithe, Peter Gruber"); MODULE_DESCRIPTION("Fujitsu laptop extras support"); MODULE_VERSION(FUJITSU_DRIVER_VERSION); MODULE_LICENSE("GPL"); +MODULE_ALIAS + ("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1D3:*:cvrS6410:*"); +MODULE_ALIAS + ("dmi:*:svnFUJITSU:*:pvr:rvnFUJITSU:rnFJNB19C:*:cvrS7020:*"); + static struct pnp_device_id pnp_ids[] = { { .id = "FUJ02bf" }, + { .id = "FUJ02B1" }, + { .id = "FUJ02E3" }, { .id = "" } }; MODULE_DEVICE_TABLE(pnp, pnp_ids); -- cgit v0.10.2 From 74523c901342a773ddd9f14c14539ec3d4197ecf Mon Sep 17 00:00:00 2001 From: Alok N Kataria Date: Fri, 13 Jun 2008 12:54:24 -0400 Subject: ACPI: fix checkpatch.pl complaints in scan.c http://bugzilla.kernel.org/show_bug.cgi?id=9772 Signed-off-by: Alok N Kataria Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 9a84ed2..5b049cd 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -6,7 +6,8 @@ #include #include #include -#include +#include +#include #include #include /* for acpi_ex_eisa_id_to_string() */ @@ -154,6 +155,7 @@ acpi_eject_store(struct device *d, struct device_attribute *attr, acpi_status status; acpi_object_type type = 0; struct acpi_device *acpi_device = to_acpi_device(d); + struct task_struct *task; if ((!count) || (buf[0] != '1')) { return -EINVAL; @@ -171,9 +173,11 @@ acpi_eject_store(struct device *d, struct device_attribute *attr, } /* remove the device in another thread to fix the deadlock issue */ - ret = kernel_thread(acpi_bus_hot_remove_device, - acpi_device->handle, SIGCHLD); - err: + task = kthread_run(acpi_bus_hot_remove_device, + acpi_device->handle, "acpi_hot_remove_device"); + if (IS_ERR(task)) + ret = PTR_ERR(task); +err: return ret; } -- cgit v0.10.2 From 6594d87ebd8371f4b67f7ab4b68f172b139b78d6 Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Sat, 14 Jun 2008 00:52:06 -0400 Subject: ACPI: fix acpi fan state set error Under /proc/acpi, there is a fan control interface, a user can set 0 or 3 to /proc/acpi/fan/*/state, 0 denotes D0 state, 3 denotes D3 state, but in current implementation, a user can set a fan to D1 state by any char excluding '1', '2' and '3'. For example: [root@localhost acpi]# cat /proc/acpi/fan/C31B/state status: off [root@localhost acpi]# echo "" > /proc/acpi/fan/C31B/state [root@localhost acpi]# cat /proc/acpi/fan/C31B/state status: on [root@localhost acpi]# echo "3" > /proc/acpi/fan/C31B/state [root@localhost acpi]# cat /proc/acpi/fan/C31B/state status: off [root@localhost acpi]# echo "xxxxx" > /proc/acpi/fan/C31B/state [root@localhost acpi]# cat /proc/acpi/fan/C31B/state status: on Obviously, such inputs as "" and "xxxxx" are invalid for fan state. This patch fixes this issue, it strictly limits fan state only to accept 0, 1, 2 and 3, any other inputs are invalid. Before applying this patch, the test result is: [root@localhost acpi]# cat /proc/acpi/fan/C31B/state status: off [root@localhost acpi]# echo "" > /proc/acpi/fan/C31B/state [root@localhost acpi]# cat /proc/acpi/fan/C31B/state status: on [root@localhost acpi]# echo "3" > /proc/acpi/fan/C31B/state [root@localhost acpi]# cat /proc/acpi/fan/C31B/state status: off [root@localhost acpi]# echo "xxxxx" > /proc/acpi/fan/C31B/state [root@localhost acpi]# cat /proc/acpi/fan/C31B/state status: on [root@localhost acpi]# echo "3" > /proc/acpi/fan/C31B/state [root@localhost acpi]# cat /proc/acpi/fan/C31B/state status: off [root@localhost acpi]# echo "3x" > /proc/acpi/fan/C31B/state [root@localhost acpi]# cat /proc/acpi/fan/C31B/state status: off [root@localhost acpi]# echo "-1x" > /proc/acpi/fan/C31B/state [root@localhost acpi]# cat /proc/acpi/fan/C31B/state status: on [root@localhost acpi]# After applying this patch, the test result is: [root@localhost ~]# cat /proc/acpi/fan/C31B/state status: off [root@localhost ~]# echo "" > /proc/acpi/fan/C31B/state -bash: echo: write error: Invalid argument [root@localhost ~]# cat /proc/acpi/fan/C31B/state status: off [root@localhost ~]# echo "3" > /proc/acpi/fan/C31B/state [root@localhost ~]# cat /proc/acpi/fan/C31B/state status: off [root@localhost ~]# echo "xxxxx" > /proc/acpi/fan/C31B/state -bash: echo: write error: Invalid argument [root@localhost ~]# cat /proc/acpi/fan/C31B/state status: off [root@localhost ~]# echo "-1x" > /proc/acpi/fan/C31B/state -bash: echo: write error: Invalid argument [root@localhost ~]# cat /proc/acpi/fan/C31B/state status: off [root@localhost ~]# echo "0" > //proc/acpi/fan/C31B/state [root@localhost ~]# cat /proc/acpi/fan/C31B/state status: on [root@localhost ~]# echo "4" > //proc/acpi/fan/C31B/state -bash: echo: write error: Invalid argument [root@localhost ~]# cat /proc/acpi/fan/C31B/state status: on [root@localhost ~]# echo "3" > //proc/acpi/fan/C31B/state [root@localhost ~]# cat /proc/acpi/fan/C31B/state status: off [root@localhost ~]# echo "0" > //proc/acpi/fan/C31B/state [root@localhost ~]# cat /proc/acpi/fan/C31B/state status: on [root@localhost ~]# echo "3x" > //proc/acpi/fan/C31B/state -bash: echo: write error: Invalid argument [root@localhost ~]# Signed-off-by: Yi Yang Signed-off-by: Andi Kleen Acked-by: Zhang Rui Signed-off-by: Len Brown diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c index 6cf10cb..55c17af 100644 --- a/drivers/acpi/fan.c +++ b/drivers/acpi/fan.c @@ -148,7 +148,7 @@ acpi_fan_write_state(struct file *file, const char __user * buffer, int result = 0; struct seq_file *m = file->private_data; struct acpi_device *device = m->private; - char state_string[12] = { '\0' }; + char state_string[3] = { '\0' }; if (count > sizeof(state_string) - 1) return -EINVAL; @@ -157,6 +157,12 @@ acpi_fan_write_state(struct file *file, const char __user * buffer, return -EFAULT; state_string[count] = '\0'; + if ((state_string[0] < '0') || (state_string[0] > '3')) + return -EINVAL; + if (state_string[1] == '\n') + state_string[1] = '\0'; + if (state_string[1] != '\0') + return -EINVAL; result = acpi_bus_set_power(device->handle, simple_strtoul(state_string, NULL, 0)); -- cgit v0.10.2 From 3d532d5e3882c1387a2722df2a368c4a9224b12f Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Sat, 14 Jun 2008 00:54:37 -0400 Subject: ACPI: fix processor throttling set error http://bugzilla.kernel.org/show_bug.cgi?id=9704 When echo some invalid values to /proc/acpi/processor/*/throttling, there isn't any error info returned, on the contray, it sets throttling value to some T* successfully, obviously, this is incorrect, a correct way should be to let it fail and return error info. This patch fixed the aforementioned issue, it also enables /proc/acpi/processor/*/throttling to accept such values as 't0' and 'T0', it also strictly limits /proc/acpi/processor/*/throttling only to accept "*", "t*" and "T*", "*" is the throttling state value the processor can support, current, it is 0 - 7. Before applying this patch, the test result is below: [root@localhost acpi]# cat /proc/acpi/processor/CPU0/throttling state count: 8 active state: T1 state available: T0 to T7 states: T0: 100% *T1: 87% T2: 75% T3: 62% T4: 50% T5: 37% T6: 25% T7: 12% [root@localhost acpi]# echo "1xxxxxx" > /proc/acpi/processor/CPU0/throttling [root@localhost acpi]# cat /proc/acpi/processor/CPU0/throttling state count: 8 active state: T1 state available: T0 to T7 states: T0: 100% *T1: 87% T2: 75% T3: 62% T4: 50% T5: 37% T6: 25% T7: 12% [root@localhost acpi]# echo "0" > /proc/acpi/processor/CPU0/throttling [root@localhost acpi]# cat /proc/acpi/processor/CPU0/throttling state count: 8 active state: T0 state available: T0 to T7 states: *T0: 100% T1: 87% T2: 75% T3: 62% T4: 50% T5: 37% T6: 25% T7: 12% [root@localhost acpi]# cd / [root@localhost /]# cat /proc/acpi/processor/CPU0/throttling state count: 8 active state: T0 state available: T0 to T7 states: *T0: 100% T1: 87% T2: 75% T3: 62% T4: 50% T5: 37% T6: 25% T7: 12% [root@localhost /]# echo "T0" > /proc/acpi/processor/CPU0/throttling [root@localhost /]# cat /proc/acpi/processor/CPU0/throttling state count: 8 active state: T0 state available: T0 to T7 states: *T0: 100% T1: 87% T2: 75% T3: 62% T4: 50% T5: 37% T6: 25% T7: 12% [root@localhost /]# echo "T7" > /proc/acpi/processor/CPU0/throttling [root@localhost /]# cat /proc/acpi/processor/CPU0/throttling state count: 8 active state: T0 state available: T0 to T7 states: *T0: 100% T1: 87% T2: 75% T3: 62% T4: 50% T5: 37% T6: 25% T7: 12% [root@localhost /]# echo "T100" > /proc/acpi/processor/CPU0/throttling [root@localhost /]# cat /proc/acpi/processor/CPU0/throttling state count: 8 active state: T0 state available: T0 to T7 states: *T0: 100% T1: 87% T2: 75% T3: 62% T4: 50% T5: 37% T6: 25% T7: 12% [root@localhost /]# echo "xxx" > /proc/acpi/processor/CPU0/throttling [root@localhost /]# cat /proc/acpi/processor/CPU0/throttling state count: 8 active state: T0 state available: T0 to T7 states: *T0: 100% T1: 87% T2: 75% T3: 62% T4: 50% T5: 37% T6: 25% T7: 12% [root@localhost /]# echo "2xxxx" > /proc/acpi/processor/CPU0/throttling [root@localhost /]# cat /proc/acpi/processor/CPU0/throttling state count: 8 active state: T2 state available: T0 to T7 states: T0: 100% T1: 87% *T2: 75% T3: 62% T4: 50% T5: 37% T6: 25% T7: 12% [root@localhost /]# echo "" > /proc/acpi/processor/CPU0/throttling [root@localhost /]# cat /proc/acpi/processor/CPU0/throttling state count: 8 active state: T0 state available: T0 to T7 states: *T0: 100% T1: 87% T2: 75% T3: 62% T4: 50% T5: 37% T6: 25% T7: 12% [root@localhost /]# echo "7777" > /proc/acpi/processor/CPU0/throttling -bash: echo: write error: Invalid argument [root@localhost /]# echo "7xxx" > /proc/acpi/processor/CPU0/throttling [root@localhost /]# cat /proc/acpi/processor/CPU0/throttling state count: 8 active state: T7 state available: T0 to T7 states: T0: 100% T1: 87% T2: 75% T3: 62% T4: 50% T5: 37% T6: 25% *T7: 12% [root@localhost /]# After applying this patch, the test result is below: [root@localhost linux-2.6.24-rc6]# echo > /proc/acpi/processor/CPU0/throttling -bash: echo: write error: Invalid argument [root@localhost linux-2.6.24-rc6]# echo "" > /proc/acpi/processor/CPU0/throttling -bash: echo: write error: Invalid argument [root@localhost linux-2.6.24-rc6]# echo "0" > /proc/acpi/processor/CPU0/throttling [root@localhost linux-2.6.24-rc6]# echo "t0" > /proc/acpi/processor/CPU0/throttling [root@localhost linux-2.6.24-rc6]# echo "T0" > /proc/acpi/processor/CPU0/throttling [root@localhost linux-2.6.24-rc6]# cat /proc/acpi/processor/CPU0/throttling state count: 8 active state: T0 state available: T0 to T7 states: *T0: 100% T1: 87% T2: 75% T3: 62% T4: 50% T5: 37% T6: 25% T7: 12% [root@localhost linux-2.6.24-rc6]# echo "T7" > /proc/acpi/processor/CPU0/throttling [root@localhost linux-2.6.24-rc6]# cat /proc/acpi/processor/CPU0/throttling state count: 8 active state: T7 state available: T0 to T7 states: T0: 100% T1: 87% T2: 75% T3: 62% T4: 50% T5: 37% T6: 25% *T7: 12% [root@localhost linux-2.6.24-rc6]# echo "T8" > /proc/acpi/processor/CPU0/throttling -bash: echo: write error: Invalid argument [root@localhost linux-2.6.24-rc6]# vi drivers/acpi/processor_throttling.c [root@localhost linux-2.6.24-rc6]# echo "T8" > /proc/acpi/processor/CPU0/throttling -bash: echo: write error: Invalid argument [root@localhost linux-2.6.24-rc6]# echo "t7" > /proc/acpi/processor/CPU0/throttling [root@localhost linux-2.6.24-rc6]# echo "t70" > /proc/acpi/processor/CPU0/throttling -bash: echo: write error: Invalid argument [root@localhost linux-2.6.24-rc6]# echo "70" > /proc/acpi/processor/CPU0/throttling -bash: echo: write error: Invalid argument [root@localhost linux-2.6.24-rc6]# echo "7000" > /proc/acpi/processor/CPU0/throttling -bash: echo: write error: Invalid argument [root@localhost linux-2.6.24-rc6]# echo "70" > /proc/acpi/processor/CPU0/throttling -bash: echo: write error: Invalid argument [root@localhost linux-2.6.24-rc6]# echo "xxx" > /proc/acpi/processor/CPU0/throttling -bash: echo: write error: Invalid argument [root@localhost linux-2.6.24-rc6]# echo > /proc/acpi/processor/CPU0/throttling -bash: echo: write error: Invalid argument [root@localhost linux-2.6.24-rc6]# echo -n > /proc/acpi/processor/CPU0/throttling [root@localhost linux-2.6.24-rc6]# echo -n "" > /proc/acpi/processor/CPU0/throttling [root@localhost linux-2.6.24-rc6]# echo $? 0 [root@localhost linux-2.6.24-rc6]# echo -n "" > /proc/acpi/processor/CPU0/throttling [root@localhost linux-2.6.24-rc6]# cat /proc/acpi/processor/CPU0/throttling state count: 8 active state: T7 state available: T0 to T7 states: T0: 100% T1: 87% T2: 75% T3: 62% T4: 50% T5: 37% T6: 25% *T7: 12% [root@localhost linux-2.6.24-rc6]# echo -n "" > /proc/acpi/processor/CPU0/throttling [root@localhost linux-2.6.24-rc6]# cat /proc/acpi/processor/CPU0/throttling state count: 8 active state: T7 state available: T0 to T7 states: T0: 100% T1: 87% T2: 75% T3: 62% T4: 50% T5: 37% T6: 25% *T7: 12% [root@localhost linux-2.6.24-rc6]# echo t0 > /proc/acpi/processor/CPU0/throttling [root@localhost linux-2.6.24-rc6]# echo T0 > /proc/acpi/processor/CPU0/throttling [root@localhost linux-2.6.24-rc6]# echo Tt0 > /proc/acpi/processor/CPU0/throttling -bash: echo: write error: Invalid argument [root@localhost linux-2.6.24-rc6]# echo T > /proc/acpi/processor/CPU0/throttling -bash: echo: write error: Invalid argument [root@localhost linux-2.6.24-rc6]# Signed-off-by: Yi Yang Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index 8728782..53345db 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -1232,7 +1232,10 @@ static ssize_t acpi_processor_write_throttling(struct file *file, int result = 0; struct seq_file *m = file->private_data; struct acpi_processor *pr = m->private; - char state_string[12] = { '\0' }; + char state_string[5] = ""; + char *charp = NULL; + size_t state_val = 0; + char tmpbuf[5] = ""; if (!pr || (count > sizeof(state_string) - 1)) return -EINVAL; @@ -1241,10 +1244,23 @@ static ssize_t acpi_processor_write_throttling(struct file *file, return -EFAULT; state_string[count] = '\0'; + if ((count > 0) && (state_string[count-1] == '\n')) + state_string[count-1] = '\0'; - result = acpi_processor_set_throttling(pr, - simple_strtoul(state_string, - NULL, 0)); + charp = state_string; + if ((state_string[0] == 't') || (state_string[0] == 'T')) + charp++; + + state_val = simple_strtoul(charp, NULL, 0); + if (state_val >= pr->throttling.state_count) + return -EINVAL; + + snprintf(tmpbuf, 5, "%d", state_val); + + if (strcmp(tmpbuf, charp) != 0) + return -EINVAL; + + result = acpi_processor_set_throttling(pr, state_val); if (result) return result; -- cgit v0.10.2 From 12b2b34e240a24bdbb2fdacf26a54b24ebf1ed81 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 17 Jun 2008 09:43:41 -0700 Subject: acpi: fix printk format warning Fix printk format warning: linux-next-20080617/drivers/acpi/processor_throttling.c:1258: warning: format '%d' expects type 'int', but argument 4 has type 'size_t' Signed-off-by: Randy Dunlap Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index 53345db..0622ace 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -1255,7 +1255,7 @@ static ssize_t acpi_processor_write_throttling(struct file *file, if (state_val >= pr->throttling.state_count) return -EINVAL; - snprintf(tmpbuf, 5, "%d", state_val); + snprintf(tmpbuf, 5, "%zu", state_val); if (strcmp(tmpbuf, charp) != 0) return -EINVAL; -- cgit v0.10.2 From 87dc5e3218ba3d7a9293f9113f58455747a233ac Mon Sep 17 00:00:00 2001 From: Cezary Jackiewicz Date: Thu, 12 Jun 2008 22:08:59 +0200 Subject: compal-laptop: remove unnecessary lcd_level attribute Signed-off-by: Cezary Jackiewicz Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/misc/compal-laptop.c b/drivers/misc/compal-laptop.c index 0c1f587..344b790 100644 --- a/drivers/misc/compal-laptop.c +++ b/drivers/misc/compal-laptop.c @@ -28,9 +28,6 @@ * * This driver exports a few files in /sys/devices/platform/compal-laptop/: * - * lcd_level - Screen brightness: contains a single integer in the - * range 0..7. (rw) - * * wlan - wlan subsystem state: contains 0 or 1 (rw) * * bluetooth - Bluetooth subsystem state: contains 0 or 1 (rw) @@ -44,7 +41,7 @@ * This driver might work on other laptops produced by Compal. If you * want to try it you can pass force=1 as argument to the module which * will force it to load even when the DMI data doesn't identify the - * laptop as IFL90. + * laptop as FL9x. */ #include @@ -56,7 +53,7 @@ #include #include -#define COMPAL_DRIVER_VERSION "0.2.5" +#define COMPAL_DRIVER_VERSION "0.2.6" #define COMPAL_LCD_LEVEL_MAX 8 @@ -209,34 +206,6 @@ static ssize_t show_bluetooth(struct device *dev, return sprintf(buf, "%i\n", enabled); } -static ssize_t show_lcd_level(struct device *dev, - struct device_attribute *attr, char *buf) -{ - int ret; - - ret = get_lcd_level(); - if (ret < 0) - return ret; - - return sprintf(buf, "%i\n", ret); -} - -static ssize_t store_lcd_level(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - int level, ret; - - if (sscanf(buf, "%i", &level) != 1 || - (level < 0 || level >= COMPAL_LCD_LEVEL_MAX)) - return -EINVAL; - - ret = set_lcd_level(level); - if (ret < 0) - return ret; - - return count; -} - static ssize_t store_wlan_state(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -267,13 +236,11 @@ static ssize_t store_bluetooth_state(struct device *dev, return count; } -static DEVICE_ATTR(lcd_level, 0644, show_lcd_level, store_lcd_level); static DEVICE_ATTR(bluetooth, 0644, show_bluetooth, store_bluetooth_state); static DEVICE_ATTR(wlan, 0644, show_wlan, store_wlan_state); static DEVICE_ATTR(raw, 0444, show_raw, NULL); static struct attribute *compal_attributes[] = { - &dev_attr_lcd_level.attr, &dev_attr_bluetooth.attr, &dev_attr_wlan.attr, &dev_attr_raw.attr, -- cgit v0.10.2 From e38e8a0743b0e996a8a3fbea8908fe75a84f02c7 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 13 Jun 2008 08:28:55 +0800 Subject: Make GPE disable more robust Implemented another change for the GPE disable. We now perform a read-change-write of the enable register instead of simply writing out the cached enable mask. This will prevent inadvertent enabling of GPEs if a rogue GPE is received during initialization (before GPE handlers are installed.) http://bugzilla.kernel.org/show_bug.cgi?id=6217 Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/events/evgpe.c b/drivers/acpi/events/evgpe.c index 5354be4..e0339d4 100644 --- a/drivers/acpi/events/evgpe.c +++ b/drivers/acpi/events/evgpe.c @@ -256,7 +256,7 @@ acpi_status acpi_ev_disable_gpe(struct acpi_gpe_event_info *gpe_event_info) return_ACPI_STATUS(status); } - /* Mark wake-disabled or HW disable, or both */ + /* Clear the appropriate enabled flags for this GPE */ switch (gpe_event_info->flags & ACPI_GPE_TYPE_MASK) { case ACPI_GPE_TYPE_WAKE: @@ -273,13 +273,23 @@ acpi_status acpi_ev_disable_gpe(struct acpi_gpe_event_info *gpe_event_info) /* Disable the requested runtime GPE */ ACPI_CLEAR_BIT(gpe_event_info->flags, ACPI_GPE_RUN_ENABLED); - - /* fallthrough */ + break; default: - acpi_hw_write_gpe_enable_reg(gpe_event_info); + break; } + /* + * Even if we don't know the GPE type, make sure that we always + * disable it. low_disable_gpe will just clear the enable bit for this + * GPE and write it. It will not write out the current GPE enable mask, + * since this may inadvertently enable GPEs too early, if a rogue GPE has + * come in during ACPICA initialization - possibly as a result of AML or + * other code that has enabled the GPE. + */ + status = acpi_hw_low_disable_gpe(gpe_event_info); + return_ACPI_STATUS(status); + return_ACPI_STATUS(AE_OK); } diff --git a/drivers/acpi/hardware/hwgpe.c b/drivers/acpi/hardware/hwgpe.c index 14bc4f4..58347d6 100644 --- a/drivers/acpi/hardware/hwgpe.c +++ b/drivers/acpi/hardware/hwgpe.c @@ -55,6 +55,54 @@ acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, /****************************************************************************** * + * FUNCTION: acpi_hw_low_disable_gpe + * + * PARAMETERS: gpe_event_info - Info block for the GPE to be disabled + * + * RETURN: Status + * + * DESCRIPTION: Disable a single GPE in the enable register. + * + ******************************************************************************/ + +acpi_status acpi_hw_low_disable_gpe(struct acpi_gpe_event_info *gpe_event_info) +{ + struct acpi_gpe_register_info *gpe_register_info; + acpi_status status; + u32 enable_mask; + + /* Get the info block for the entire GPE register */ + + gpe_register_info = gpe_event_info->register_info; + if (!gpe_register_info) { + return (AE_NOT_EXIST); + } + + /* Get current value of the enable register that contains this GPE */ + + status = acpi_hw_low_level_read(ACPI_GPE_REGISTER_WIDTH, &enable_mask, + &gpe_register_info->enable_address); + if (ACPI_FAILURE(status)) { + return (status); + } + + /* Clear just the bit that corresponds to this GPE */ + + ACPI_CLEAR_BIT(enable_mask, + ((u32) 1 << + (gpe_event_info->gpe_number - + gpe_register_info->base_gpe_number))); + + /* Write the updated enable mask */ + + status = acpi_hw_low_level_write(ACPI_GPE_REGISTER_WIDTH, enable_mask, + &gpe_register_info->enable_address); + + return (status); +} + +/****************************************************************************** + * * FUNCTION: acpi_hw_write_gpe_enable_reg * * PARAMETERS: gpe_event_info - Info block for the GPE to be enabled @@ -68,7 +116,7 @@ acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, ******************************************************************************/ acpi_status -acpi_hw_write_gpe_enable_reg(struct acpi_gpe_event_info *gpe_event_info) +acpi_hw_write_gpe_enable_reg(struct acpi_gpe_event_info * gpe_event_info) { struct acpi_gpe_register_info *gpe_register_info; acpi_status status; diff --git a/include/acpi/achware.h b/include/acpi/achware.h index d4fb9bb..45e9859 100644 --- a/include/acpi/achware.h +++ b/include/acpi/achware.h @@ -87,6 +87,8 @@ acpi_status acpi_hw_clear_acpi_status(void); /* * hwgpe - GPE support */ +acpi_status acpi_hw_low_disable_gpe(struct acpi_gpe_event_info *gpe_event_info); + acpi_status acpi_hw_write_gpe_enable_reg(struct acpi_gpe_event_info *gpe_event_info); -- cgit v0.10.2 From 47c00d2bc2ef2cf8a608688144fe2093a2aa9507 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 20 Jun 2008 13:56:56 +0200 Subject: ACPICA: fix mutex names in debug code. Reorder the mutex names to match the preceding #defines Signed-off-by: Vegard Nossum Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/include/acpi/aclocal.h b/include/acpi/aclocal.h index c5cdc32..62d7468 100644 --- a/include/acpi/aclocal.h +++ b/include/acpi/aclocal.h @@ -98,8 +98,8 @@ union acpi_parse_object; static char *acpi_gbl_mutex_names[ACPI_NUM_MUTEX] = { "ACPI_MTX_Interpreter", - "ACPI_MTX_Tables", "ACPI_MTX_Namespace", + "ACPI_MTX_Tables", "ACPI_MTX_Events", "ACPI_MTX_Caches", "ACPI_MTX_Memory", -- cgit v0.10.2 From 141094612d2606395a7b9e8658d10ef5c487cf97 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 25 Jun 2008 19:25:47 +0300 Subject: eeepc-laptop: static make the needlessly global cm_{g,s}etv[] static. Signed-off-by: Adrian Bunk Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/misc/eeepc-laptop.c b/drivers/misc/eeepc-laptop.c index 6d72760..9e8d79e 100644 --- a/drivers/misc/eeepc-laptop.c +++ b/drivers/misc/eeepc-laptop.c @@ -87,7 +87,7 @@ enum { CM_ASL_LID }; -const char *cm_getv[] = { +static const char *cm_getv[] = { "WLDG", NULL, NULL, NULL, "CAMG", NULL, NULL, NULL, NULL, "PBLG", NULL, NULL, @@ -96,7 +96,7 @@ const char *cm_getv[] = { "CRDG", "LIDG" }; -const char *cm_setv[] = { +static const char *cm_setv[] = { "WLDS", NULL, NULL, NULL, "CAMS", NULL, NULL, NULL, "SDSP", "PBLS", "HDPS", NULL, -- cgit v0.10.2 From a0bbaf83311cd995136c9047f174d79c1075ee2d Mon Sep 17 00:00:00 2001 From: Carlos Corbacho Date: Sat, 21 Jun 2008 09:09:08 +0100 Subject: acer-wmi: Remove LED colour comment from documentation This should have been removed when the colour was removed from the LED device name. Signed-off-by: Carlos Corbacho Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/Documentation/laptops/acer-wmi.txt b/Documentation/laptops/acer-wmi.txt index 79b7dbd..69b5dd4 100644 --- a/Documentation/laptops/acer-wmi.txt +++ b/Documentation/laptops/acer-wmi.txt @@ -174,8 +174,6 @@ The LED is exposed through the LED subsystem, and can be found in: The mail LED is autodetected, so if you don't have one, the LED device won't be registered. -If you have a mail LED that is not green, please report this to me. - Backlight ********* -- cgit v0.10.2 From 9991d9f2bc75dc8735932240b67432d4073b8f60 Mon Sep 17 00:00:00 2001 From: Carlos Corbacho Date: Sat, 21 Jun 2008 09:09:22 +0100 Subject: acer-wmi: Blacklist backlight on Acer Aspire 1520 & 1360 series A newer BIOS for these laptops adds ACPI-WMI support to them. However, it does not add support for the backlight via the EC, and we have no way to detect this on older machines, so blacklist it from them. Signed-off-by: Carlos Corbacho Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/misc/acer-wmi.c b/drivers/misc/acer-wmi.c index dd13a37..b82ff25 100644 --- a/drivers/misc/acer-wmi.c +++ b/drivers/misc/acer-wmi.c @@ -174,7 +174,7 @@ static struct wmi_interface *interface; struct quirk_entry { u8 wireless; u8 mailled; - u8 brightness; + s8 brightness; u8 bluetooth; }; @@ -198,6 +198,10 @@ static int dmi_matched(const struct dmi_system_id *dmi) static struct quirk_entry quirk_unknown = { }; +static struct quirk_entry quirk_acer_aspire_1520 = { + .brightness = -1, +}; + static struct quirk_entry quirk_acer_travelmate_2490 = { .mailled = 1, }; @@ -210,6 +214,24 @@ static struct quirk_entry quirk_medion_md_98300 = { static struct dmi_system_id acer_quirks[] = { { .callback = dmi_matched, + .ident = "Acer Aspire 1360", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 1360"), + }, + .driver_data = &quirk_acer_aspire_1520, + }, + { + .callback = dmi_matched, + .ident = "Acer Aspire 1520", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 1520"), + }, + .driver_data = &quirk_acer_aspire_1520, + }, + { + .callback = dmi_matched, .ident = "Acer Aspire 3100", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Acer"), @@ -552,7 +574,8 @@ static acpi_status AMW0_set_capabilities(void) * appear to use the same EC register for brightness, even if they * differ for wireless, etc */ - interface->capability |= ACER_CAP_BRIGHTNESS; + if (quirks->brightness >= 0) + interface->capability |= ACER_CAP_BRIGHTNESS; return AE_OK; } @@ -1059,6 +1082,8 @@ static int __init acer_wmi_init(void) printk(ACER_INFO "Acer Laptop ACPI-WMI Extras version %s\n", ACER_WMI_VERSION); + find_quirks(); + /* * Detect which ACPI-WMI interface we're using. */ @@ -1092,8 +1117,6 @@ static int __init acer_wmi_init(void) if (wmi_has_guid(AMW0_GUID1)) AMW0_find_mailled(); - find_quirks(); - if (!interface) { printk(ACER_ERR "No or unsupported WMI interface, unable to " "load\n"); -- cgit v0.10.2 From f2b585b4a31731b17b9769eae523986fa7fddcde Mon Sep 17 00:00:00 2001 From: Carlos Corbacho Date: Sat, 21 Jun 2008 09:09:27 +0100 Subject: acer-wmi: Respect framebuffer blanking in backlight If the framebuffer has requested blanking, turn the backlight down. Also offer the user the option to do this. Reported-by: Michal Pecio Signed-off-by: Carlos Corbacho Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/misc/acer-wmi.c b/drivers/misc/acer-wmi.c index b82ff25..e35825f 100644 --- a/drivers/misc/acer-wmi.c +++ b/drivers/misc/acer-wmi.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -830,7 +831,15 @@ static int read_brightness(struct backlight_device *bd) static int update_bl_status(struct backlight_device *bd) { - set_u32(bd->props.brightness, ACER_CAP_BRIGHTNESS); + int intensity = bd->props.brightness; + + if (bd->props.power != FB_BLANK_UNBLANK) + intensity = 0; + if (bd->props.fb_blank != FB_BLANK_UNBLANK) + intensity = 0; + + set_u32(intensity, ACER_CAP_BRIGHTNESS); + return 0; } @@ -852,8 +861,9 @@ static int __devinit acer_backlight_init(struct device *dev) acer_backlight_device = bd; + bd->props.power = FB_BLANK_UNBLANK; + bd->props.brightness = max_brightness; bd->props.max_brightness = max_brightness; - bd->props.brightness = read_brightness(NULL); backlight_update_status(bd); return 0; } -- cgit v0.10.2 From 6f061ab5e55d7fe6ce0c36e8954f56f0d95348fb Mon Sep 17 00:00:00 2001 From: Carlos Corbacho Date: Sat, 21 Jun 2008 09:09:38 +0100 Subject: acer-wmi: Add EC quirk for Fujitsu Siemens Amilo Li 1718 This laptop needs a different EC quirk from the standard Acer one to read the wireless status. Signed-off-by: Carlos Corbacho Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/misc/acer-wmi.c b/drivers/misc/acer-wmi.c index e35825f..5b7c5fc 100644 --- a/drivers/misc/acer-wmi.c +++ b/drivers/misc/acer-wmi.c @@ -212,6 +212,10 @@ static struct quirk_entry quirk_medion_md_98300 = { .wireless = 1, }; +static struct quirk_entry quirk_fujitsu_amilo_li_1718 = { + .wireless = 2, +}; + static struct dmi_system_id acer_quirks[] = { { .callback = dmi_matched, @@ -323,6 +327,15 @@ static struct dmi_system_id acer_quirks[] = { }, { .callback = dmi_matched, + .ident = "Fujitsu Siemens Amilo Li 1718", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Li 1718"), + }, + .driver_data = &quirk_fujitsu_amilo_li_1718, + }, + { + .callback = dmi_matched, .ident = "Medion MD 98300", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "MEDION"), @@ -416,6 +429,12 @@ struct wmi_interface *iface) return AE_ERROR; *value = result & 0x1; return AE_OK; + case 2: + err = ec_read(0x71, &result); + if (err) + return AE_ERROR; + *value = result & 0x1; + return AE_OK; default: err = ec_read(0xA, &result); if (err) -- cgit v0.10.2 From 5753dd539a86d8fc40a25e1a2cd1005a6525f083 Mon Sep 17 00:00:00 2001 From: Carlos Corbacho Date: Sat, 21 Jun 2008 09:09:48 +0100 Subject: acer-wmi: Disable device autodetection on Fujitsu Siemens Amilo Li2732 The AMW0 (V1) device detection method doesn't work properly on this laptop, so disable it, and for other laptops that may have this problem, by switching on a strange GUID. Signed-off-by: Carlos Corbacho Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/misc/acer-wmi.c b/drivers/misc/acer-wmi.c index 5b7c5fc..dd0b8d8 100644 --- a/drivers/misc/acer-wmi.c +++ b/drivers/misc/acer-wmi.c @@ -88,6 +88,7 @@ struct acer_quirks { * Acer ACPI method GUIDs */ #define AMW0_GUID1 "67C3371D-95A3-4C37-BB61-DD47B491DAAB" +#define AMW0_GUID2 "431F16ED-0C2B-444C-B267-27DEB140CF9C" #define WMID_GUID1 "6AF4F258-B401-42fd-BE91-3D4AC2D7C0D3" #define WMID_GUID2 "95764E09-FB56-4e83-B31A-37761F60994A" @@ -548,6 +549,15 @@ static acpi_status AMW0_set_capabilities(void) struct acpi_buffer out = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; + /* + * On laptops with this strange GUID (non Acer), normal probing doesn't + * work. + */ + if (wmi_has_guid(AMW0_GUID2)) { + interface->capability |= ACER_CAP_WIRELESS; + return AE_OK; + } + args.eax = ACER_AMW0_WRITE; args.ecx = args.edx = 0; -- cgit v0.10.2 From 81143522aa823036c4aa35bdd3b2e41966cf6e15 Mon Sep 17 00:00:00 2001 From: Carlos Corbacho Date: Sat, 21 Jun 2008 09:09:53 +0100 Subject: acer-wmi: Add debugfs file for device detection Add a debugfs file for showing the full results of the method we use to detect devices on WMID laptops. This should be useful in the case that a Linux user gets an Acer laptop with 3G support (and/ or people who enjoy ripping their wireless cards out) so we can get some feedback on how this value changes in these cases. (At the moment, we always enable the wireless and 3G control. In the case of the former, this is fairly safe. In the case of the latter though, trying to toggle this device if it doesn't exist on a laptop causes ACPI warnings/ errors). To summarise: If you have an Acer laptop with a built in 3G card, please report back the value from this file. Signed-off-by: Carlos Corbacho Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/misc/acer-wmi.c b/drivers/misc/acer-wmi.c index dd0b8d8..d8b8574 100644 --- a/drivers/misc/acer-wmi.c +++ b/drivers/misc/acer-wmi.c @@ -35,6 +35,7 @@ #include #include #include +#include #include @@ -152,6 +153,12 @@ struct acer_data { int brightness; }; +struct acer_debug { + struct dentry *root; + struct dentry *devices; + u32 wmid_devices; +}; + /* Each low-level interface must define at least some of the following */ struct wmi_interface { /* The WMI device type */ @@ -162,6 +169,9 @@ struct wmi_interface { /* Private data for the current interface */ struct acer_data data; + + /* debugfs entries associated with this interface */ + struct acer_debug debug; }; /* The static interface pointer, points to the currently detected interface */ @@ -956,6 +966,28 @@ static DEVICE_ATTR(interface, S_IWUGO | S_IRUGO | S_IWUSR, show_interface, NULL); /* + * debugfs functions + */ +static u32 get_wmid_devices(void) +{ + struct acpi_buffer out = {ACPI_ALLOCATE_BUFFER, NULL}; + union acpi_object *obj; + acpi_status status; + + status = wmi_query_block(WMID_GUID2, 1, &out); + if (ACPI_FAILURE(status)) + return 0; + + obj = (union acpi_object *) out.pointer; + if (obj && obj->type == ACPI_TYPE_BUFFER && + obj->buffer.length == sizeof(u32)) { + return *((u32 *) obj->buffer.pointer); + } else { + return 0; + } +} + +/* * Platform device */ static int __devinit acer_platform_probe(struct platform_device *device) @@ -1114,6 +1146,33 @@ error_sysfs: return retval; } +static void remove_debugfs(void) +{ + debugfs_remove(interface->debug.devices); + debugfs_remove(interface->debug.root); +} + +static int create_debugfs(void) +{ + interface->debug.root = debugfs_create_dir("acer-wmi", NULL); + if (!interface->debug.root) { + printk(ACER_ERR "Failed to create debugfs directory"); + return -ENOMEM; + } + + interface->debug.devices = debugfs_create_u32("devices", S_IRUGO, + interface->debug.root, + &interface->debug.wmid_devices); + if (!interface->debug.devices) + goto error_debugfs; + + return 0; + +error_debugfs: + remove_debugfs(); + return -ENOMEM; +} + static int __init acer_wmi_init(void) { int err; @@ -1173,6 +1232,13 @@ static int __init acer_wmi_init(void) if (err) return err; + if (wmi_has_guid(WMID_GUID2)) { + interface->debug.wmid_devices = get_wmid_devices(); + err = create_debugfs(); + if (err) + return err; + } + /* Override any initial settings with values from the commandline */ acer_commandline_init(); -- cgit v0.10.2 From 860f0c6b3dcaa455894f690647442dc97cab19c8 Mon Sep 17 00:00:00 2001 From: Carlos Corbacho Date: Sat, 21 Jun 2008 09:09:58 +0100 Subject: acer-wmi: Remove version number It doesn't make much sense these days. Signed-off-by: Carlos Corbacho Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/misc/acer-wmi.c b/drivers/misc/acer-wmi.c index d8b8574..e7a3fe5 100644 --- a/drivers/misc/acer-wmi.c +++ b/drivers/misc/acer-wmi.c @@ -22,8 +22,6 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define ACER_WMI_VERSION "0.1" - #include #include #include @@ -1177,8 +1175,7 @@ static int __init acer_wmi_init(void) { int err; - printk(ACER_INFO "Acer Laptop ACPI-WMI Extras version %s\n", - ACER_WMI_VERSION); + printk(ACER_INFO "Acer Laptop ACPI-WMI Extras\n"); find_quirks(); -- cgit v0.10.2 From d1857056904d5f313f11184fcfa624652ff9620a Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 20 Jun 2008 15:39:09 +0200 Subject: ACPI: don't walk tables if ACPI was disabled Ingo Molnar wrote: > -tip auto-testing started triggering this spinlock corruption message > yesterday: > > [ 3.976213] calling acpi_rtc_init+0x0/0xd3 > [ 3.980213] ACPI Exception (utmutex-0263): AE_BAD_PARAMETER, Thread F7C50000 could not acquire Mutex [3] [20080321] > [ 3.992213] BUG: spinlock bad magic on CPU#0, swapper/1 > [ 3.992213] lock: c2508dc4, .magic: 00000000, .owner: swapper/1, .owner_cpu: 0 This is apparently because some parts of ACPI, including mutexes, are not initialized when acpi=off is passed to the kernel. Reported-by: Ingo Molnar Signed-off-by: Vegard Nossum Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 9b227d4..6d18ca3 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -336,6 +336,9 @@ static int __init acpi_rtc_init(void) if (acpi_disabled) return 0; + if (acpi_disabled) + return 0; + if (dev) { rtc_wake_setup(); rtc_info.wake_on = rtc_wake_on; -- cgit v0.10.2 From cc7e51666d82aedfd6b9a033ca1a10d71c21f1ca Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 24 Jun 2008 22:57:12 -0400 Subject: dock: bay: Don't call acpi_walk_namespace() when ACPI is disabled. Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/bay.c b/drivers/acpi/bay.c index 61b6c5b..e6caf5d 100644 --- a/drivers/acpi/bay.c +++ b/drivers/acpi/bay.c @@ -380,6 +380,9 @@ static int __init bay_init(void) if (acpi_disabled) return -ENODEV; + if (acpi_disabled) + return -ENODEV; + /* look for dockable drive bays */ acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, find_bay, &bays, NULL); diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index bb7c51f..1e872e7 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -920,6 +920,9 @@ static int __init dock_init(void) if (acpi_disabled) return 0; + if (acpi_disabled) + return 0; + /* look for a dock station */ acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, find_dock, &num, NULL); -- cgit v0.10.2 From 2fe2de5f6c283a7d2a82c1b99a19012079cee555 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Thu, 5 Jun 2008 01:15:40 +0200 Subject: ACPI PM: acpi_pm_device_sleep_state() cleanup Get rid of a superfluous acpi_pm_device_sleep_state() parameter. The only legitimate value of that parameter must be derived from the first parameter, which is what all the callers already do. (However, this does not address the fact that ACPI still doesn't set up those flags.) Signed-off-by: David Brownell Signed-off-by: Andi Kleen Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki Signed-off-by: Len Brown diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c index 495c63a..b45806d 100644 --- a/drivers/acpi/sleep/main.c +++ b/drivers/acpi/sleep/main.c @@ -368,8 +368,8 @@ int acpi_suspend(u32 acpi_state) /** * acpi_pm_device_sleep_state - return preferred power state of ACPI device * in the system sleep state given by %acpi_target_sleep_state - * @dev: device to examine - * @wake: if set, the device should be able to wake up the system + * @dev: device to examine; its driver model wakeup flags control + * whether it should be able to wake up the system * @d_min_p: used to store the upper limit of allowed states range * Return value: preferred power state of the device on success, -ENODEV on * failure (ie. if there's no 'struct acpi_device' for @dev) @@ -387,7 +387,7 @@ int acpi_suspend(u32 acpi_state) * via @wake. */ -int acpi_pm_device_sleep_state(struct device *dev, int wake, int *d_min_p) +int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p) { acpi_handle handle = DEVICE_ACPI_HANDLE(dev); struct acpi_device *adev; @@ -426,7 +426,7 @@ int acpi_pm_device_sleep_state(struct device *dev, int wake, int *d_min_p) * can wake the system. _S0W may be valid, too. */ if (acpi_target_sleep_state == ACPI_STATE_S0 || - (wake && adev->wakeup.state.enabled && + (device_may_wakeup(dev) && adev->wakeup.state.enabled && adev->wakeup.sleep_state <= acpi_target_sleep_state)) { acpi_status status; diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 9d6fc8e..caabf05 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -298,8 +298,7 @@ static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev, { int acpi_state; - acpi_state = acpi_pm_device_sleep_state(&pdev->dev, - device_may_wakeup(&pdev->dev), NULL); + acpi_state = acpi_pm_device_sleep_state(&pdev->dev, NULL); if (acpi_state < 0) return PCI_POWER_ERROR; diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index 5090277..c1b9ea3 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -117,9 +117,7 @@ static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state) { int power_state; - power_state = acpi_pm_device_sleep_state(&dev->dev, - device_may_wakeup(&dev->dev), - NULL); + power_state = acpi_pm_device_sleep_state(&dev->dev, NULL); if (power_state < 0) power_state = (state.event == PM_EVENT_ON) ? ACPI_STATE_D0 : ACPI_STATE_D3; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 2f1c68c..db90a74 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -376,9 +376,9 @@ acpi_handle acpi_get_pci_rootbridge_handle(unsigned int, unsigned int); #define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)((dev)->archdata.acpi_handle)) #ifdef CONFIG_PM_SLEEP -int acpi_pm_device_sleep_state(struct device *, int, int *); +int acpi_pm_device_sleep_state(struct device *, int *); #else /* !CONFIG_PM_SLEEP */ -static inline int acpi_pm_device_sleep_state(struct device *d, int w, int *p) +static inline int acpi_pm_device_sleep_state(struct device *d, int *p) { if (p) *p = ACPI_STATE_D0; -- cgit v0.10.2 From a80a6da145bab8ee77af304961fc926de7a8ac84 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Jun 2008 01:16:37 +0200 Subject: PCI ACPI: Drop the second argument of platform_pci_choose_state Since the second argument of acpi_pci_choose_state() and platform_pci_choose_state() is never used, remove it. Signed-off-by: Rafael J. Wysocki Signed-off-by: Andi Kleen Acked-by: Pavel Machek Signed-off-by: Len Brown diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index caabf05..dab9d47 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -293,8 +293,7 @@ EXPORT_SYMBOL(pci_osc_control_set); * choose highest power _SxD or any lower power */ -static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev, - pm_message_t state) +static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev) { int acpi_state; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e4548ab..75c6023 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -508,7 +508,7 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) return 0; } -pci_power_t (*platform_pci_choose_state)(struct pci_dev *dev, pm_message_t state); +pci_power_t (*platform_pci_choose_state)(struct pci_dev *dev); /** * pci_choose_state - Choose the power state of a PCI device @@ -528,7 +528,7 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state) return PCI_D0; if (platform_pci_choose_state) { - ret = platform_pci_choose_state(dev, state); + ret = platform_pci_choose_state(dev); if (ret != PCI_POWER_ERROR) return ret; } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 00408c9..312daff 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -6,8 +6,7 @@ extern void pci_remove_sysfs_dev_files(struct pci_dev *pdev); extern void pci_cleanup_rom(struct pci_dev *dev); /* Firmware callbacks */ -extern pci_power_t (*platform_pci_choose_state)(struct pci_dev *dev, - pm_message_t state); +extern pci_power_t (*platform_pci_choose_state)(struct pci_dev *dev); extern int (*platform_pci_set_power_state)(struct pci_dev *dev, pci_power_t state); -- cgit v0.10.2 From 741438b5008791327d2183cebcd7ac9cfad64ec6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Jun 2008 01:17:28 +0200 Subject: ACPI PM: Remove obsolete Toshiba workaround Remove the obsolete workaround for a Toshiba Satellite 4030cdt S1 problem from drivers/acpi/sleep/main.c . Signed-off-by: Rafael J. Wysocki Signed-off-by: Andi Kleen Acked-by: Pavel Machek Signed-off-by: Len Brown diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c index b45806d..244e352 100644 --- a/drivers/acpi/sleep/main.c +++ b/drivers/acpi/sleep/main.c @@ -61,8 +61,6 @@ static u32 acpi_suspend_states[] = { [PM_SUSPEND_MAX] = ACPI_STATE_S5 }; -static int init_8259A_after_S1; - /** * acpi_suspend_begin - Set the target system sleep state to the state * associated with given @pm_state, if supported. @@ -185,13 +183,6 @@ static void acpi_suspend_finish(void) acpi_set_firmware_waking_vector((acpi_physical_address) 0); acpi_target_sleep_state = ACPI_STATE_S0; - -#ifdef CONFIG_X86 - if (init_8259A_after_S1) { - printk("Broken toshiba laptop -> kicking interrupts\n"); - init_8259A(0); - } -#endif } /** @@ -231,26 +222,6 @@ static struct platform_suspend_ops acpi_suspend_ops = { .finish = acpi_suspend_finish, .end = acpi_suspend_end, }; - -/* - * Toshiba fails to preserve interrupts over S1, reinitialization - * of 8259 is needed after S1 resume. - */ -static int __init init_ints_after_s1(const struct dmi_system_id *d) -{ - printk(KERN_WARNING "%s with broken S1 detected.\n", d->ident); - init_8259A_after_S1 = 1; - return 0; -} - -static struct dmi_system_id __initdata acpisleep_dmi_table[] = { - { - .callback = init_ints_after_s1, - .ident = "Toshiba Satellite 4030cdt", - .matches = {DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),}, - }, - {}, -}; #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATION @@ -472,8 +443,6 @@ int __init acpi_sleep_init(void) u8 type_a, type_b; #ifdef CONFIG_SUSPEND int i = 0; - - dmi_check_system(acpisleep_dmi_table); #endif if (acpi_disabled) -- cgit v0.10.2 From d20a4dca47d2cd027ed58a13f91b424affd1f449 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 11 Jun 2008 22:03:10 +0200 Subject: APM emulation: Notify about all suspend events, not just APM invoked ones (v2) This revamps the apm-emulation code to get suspend notifications regardless of what way pm_suspend() was invoked, whether via the apm ioctl or via /sys/power/state. Also do some code cleanup and add comments while at it. Signed-off-by: Johannes Berg Signed-off-by: Rafael J. Wysocki Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/char/apm-emulation.c b/drivers/char/apm-emulation.c index da8a165..aaca402 100644 --- a/drivers/char/apm-emulation.c +++ b/drivers/char/apm-emulation.c @@ -59,6 +59,55 @@ struct apm_queue { }; /* + * thread states (for threads using a writable /dev/apm_bios fd): + * + * SUSPEND_NONE: nothing happening + * SUSPEND_PENDING: suspend event queued for thread and pending to be read + * SUSPEND_READ: suspend event read, pending acknowledgement + * SUSPEND_ACKED: acknowledgement received from thread (via ioctl), + * waiting for resume + * SUSPEND_ACKTO: acknowledgement timeout + * SUSPEND_DONE: thread had acked suspend and is now notified of + * resume + * + * SUSPEND_WAIT: this thread invoked suspend and is waiting for resume + * + * A thread migrates in one of three paths: + * NONE -1-> PENDING -2-> READ -3-> ACKED -4-> DONE -5-> NONE + * -6-> ACKTO -7-> NONE + * NONE -8-> WAIT -9-> NONE + * + * While in PENDING or READ, the thread is accounted for in the + * suspend_acks_pending counter. + * + * The transitions are invoked as follows: + * 1: suspend event is signalled from the core PM code + * 2: the suspend event is read from the fd by the userspace thread + * 3: userspace thread issues the APM_IOC_SUSPEND ioctl (as ack) + * 4: core PM code signals that we have resumed + * 5: APM_IOC_SUSPEND ioctl returns + * + * 6: the notifier invoked from the core PM code timed out waiting + * for all relevant threds to enter ACKED state and puts those + * that haven't into ACKTO + * 7: those threads issue APM_IOC_SUSPEND ioctl too late, + * get an error + * + * 8: userspace thread issues the APM_IOC_SUSPEND ioctl (to suspend), + * ioctl code invokes pm_suspend() + * 9: pm_suspend() returns indicating resume + */ +enum apm_suspend_state { + SUSPEND_NONE, + SUSPEND_PENDING, + SUSPEND_READ, + SUSPEND_ACKED, + SUSPEND_ACKTO, + SUSPEND_WAIT, + SUSPEND_DONE, +}; + +/* * The per-file APM data */ struct apm_user { @@ -69,13 +118,7 @@ struct apm_user { unsigned int reader: 1; int suspend_result; - unsigned int suspend_state; -#define SUSPEND_NONE 0 /* no suspend pending */ -#define SUSPEND_PENDING 1 /* suspend pending read */ -#define SUSPEND_READ 2 /* suspend read, pending ack */ -#define SUSPEND_ACKED 3 /* suspend acked */ -#define SUSPEND_WAIT 4 /* waiting for suspend */ -#define SUSPEND_DONE 5 /* suspend completed */ + enum apm_suspend_state suspend_state; struct apm_queue queue; }; @@ -83,7 +126,8 @@ struct apm_user { /* * Local variables */ -static int suspends_pending; +static atomic_t suspend_acks_pending = ATOMIC_INIT(0); +static atomic_t userspace_notification_inhibit = ATOMIC_INIT(0); static int apm_disabled; static struct task_struct *kapmd_tsk; @@ -166,78 +210,6 @@ static void queue_event(apm_event_t event) wake_up_interruptible(&apm_waitqueue); } -/* - * queue_suspend_event - queue an APM suspend event. - * - * Check that we're in a state where we can suspend. If not, - * return -EBUSY. Otherwise, queue an event to all "writer" - * users. If there are no "writer" users, return '1' to - * indicate that we can immediately suspend. - */ -static int queue_suspend_event(apm_event_t event, struct apm_user *sender) -{ - struct apm_user *as; - int ret = 1; - - mutex_lock(&state_lock); - down_read(&user_list_lock); - - /* - * If a thread is still processing, we can't suspend, so reject - * the request. - */ - list_for_each_entry(as, &apm_user_list, list) { - if (as != sender && as->reader && as->writer && as->suser && - as->suspend_state != SUSPEND_NONE) { - ret = -EBUSY; - goto out; - } - } - - list_for_each_entry(as, &apm_user_list, list) { - if (as != sender && as->reader && as->writer && as->suser) { - as->suspend_state = SUSPEND_PENDING; - suspends_pending++; - queue_add_event(&as->queue, event); - ret = 0; - } - } - out: - up_read(&user_list_lock); - mutex_unlock(&state_lock); - wake_up_interruptible(&apm_waitqueue); - return ret; -} - -static void apm_suspend(void) -{ - struct apm_user *as; - int err = pm_suspend(PM_SUSPEND_MEM); - - /* - * Anyone on the APM queues will think we're still suspended. - * Send a message so everyone knows we're now awake again. - */ - queue_event(APM_NORMAL_RESUME); - - /* - * Finally, wake up anyone who is sleeping on the suspend. - */ - mutex_lock(&state_lock); - down_read(&user_list_lock); - list_for_each_entry(as, &apm_user_list, list) { - if (as->suspend_state == SUSPEND_WAIT || - as->suspend_state == SUSPEND_ACKED) { - as->suspend_result = err; - as->suspend_state = SUSPEND_DONE; - } - } - up_read(&user_list_lock); - mutex_unlock(&state_lock); - - wake_up(&apm_suspend_waitqueue); -} - static ssize_t apm_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos) { struct apm_user *as = fp->private_data; @@ -308,25 +280,22 @@ apm_ioctl(struct inode * inode, struct file *filp, u_int cmd, u_long arg) as->suspend_result = -EINTR; - if (as->suspend_state == SUSPEND_READ) { - int pending; - + switch (as->suspend_state) { + case SUSPEND_READ: /* * If we read a suspend command from /dev/apm_bios, * then the corresponding APM_IOC_SUSPEND ioctl is * interpreted as an acknowledge. */ as->suspend_state = SUSPEND_ACKED; - suspends_pending--; - pending = suspends_pending == 0; + atomic_dec(&suspend_acks_pending); mutex_unlock(&state_lock); /* - * If there are no further acknowledges required, - * suspend the system. + * suspend_acks_pending changed, the notifier needs to + * be woken up for this */ - if (pending) - apm_suspend(); + wake_up(&apm_suspend_waitqueue); /* * Wait for the suspend/resume to complete. If there @@ -342,35 +311,21 @@ apm_ioctl(struct inode * inode, struct file *filp, u_int cmd, u_long arg) * try_to_freeze() in freezer_count() will not trigger */ freezer_count(); - } else { + break; + case SUSPEND_ACKTO: + as->suspend_result = -ETIMEDOUT; + mutex_unlock(&state_lock); + break; + default: as->suspend_state = SUSPEND_WAIT; mutex_unlock(&state_lock); /* * Otherwise it is a request to suspend the system. - * Queue an event for all readers, and expect an - * acknowledge from all writers who haven't already - * acknowledged. - */ - err = queue_suspend_event(APM_USER_SUSPEND, as); - if (err < 0) { - /* - * Avoid taking the lock here - this - * should be fine. - */ - as->suspend_state = SUSPEND_NONE; - break; - } - - if (err > 0) - apm_suspend(); - - /* - * Wait for the suspend/resume to complete. If there - * are pending acknowledges, we wait here for them. + * Just invoke pm_suspend(), we'll handle it from + * there via the notifier. */ - wait_event_freezable(apm_suspend_waitqueue, - as->suspend_state == SUSPEND_DONE); + as->suspend_result = pm_suspend(PM_SUSPEND_MEM); } mutex_lock(&state_lock); @@ -386,7 +341,6 @@ apm_ioctl(struct inode * inode, struct file *filp, u_int cmd, u_long arg) static int apm_release(struct inode * inode, struct file * filp) { struct apm_user *as = filp->private_data; - int pending = 0; filp->private_data = NULL; @@ -396,18 +350,15 @@ static int apm_release(struct inode * inode, struct file * filp) /* * We are now unhooked from the chain. As far as new - * events are concerned, we no longer exist. However, we - * need to balance suspends_pending, which means the - * possibility of sleeping. + * events are concerned, we no longer exist. */ mutex_lock(&state_lock); - if (as->suspend_state != SUSPEND_NONE) { - suspends_pending -= 1; - pending = suspends_pending == 0; - } + if (as->suspend_state == SUSPEND_PENDING || + as->suspend_state == SUSPEND_READ) + atomic_dec(&suspend_acks_pending); mutex_unlock(&state_lock); - if (pending) - apm_suspend(); + + wake_up(&apm_suspend_waitqueue); kfree(as); return 0; @@ -545,7 +496,6 @@ static int kapmd(void *arg) { do { apm_event_t event; - int ret; wait_event_interruptible(kapmd_wait, !queue_empty(&kapmd_queue) || kthread_should_stop()); @@ -570,20 +520,13 @@ static int kapmd(void *arg) case APM_USER_SUSPEND: case APM_SYS_SUSPEND: - ret = queue_suspend_event(event, NULL); - if (ret < 0) { - /* - * We were busy. Try again in 50ms. - */ - queue_add_event(&kapmd_queue, event); - msleep(50); - } - if (ret > 0) - apm_suspend(); + pm_suspend(PM_SUSPEND_MEM); break; case APM_CRITICAL_SUSPEND: - apm_suspend(); + atomic_inc(&userspace_notification_inhibit); + pm_suspend(PM_SUSPEND_MEM); + atomic_dec(&userspace_notification_inhibit); break; } } while (1); @@ -591,6 +534,120 @@ static int kapmd(void *arg) return 0; } +static int apm_suspend_notifier(struct notifier_block *nb, + unsigned long event, + void *dummy) +{ + struct apm_user *as; + int err; + + /* short-cut emergency suspends */ + if (atomic_read(&userspace_notification_inhibit)) + return NOTIFY_DONE; + + switch (event) { + case PM_SUSPEND_PREPARE: + /* + * Queue an event to all "writer" users that we want + * to suspend and need their ack. + */ + mutex_lock(&state_lock); + down_read(&user_list_lock); + + list_for_each_entry(as, &apm_user_list, list) { + if (as->suspend_state != SUSPEND_WAIT && as->reader && + as->writer && as->suser) { + as->suspend_state = SUSPEND_PENDING; + atomic_inc(&suspend_acks_pending); + queue_add_event(&as->queue, APM_USER_SUSPEND); + } + } + + up_read(&user_list_lock); + mutex_unlock(&state_lock); + wake_up_interruptible(&apm_waitqueue); + + /* + * Wait for the the suspend_acks_pending variable to drop to + * zero, meaning everybody acked the suspend event (or the + * process was killed.) + * + * If the app won't answer within a short while we assume it + * locked up and ignore it. + */ + err = wait_event_interruptible_timeout( + apm_suspend_waitqueue, + atomic_read(&suspend_acks_pending) == 0, + 5*HZ); + + /* timed out */ + if (err == 0) { + /* + * Move anybody who timed out to "ack timeout" state. + * + * We could time out and the userspace does the ACK + * right after we time out but before we enter the + * locked section here, but that's fine. + */ + mutex_lock(&state_lock); + down_read(&user_list_lock); + list_for_each_entry(as, &apm_user_list, list) { + if (as->suspend_state == SUSPEND_PENDING || + as->suspend_state == SUSPEND_READ) { + as->suspend_state = SUSPEND_ACKTO; + atomic_dec(&suspend_acks_pending); + } + } + up_read(&user_list_lock); + mutex_unlock(&state_lock); + } + + /* let suspend proceed */ + if (err >= 0) + return NOTIFY_OK; + + /* interrupted by signal */ + return NOTIFY_BAD; + + case PM_POST_SUSPEND: + /* + * Anyone on the APM queues will think we're still suspended. + * Send a message so everyone knows we're now awake again. + */ + queue_event(APM_NORMAL_RESUME); + + /* + * Finally, wake up anyone who is sleeping on the suspend. + */ + mutex_lock(&state_lock); + down_read(&user_list_lock); + list_for_each_entry(as, &apm_user_list, list) { + if (as->suspend_state == SUSPEND_ACKED) { + /* + * TODO: maybe grab error code, needs core + * changes to push the error to the notifier + * chain (could use the second parameter if + * implemented) + */ + as->suspend_result = 0; + as->suspend_state = SUSPEND_DONE; + } + } + up_read(&user_list_lock); + mutex_unlock(&state_lock); + + wake_up(&apm_suspend_waitqueue); + return NOTIFY_OK; + + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block apm_notif_block = { + .notifier_call = apm_suspend_notifier, +}; + static int __init apm_init(void) { int ret; @@ -604,7 +661,7 @@ static int __init apm_init(void) if (IS_ERR(kapmd_tsk)) { ret = PTR_ERR(kapmd_tsk); kapmd_tsk = NULL; - return ret; + goto out; } wake_up_process(kapmd_tsk); @@ -613,16 +670,27 @@ static int __init apm_init(void) #endif ret = misc_register(&apm_device); - if (ret != 0) { - remove_proc_entry("apm", NULL); - kthread_stop(kapmd_tsk); - } + if (ret) + goto out_stop; + ret = register_pm_notifier(&apm_notif_block); + if (ret) + goto out_unregister; + + return 0; + + out_unregister: + misc_deregister(&apm_device); + out_stop: + remove_proc_entry("apm", NULL); + kthread_stop(kapmd_tsk); + out: return ret; } static void __exit apm_exit(void) { + unregister_pm_notifier(&apm_notif_block); misc_deregister(&apm_device); remove_proc_entry("apm", NULL); -- cgit v0.10.2 From ebb12db51f6c13b30752fcf506baad4c617b153c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 11 Jun 2008 22:04:29 +0200 Subject: Freezer: Introduce PF_FREEZER_NOSIG The freezer currently attempts to distinguish kernel threads from user space tasks by checking if their mm pointer is unset and it does not send fake signals to kernel threads. However, there are kernel threads, mostly related to networking, that behave like user space tasks and may want to be sent a fake signal to be frozen. Introduce the new process flag PF_FREEZER_NOSIG that will be set by default for all kernel threads and make the freezer only send fake signals to the tasks having PF_FREEZER_NOSIG unset. Provide the set_freezable_with_signal() function to be called by the kernel threads that want to be sent a fake signal for freezing. This patch should not change the freezer's observable behavior. Signed-off-by: Rafael J. Wysocki Signed-off-by: Andi Kleen Acked-by: Pavel Machek Signed-off-by: Len Brown diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 0893499..deddeed 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -128,6 +128,15 @@ static inline void set_freezable(void) } /* + * Tell the freezer that the current task should be frozen by it and that it + * should send a fake signal to the task to freeze it. + */ +static inline void set_freezable_with_signal(void) +{ + current->flags &= ~(PF_NOFREEZE | PF_FREEZER_NOSIG); +} + +/* * Freezer-friendly wrappers around wait_event_interruptible() and * wait_event_interruptible_timeout(), originally defined in */ @@ -174,6 +183,7 @@ static inline void freezer_do_not_count(void) {} static inline void freezer_count(void) {} static inline int freezer_should_skip(struct task_struct *p) { return 0; } static inline void set_freezable(void) {} +static inline void set_freezable_with_signal(void) {} #define wait_event_freezable(wq, condition) \ wait_event_interruptible(wq, condition) diff --git a/include/linux/sched.h b/include/linux/sched.h index 2134917..ba2f859 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1494,6 +1494,7 @@ static inline void put_task_struct(struct task_struct *t) #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ +#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */ /* * Only the _current_ task can read/write to tsk->flags, but other diff --git a/kernel/kthread.c b/kernel/kthread.c index 97747cd..ac3fb73 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -235,7 +235,7 @@ int kthreadd(void *unused) set_user_nice(tsk, KTHREAD_NICE_LEVEL); set_cpus_allowed(tsk, CPU_MASK_ALL); - current->flags |= PF_NOFREEZE; + current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG; for (;;) { set_current_state(TASK_INTERRUPTIBLE); diff --git a/kernel/power/process.c b/kernel/power/process.c index f1d0b34..5fb8765 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -19,9 +19,6 @@ */ #define TIMEOUT (20 * HZ) -#define FREEZER_KERNEL_THREADS 0 -#define FREEZER_USER_SPACE 1 - static inline int freezeable(struct task_struct * p) { if ((p == current) || @@ -84,63 +81,53 @@ static void fake_signal_wake_up(struct task_struct *p) spin_unlock_irqrestore(&p->sighand->siglock, flags); } -static int has_mm(struct task_struct *p) +static inline bool should_send_signal(struct task_struct *p) { - return (p->mm && !(p->flags & PF_BORROWED_MM)); + return !(p->flags & PF_FREEZER_NOSIG); } /** * freeze_task - send a freeze request to given task * @p: task to send the request to - * @with_mm_only: if set, the request will only be sent if the task has its - * own mm - * Return value: 0, if @with_mm_only is set and the task has no mm of its - * own or the task is frozen, 1, otherwise + * @sig_only: if set, the request will only be sent if the task has the + * PF_FREEZER_NOSIG flag unset + * Return value: 'false', if @sig_only is set and the task has + * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise * - * The freeze request is sent by seting the tasks's TIF_FREEZE flag and + * The freeze request is sent by setting the tasks's TIF_FREEZE flag and * either sending a fake signal to it or waking it up, depending on whether - * or not it has its own mm (ie. it is a user land task). If @with_mm_only - * is set and the task has no mm of its own (ie. it is a kernel thread), - * its TIF_FREEZE flag should not be set. - * - * The task_lock() is necessary to prevent races with exit_mm() or - * use_mm()/unuse_mm() from occuring. + * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task + * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its + * TIF_FREEZE flag will not be set. */ -static int freeze_task(struct task_struct *p, int with_mm_only) +static bool freeze_task(struct task_struct *p, bool sig_only) { - int ret = 1; + /* + * We first check if the task is freezing and next if it has already + * been frozen to avoid the race with frozen_process() which first marks + * the task as frozen and next clears its TIF_FREEZE. + */ + if (!freezing(p)) { + rmb(); + if (frozen(p)) + return false; - task_lock(p); - if (freezing(p)) { - if (has_mm(p)) { - if (!signal_pending(p)) - fake_signal_wake_up(p); - } else { - if (with_mm_only) - ret = 0; - else - wake_up_state(p, TASK_INTERRUPTIBLE); - } + if (!sig_only || should_send_signal(p)) + set_freeze_flag(p); + else + return false; + } + + if (should_send_signal(p)) { + if (!signal_pending(p)) + fake_signal_wake_up(p); + } else if (sig_only) { + return false; } else { - rmb(); - if (frozen(p)) { - ret = 0; - } else { - if (has_mm(p)) { - set_freeze_flag(p); - fake_signal_wake_up(p); - } else { - if (with_mm_only) { - ret = 0; - } else { - set_freeze_flag(p); - wake_up_state(p, TASK_INTERRUPTIBLE); - } - } - } + wake_up_state(p, TASK_INTERRUPTIBLE); } - task_unlock(p); - return ret; + + return true; } static void cancel_freezing(struct task_struct *p) @@ -156,7 +143,7 @@ static void cancel_freezing(struct task_struct *p) } } -static int try_to_freeze_tasks(int freeze_user_space) +static int try_to_freeze_tasks(bool sig_only) { struct task_struct *g, *p; unsigned long end_time; @@ -175,7 +162,7 @@ static int try_to_freeze_tasks(int freeze_user_space) if (frozen(p) || !freezeable(p)) continue; - if (!freeze_task(p, freeze_user_space)) + if (!freeze_task(p, sig_only)) continue; /* @@ -235,13 +222,13 @@ int freeze_processes(void) int error; printk("Freezing user space processes ... "); - error = try_to_freeze_tasks(FREEZER_USER_SPACE); + error = try_to_freeze_tasks(true); if (error) goto Exit; printk("done.\n"); printk("Freezing remaining freezable tasks ... "); - error = try_to_freeze_tasks(FREEZER_KERNEL_THREADS); + error = try_to_freeze_tasks(false); if (error) goto Exit; printk("done."); @@ -251,7 +238,7 @@ int freeze_processes(void) return error; } -static void thaw_tasks(int thaw_user_space) +static void thaw_tasks(bool nosig_only) { struct task_struct *g, *p; @@ -260,7 +247,7 @@ static void thaw_tasks(int thaw_user_space) if (!freezeable(p)) continue; - if (!p->mm == thaw_user_space) + if (nosig_only && should_send_signal(p)) continue; thaw_process(p); @@ -271,8 +258,8 @@ static void thaw_tasks(int thaw_user_space) void thaw_processes(void) { printk("Restarting tasks ... "); - thaw_tasks(FREEZER_KERNEL_THREADS); - thaw_tasks(FREEZER_USER_SPACE); + thaw_tasks(true); + thaw_tasks(false); schedule(); printk("done.\n"); } -- cgit v0.10.2 From 52d11025dba32bed696eaee1822b26529e764770 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 11 Jun 2008 22:07:52 +0200 Subject: snapshot: Push BKL down into ioctl handlers Push BKL down into ioctl handlers - snapshot device. Signed-off-by: Alan Cox Signed-off-by: Rafael J. Wysocki Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/kernel/power/user.c b/kernel/power/user.c index f5512cb..658262b 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -164,8 +165,8 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, return res; } -static int snapshot_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) +static long snapshot_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) { int error = 0; struct snapshot_data *data; @@ -181,6 +182,8 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, data = filp->private_data; + lock_kernel(); + switch (cmd) { case SNAPSHOT_FREEZE: @@ -389,7 +392,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -ENOTTY; } - + unlock_kernel(); return error; } @@ -399,7 +402,7 @@ static const struct file_operations snapshot_fops = { .read = snapshot_read, .write = snapshot_write, .llseek = no_llseek, - .ioctl = snapshot_ioctl, + .unlocked_ioctl = snapshot_ioctl, }; static struct miscdevice snapshot_device = { -- cgit v0.10.2 From 25f2f3daadaf0768a61d02ee3ed3d9a21e9dc46c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 11 Jun 2008 22:09:45 +0200 Subject: snapshot: Use pm_mutex for mutual exclusion We can avoid taking the BKL in snapshot_ioctl() if pm_mutex is used to prevent the ioctls from being executed concurrently. In addition, although it is only possible to open /dev/snapshot once, the task which has done that may spawn a child that will inherit the open descriptor, so in theory they can call snapshot_write(), snapshot_read() and snapshot_release() concurrently. pm_mutex can also be used for mutual exclusion in such cases. Signed-off-by: Rafael J. Wysocki Signed-off-by: Andi Kleen Acked-by: Pavel Machek Signed-off-by: Len Brown diff --git a/kernel/power/user.c b/kernel/power/user.c index 658262b..a6332a3 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -70,16 +70,22 @@ static int snapshot_open(struct inode *inode, struct file *filp) struct snapshot_data *data; int error; - if (!atomic_add_unless(&snapshot_device_available, -1, 0)) - return -EBUSY; + mutex_lock(&pm_mutex); + + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + error = -EBUSY; + goto Unlock; + } if ((filp->f_flags & O_ACCMODE) == O_RDWR) { atomic_inc(&snapshot_device_available); - return -ENOSYS; + error = -ENOSYS; + goto Unlock; } if(create_basic_memory_bitmaps()) { atomic_inc(&snapshot_device_available); - return -ENOMEM; + error = -ENOMEM; + goto Unlock; } nonseekable_open(inode, filp); data = &snapshot_state; @@ -99,33 +105,36 @@ static int snapshot_open(struct inode *inode, struct file *filp) if (error) pm_notifier_call_chain(PM_POST_HIBERNATION); } - if (error) { + if (error) atomic_inc(&snapshot_device_available); - return error; - } data->frozen = 0; data->ready = 0; data->platform_support = 0; - return 0; + Unlock: + mutex_unlock(&pm_mutex); + + return error; } static int snapshot_release(struct inode *inode, struct file *filp) { struct snapshot_data *data; + mutex_lock(&pm_mutex); + swsusp_free(); free_basic_memory_bitmaps(); data = filp->private_data; free_all_swap_pages(data->swap); - if (data->frozen) { - mutex_lock(&pm_mutex); + if (data->frozen) thaw_processes(); - mutex_unlock(&pm_mutex); - } pm_notifier_call_chain(data->mode == O_WRONLY ? PM_POST_HIBERNATION : PM_POST_RESTORE); atomic_inc(&snapshot_device_available); + + mutex_unlock(&pm_mutex); + return 0; } @@ -135,9 +144,13 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf, struct snapshot_data *data; ssize_t res; + mutex_lock(&pm_mutex); + data = filp->private_data; - if (!data->ready) - return -ENODATA; + if (!data->ready) { + res = -ENODATA; + goto Unlock; + } res = snapshot_read_next(&data->handle, count); if (res > 0) { if (copy_to_user(buf, data_of(data->handle), res)) @@ -145,6 +158,10 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf, else *offp = data->handle.offset; } + + Unlock: + mutex_unlock(&pm_mutex); + return res; } @@ -154,6 +171,8 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, struct snapshot_data *data; ssize_t res; + mutex_lock(&pm_mutex); + data = filp->private_data; res = snapshot_write_next(&data->handle, count); if (res > 0) { @@ -162,6 +181,9 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, else *offp = data->handle.offset; } + + mutex_unlock(&pm_mutex); + return res; } @@ -180,16 +202,16 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - data = filp->private_data; + if (!mutex_trylock(&pm_mutex)) + return -EBUSY; - lock_kernel(); + data = filp->private_data; switch (cmd) { case SNAPSHOT_FREEZE: if (data->frozen) break; - mutex_lock(&pm_mutex); printk("Syncing filesystems ... "); sys_sync(); printk("done.\n"); @@ -197,7 +219,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, error = freeze_processes(); if (error) thaw_processes(); - mutex_unlock(&pm_mutex); if (!error) data->frozen = 1; break; @@ -205,9 +226,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, case SNAPSHOT_UNFREEZE: if (!data->frozen || data->ready) break; - mutex_lock(&pm_mutex); thaw_processes(); - mutex_unlock(&pm_mutex); data->frozen = 0; break; @@ -310,16 +329,11 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, error = -EPERM; break; } - if (!mutex_trylock(&pm_mutex)) { - error = -EBUSY; - break; - } /* * Tasks are frozen and the notifiers have been called with * PM_HIBERNATION_PREPARE */ error = suspend_devices_and_enter(PM_SUSPEND_MEM); - mutex_unlock(&pm_mutex); break; case SNAPSHOT_PLATFORM_SUPPORT: @@ -392,7 +406,9 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, error = -ENOTTY; } - unlock_kernel(); + + mutex_unlock(&pm_mutex); + return error; } -- cgit v0.10.2 From c735ab7da3414c3e639d5c5223092b74689e5d87 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 26 Jun 2008 22:27:21 -0400 Subject: fujitsu-laptop: depends on INPUT fujitsu-laptop uses input_* functions, so it should depend on INPUT. drivers/built-in.o: In function `acpi_fujitsu_add': fujitsu-laptop.c:(.text+0xaaec7): undefined reference to `input_allocate_device' fujitsu-laptop.c:(.text+0xaaf39): undefined reference to `input_register_device' fujitsu-laptop.c:(.text+0xab025): undefined reference to `input_free_device' drivers/built-in.o: In function `acpi_fujitsu_notify': fujitsu-laptop.c:(.text+0xab0d8): undefined reference to `input_event' fujitsu-laptop.c:(.text+0xab0e5): undefined reference to `input_event' fujitsu-laptop.c:(.text+0xab0f5): undefined reference to `input_event' fujitsu-laptop.c:(.text+0xab102): undefined reference to `input_event' drivers/built-in.o: In function `acpi_fujitsu_hotkey_notify': fujitsu-laptop.c:(.text+0xab261): undefined reference to `input_event' drivers/built-in.o:fujitsu-laptop.c:(.text+0xab26e): more undefined references to `input_event' follow drivers/built-in.o: In function `acpi_fujitsu_hotkey_add': fujitsu-laptop.c:(.text+0xab49c): undefined reference to `input_allocate_device' fujitsu-laptop.c:(.text+0xab51a): undefined reference to `input_register_device' fujitsu-laptop.c:(.text+0xab5e4): undefined reference to `input_free_device' make[1]: *** [.tmp_vmlinux1] Error 1 Signed-off-by: Randy Dunlap Signed-off-by: Andi Kleen Acked-by: Jonathan Woithe Signed-off-by: Andrew Morton Signed-off-by: Len Brown diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 0551d95..1921b8d 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -179,6 +179,7 @@ config FUJITSU_LAPTOP tristate "Fujitsu Laptop Extras" depends on X86 depends on ACPI + depends on INPUT depends on BACKLIGHT_CLASS_DEVICE ---help--- This is a driver for laptops built by Fujitsu: -- cgit v0.10.2 From f3454ae8104efb2dbf0d08ec42c6f5d0fe9225bc Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 10 Jun 2008 12:25:42 +0800 Subject: ACPICA: Add argument count checking to control method invocation via acpi_evaluate_object Error if too few arguments, warning if too many. This applies only to external programmatic control method execution, not method-to-method calls within the AML. Signed-off-by: Lin Ming Signed-off-by: Bob Moore Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/namespace/nseval.c b/drivers/acpi/namespace/nseval.c index 14bdfa9..d369164 100644 --- a/drivers/acpi/namespace/nseval.c +++ b/drivers/acpi/namespace/nseval.c @@ -138,6 +138,41 @@ acpi_status acpi_ns_evaluate(struct acpi_evaluate_info * info) return_ACPI_STATUS(AE_NULL_OBJECT); } + /* + * Calculate the number of arguments being passed to the method + */ + + info->param_count = 0; + if (info->parameters) { + while (info->parameters[info->param_count]) + info->param_count++; + } + + /* Error if too few arguments were passed in */ + + if (info->param_count < info->obj_desc->method.param_count) { + ACPI_ERROR((AE_INFO, + "Insufficient arguments - " + "method [%4.4s] needs %d, found %d", + acpi_ut_get_node_name(info->resolved_node), + info->obj_desc->method.param_count, + info->param_count)); + return_ACPI_STATUS(AE_MISSING_ARGUMENTS); + } + + /* Just a warning if too many arguments */ + + else if (info->param_count > + info->obj_desc->method.param_count) { + ACPI_WARNING((AE_INFO, + "Excess arguments - " + "method [%4.4s] needs %d, found %d", + acpi_ut_get_node_name(info-> + resolved_node), + info->obj_desc->method.param_count, + info->param_count)); + } + ACPI_DUMP_PATHNAME(info->resolved_node, "Execute Method:", ACPI_LV_INFO, _COMPONENT); diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h index 1f59117..ea627ed 100644 --- a/include/acpi/acexcep.h +++ b/include/acpi/acexcep.h @@ -108,8 +108,9 @@ #define AE_BAD_HEX_CONSTANT (acpi_status) (0x0007 | AE_CODE_PROGRAMMER) #define AE_BAD_OCTAL_CONSTANT (acpi_status) (0x0008 | AE_CODE_PROGRAMMER) #define AE_BAD_DECIMAL_CONSTANT (acpi_status) (0x0009 | AE_CODE_PROGRAMMER) +#define AE_MISSING_ARGUMENTS (acpi_status) (0x000A | AE_CODE_PROGRAMMER) -#define AE_CODE_PGM_MAX 0x0009 +#define AE_CODE_PGM_MAX 0x000A /* * Acpi table exceptions @@ -233,7 +234,8 @@ char const *acpi_gbl_exception_names_pgm[] = { "AE_ALIGNMENT", "AE_BAD_HEX_CONSTANT", "AE_BAD_OCTAL_CONSTANT", - "AE_BAD_DECIMAL_CONSTANT" + "AE_BAD_DECIMAL_CONSTANT", + "AE_MISSING_ARGUMENTS" }; char const *acpi_gbl_exception_names_tbl[] = { diff --git a/include/acpi/acstruct.h b/include/acpi/acstruct.h index a907c67..1cc7450 100644 --- a/include/acpi/acstruct.h +++ b/include/acpi/acstruct.h @@ -189,6 +189,7 @@ struct acpi_evaluate_info { union acpi_operand_object **parameters; struct acpi_namespace_node *resolved_node; union acpi_operand_object *return_object; + u8 param_count; u8 pass_number; u8 parameter_type; u8 return_object_type; -- cgit v0.10.2 From c91d924e3af08d4f98eab6ebf81f2b8ce132448f Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 10 Jun 2008 12:38:10 +0800 Subject: ACPICA: Fix for hang on GPE method invocation Fixes problem where the new method argument count validation mechanism will enter an infinite loop when a GPE method is dispatched. Problem fixed be removing the obsolete code that passes GPE block information to the notify handler via the control method parameter pointer. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/dispatcher/dsmethod.c b/drivers/acpi/dispatcher/dsmethod.c index 2509809..4613b9c 100644 --- a/drivers/acpi/dispatcher/dsmethod.c +++ b/drivers/acpi/dispatcher/dsmethod.c @@ -377,7 +377,6 @@ acpi_ds_call_control_method(struct acpi_thread_state *thread, } info->parameters = &this_walk_state->operands[0]; - info->parameter_type = ACPI_PARAM_ARGS; status = acpi_ds_init_aml_walk(next_walk_state, NULL, method_node, obj_desc->method.aml_start, diff --git a/drivers/acpi/dispatcher/dswstate.c b/drivers/acpi/dispatcher/dswstate.c index 1386ced..bda23ed 100644 --- a/drivers/acpi/dispatcher/dswstate.c +++ b/drivers/acpi/dispatcher/dswstate.c @@ -615,14 +615,8 @@ acpi_ds_init_aml_walk(struct acpi_walk_state *walk_state, walk_state->pass_number = pass_number; if (info) { - if (info->parameter_type == ACPI_PARAM_GPE) { - walk_state->gpe_event_info = - ACPI_CAST_PTR(struct acpi_gpe_event_info, - info->parameters); - } else { - walk_state->params = info->parameters; - walk_state->caller_return_desc = &info->return_object; - } + walk_state->params = info->parameters; + walk_state->caller_return_desc = &info->return_object; } status = acpi_ps_init_scope(&walk_state->parser_state, op); diff --git a/drivers/acpi/events/evgpe.c b/drivers/acpi/events/evgpe.c index e0339d4..ca356e5 100644 --- a/drivers/acpi/events/evgpe.c +++ b/drivers/acpi/events/evgpe.c @@ -565,10 +565,6 @@ static void ACPI_SYSTEM_XFACE acpi_ev_asynch_execute_gpe_method(void *context) */ info->prefix_node = local_gpe_event_info.dispatch.method_node; - info->parameters = - ACPI_CAST_PTR(union acpi_operand_object *, - gpe_event_info); - info->parameter_type = ACPI_PARAM_GPE; info->flags = ACPI_IGNORE_RETURN_VALUE; status = acpi_ns_evaluate(info); diff --git a/drivers/acpi/events/evregion.c b/drivers/acpi/events/evregion.c index 1628f59..5ab4c01 100644 --- a/drivers/acpi/events/evregion.c +++ b/drivers/acpi/events/evregion.c @@ -219,7 +219,6 @@ acpi_ev_execute_reg_method(union acpi_operand_object *region_obj, u32 function) info->prefix_node = region_obj2->extra.method_REG; info->pathname = NULL; info->parameters = args; - info->parameter_type = ACPI_PARAM_ARGS; info->flags = ACPI_IGNORE_RETURN_VALUE; /* diff --git a/drivers/acpi/namespace/nsinit.c b/drivers/acpi/namespace/nsinit.c index 6d6d930..e4c5751 100644 --- a/drivers/acpi/namespace/nsinit.c +++ b/drivers/acpi/namespace/nsinit.c @@ -542,7 +542,6 @@ acpi_ns_init_one_device(acpi_handle obj_handle, info->prefix_node = device_node; info->pathname = METHOD_NAME__INI; info->parameters = NULL; - info->parameter_type = ACPI_PARAM_ARGS; info->flags = ACPI_IGNORE_RETURN_VALUE; /* diff --git a/drivers/acpi/namespace/nsxfeval.c b/drivers/acpi/namespace/nsxfeval.c index a8d5491..b4e135e 100644 --- a/drivers/acpi/namespace/nsxfeval.c +++ b/drivers/acpi/namespace/nsxfeval.c @@ -182,7 +182,6 @@ acpi_evaluate_object(acpi_handle handle, } info->pathname = pathname; - info->parameter_type = ACPI_PARAM_ARGS; /* Convert and validate the device handle */ diff --git a/drivers/acpi/parser/psxface.c b/drivers/acpi/parser/psxface.c index 5258145..5ee09e1 100644 --- a/drivers/acpi/parser/psxface.c +++ b/drivers/acpi/parser/psxface.c @@ -335,7 +335,7 @@ acpi_ps_update_parameter_list(struct acpi_evaluate_info *info, u16 action) { acpi_native_uint i; - if ((info->parameter_type == ACPI_PARAM_ARGS) && (info->parameters)) { + if (info->parameters) { /* Update reference count for each parameter */ diff --git a/drivers/acpi/resources/rsutils.c b/drivers/acpi/resources/rsutils.c index befe230..90ed83d 100644 --- a/drivers/acpi/resources/rsutils.c +++ b/drivers/acpi/resources/rsutils.c @@ -679,7 +679,6 @@ acpi_rs_set_srs_method_data(struct acpi_namespace_node *node, info->prefix_node = node; info->pathname = METHOD_NAME__SRS; info->parameters = args; - info->parameter_type = ACPI_PARAM_ARGS; info->flags = ACPI_IGNORE_RETURN_VALUE; /* diff --git a/drivers/acpi/utilities/uteval.c b/drivers/acpi/utilities/uteval.c index 05e61be..7f1f634 100644 --- a/drivers/acpi/utilities/uteval.c +++ b/drivers/acpi/utilities/uteval.c @@ -217,7 +217,6 @@ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node, info->prefix_node = prefix_node; info->pathname = path; - info->parameter_type = ACPI_PARAM_ARGS; /* Evaluate the object/method */ diff --git a/include/acpi/acstruct.h b/include/acpi/acstruct.h index 1cc7450..818c24d 100644 --- a/include/acpi/acstruct.h +++ b/include/acpi/acstruct.h @@ -108,7 +108,6 @@ struct acpi_walk_state { union acpi_operand_object **caller_return_desc; union acpi_generic_state *control_state; /* List of control states (nested IFs) */ struct acpi_namespace_node *deferred_node; /* Used when executing deferred opcodes */ - struct acpi_gpe_event_info *gpe_event_info; /* Info for GPE (_Lxx/_Exx methods only */ union acpi_operand_object *implicit_return_obj; struct acpi_namespace_node *method_call_node; /* Called method Node */ union acpi_parse_object *method_call_op; /* method_call Op if running a method */ @@ -191,16 +190,10 @@ struct acpi_evaluate_info { union acpi_operand_object *return_object; u8 param_count; u8 pass_number; - u8 parameter_type; u8 return_object_type; u8 flags; }; -/* Types for parameter_type above */ - -#define ACPI_PARAM_ARGS 0 -#define ACPI_PARAM_GPE 1 - /* Values for Flags above */ #define ACPI_IGNORE_RETURN_VALUE 1 -- cgit v0.10.2 From 6719561f9b4281491f58ed9f0bbc179dc7db95b7 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 10 Jun 2008 12:49:56 +0800 Subject: ACPICA: Update tracking macros to reduce code/data size Changed ACPI_MODULE_NAME and ACPI_FUNCTION_NAME to use arrays of strings instead of pointers to static strings. Jan Beulich and Bob Moore. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/include/acpi/acmacros.h b/include/acpi/acmacros.h index fb41a3b..5ad6e39 100644 --- a/include/acpi/acmacros.h +++ b/include/acpi/acmacros.h @@ -414,7 +414,7 @@ struct acpi_integer_overlay { * error messages. The __FILE__ macro is not very useful for this, because it * often includes the entire pathname to the module */ -#define ACPI_MODULE_NAME(name) static char ACPI_UNUSED_VAR *_acpi_module_name = name; +#define ACPI_MODULE_NAME(name) static char ACPI_UNUSED_VAR _acpi_module_name[] = name; #else #define ACPI_MODULE_NAME(name) #endif @@ -467,19 +467,17 @@ struct acpi_integer_overlay { /* * If ACPI_GET_FUNCTION_NAME was not defined in the compiler-dependent header, * define it now. This is the case where there the compiler does not support - * a __FUNCTION__ macro or equivalent. We save the function name on the - * local stack. + * a __FUNCTION__ macro or equivalent. */ #ifndef ACPI_GET_FUNCTION_NAME #define ACPI_GET_FUNCTION_NAME _acpi_function_name /* * The Name parameter should be the procedure name as a quoted string. - * This is declared as a local string ("MyFunctionName") so that it can - * be also used by the function exit macros below. + * The function name is also used by the function exit macros below. * Note: (const char) is used to be compatible with the debug interfaces * and macros such as __FUNCTION__. */ -#define ACPI_FUNCTION_NAME(name) const char *_acpi_function_name = #name; +#define ACPI_FUNCTION_NAME(name) static const char _acpi_function_name[] = #name; #else /* Compiler supports __FUNCTION__ (or equivalent) -- Ignore this macro */ -- cgit v0.10.2 From 11f2a61ab418305167f9a3f3a31a50449222f64b Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 10 Jun 2008 12:53:01 +0800 Subject: ACPICA: Fix possible negative array index in acpi_ut_validate_exception Added NULL fields to the exception string arrays to eliminate the -1 subtraction on the SubStatus field. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/utilities/utmisc.c b/drivers/acpi/utilities/utmisc.c index 1f057b7..47354d9 100644 --- a/drivers/acpi/utilities/utmisc.c +++ b/drivers/acpi/utilities/utmisc.c @@ -64,7 +64,7 @@ ACPI_MODULE_NAME("utmisc") ******************************************************************************/ const char *acpi_ut_validate_exception(acpi_status status) { - acpi_status sub_status; + u32 sub_status; const char *exception = NULL; ACPI_FUNCTION_ENTRY(); @@ -85,32 +85,28 @@ const char *acpi_ut_validate_exception(acpi_status status) case AE_CODE_PROGRAMMER: if (sub_status <= AE_CODE_PGM_MAX) { - exception = - acpi_gbl_exception_names_pgm[sub_status - 1]; + exception = acpi_gbl_exception_names_pgm[sub_status]; } break; case AE_CODE_ACPI_TABLES: if (sub_status <= AE_CODE_TBL_MAX) { - exception = - acpi_gbl_exception_names_tbl[sub_status - 1]; + exception = acpi_gbl_exception_names_tbl[sub_status]; } break; case AE_CODE_AML: if (sub_status <= AE_CODE_AML_MAX) { - exception = - acpi_gbl_exception_names_aml[sub_status - 1]; + exception = acpi_gbl_exception_names_aml[sub_status]; } break; case AE_CODE_CONTROL: if (sub_status <= AE_CODE_CTRL_MAX) { - exception = - acpi_gbl_exception_names_ctrl[sub_status - 1]; + exception = acpi_gbl_exception_names_ctrl[sub_status]; } break; diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h index ea627ed..e5a890f 100644 --- a/include/acpi/acexcep.h +++ b/include/acpi/acexcep.h @@ -226,6 +226,7 @@ char const *acpi_gbl_exception_names_env[] = { }; char const *acpi_gbl_exception_names_pgm[] = { + NULL, "AE_BAD_PARAMETER", "AE_BAD_CHARACTER", "AE_BAD_PATHNAME", @@ -239,6 +240,7 @@ char const *acpi_gbl_exception_names_pgm[] = { }; char const *acpi_gbl_exception_names_tbl[] = { + NULL, "AE_BAD_SIGNATURE", "AE_BAD_HEADER", "AE_BAD_CHECKSUM", @@ -248,6 +250,7 @@ char const *acpi_gbl_exception_names_tbl[] = { }; char const *acpi_gbl_exception_names_aml[] = { + NULL, "AE_AML_ERROR", "AE_AML_PARSE", "AE_AML_BAD_OPCODE", @@ -285,6 +288,7 @@ char const *acpi_gbl_exception_names_aml[] = { }; char const *acpi_gbl_exception_names_ctrl[] = { + NULL, "AE_CTRL_RETURN_VALUE", "AE_CTRL_PENDING", "AE_CTRL_TERMINATE", -- cgit v0.10.2 From 67a119f990063f5662574f6d6414fe9bc5ece86a Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 10 Jun 2008 13:42:13 +0800 Subject: ACPICA: Eliminate acpi_native_uint type v2 No longer needed; replaced mostly with u32, but also acpi_size where a type that changes 32/64 bit on 32/64-bit platforms is required. v2: Fix a cast of a 32-bit int to a pointer in ACPI to avoid a compiler warning. from David Howells Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index a6dbcf4..afb3438 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -612,7 +612,7 @@ static int __init acpi_bus_init_irq(void) return 0; } -acpi_native_uint acpi_gbl_permanent_mmap; +u8 acpi_gbl_permanent_mmap; void __init acpi_early_init(void) diff --git a/drivers/acpi/dispatcher/dsinit.c b/drivers/acpi/dispatcher/dsinit.c index 610b1ee..949f7c7 100644 --- a/drivers/acpi/dispatcher/dsinit.c +++ b/drivers/acpi/dispatcher/dsinit.c @@ -151,7 +151,7 @@ acpi_ds_init_one_object(acpi_handle obj_handle, ******************************************************************************/ acpi_status -acpi_ds_initialize_objects(acpi_native_uint table_index, +acpi_ds_initialize_objects(u32 table_index, struct acpi_namespace_node * start_node) { acpi_status status; diff --git a/drivers/acpi/dispatcher/dsopcode.c b/drivers/acpi/dispatcher/dsopcode.c index a818e0d..ac0bfb1 100644 --- a/drivers/acpi/dispatcher/dsopcode.c +++ b/drivers/acpi/dispatcher/dsopcode.c @@ -848,7 +848,7 @@ acpi_ds_eval_table_region_operands(struct acpi_walk_state *walk_state, union acpi_operand_object **operand; struct acpi_namespace_node *node; union acpi_parse_object *next_op; - acpi_native_uint table_index; + u32 table_index; struct acpi_table_header *table; ACPI_FUNCTION_TRACE_PTR(ds_eval_table_region_operands, op); diff --git a/drivers/acpi/dispatcher/dswstate.c b/drivers/acpi/dispatcher/dswstate.c index bda23ed..b00d4af 100644 --- a/drivers/acpi/dispatcher/dswstate.c +++ b/drivers/acpi/dispatcher/dswstate.c @@ -70,7 +70,7 @@ acpi_status acpi_ds_result_pop(union acpi_operand_object **object, struct acpi_walk_state *walk_state) { - acpi_native_uint index; + u32 index; union acpi_generic_state *state; acpi_status status; @@ -122,7 +122,7 @@ acpi_ds_result_pop(union acpi_operand_object **object, ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Obj=%p [%s] Index=%X State=%p Num=%X\n", *object, acpi_ut_get_object_type_name(*object), - (u32) index, walk_state, walk_state->result_count)); + index, walk_state, walk_state->result_count)); return (AE_OK); } @@ -146,7 +146,7 @@ acpi_ds_result_push(union acpi_operand_object * object, { union acpi_generic_state *state; acpi_status status; - acpi_native_uint index; + u32 index; ACPI_FUNCTION_NAME(ds_result_push); @@ -400,7 +400,7 @@ void acpi_ds_obj_stack_pop_and_delete(u32 pop_count, struct acpi_walk_state *walk_state) { - acpi_native_int i; + s32 i; union acpi_operand_object *obj_desc; ACPI_FUNCTION_NAME(ds_obj_stack_pop_and_delete); @@ -409,7 +409,7 @@ acpi_ds_obj_stack_pop_and_delete(u32 pop_count, return; } - for (i = (acpi_native_int) (pop_count - 1); i >= 0; i--) { + for (i = (s32) pop_count - 1; i >= 0; i--) { if (walk_state->num_operands == 0) { return; } diff --git a/drivers/acpi/events/evevent.c b/drivers/acpi/events/evevent.c index 5d30e5b..c56c5c6 100644 --- a/drivers/acpi/events/evevent.c +++ b/drivers/acpi/events/evevent.c @@ -188,7 +188,7 @@ acpi_status acpi_ev_install_xrupt_handlers(void) static acpi_status acpi_ev_fixed_event_initialize(void) { - acpi_native_uint i; + u32 i; acpi_status status; /* @@ -231,7 +231,7 @@ u32 acpi_ev_fixed_event_detect(void) u32 int_status = ACPI_INTERRUPT_NOT_HANDLED; u32 fixed_status; u32 fixed_enable; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_NAME(ev_fixed_event_detect); @@ -260,7 +260,7 @@ u32 acpi_ev_fixed_event_detect(void) /* Found an active (signalled) event */ acpi_os_fixed_event_count(i); - int_status |= acpi_ev_fixed_event_dispatch((u32) i); + int_status |= acpi_ev_fixed_event_dispatch(i); } } diff --git a/drivers/acpi/events/evgpe.c b/drivers/acpi/events/evgpe.c index ca356e5..c5e53aa 100644 --- a/drivers/acpi/events/evgpe.c +++ b/drivers/acpi/events/evgpe.c @@ -315,7 +315,7 @@ struct acpi_gpe_event_info *acpi_ev_get_gpe_event_info(acpi_handle gpe_device, { union acpi_operand_object *obj_desc; struct acpi_gpe_block_info *gpe_block; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_ENTRY(); @@ -389,8 +389,8 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list) u32 status_reg; u32 enable_reg; acpi_cpu_flags flags; - acpi_native_uint i; - acpi_native_uint j; + u32 i; + u32 j; ACPI_FUNCTION_NAME(ev_gpe_detect); @@ -472,13 +472,7 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list) */ int_status |= acpi_ev_gpe_dispatch(&gpe_block-> - event_info[(i * - ACPI_GPE_REGISTER_WIDTH) - + - j], - (u32) j + - gpe_register_info-> - base_gpe_number); + event_info[((acpi_size) i * ACPI_GPE_REGISTER_WIDTH) + j], j + gpe_register_info->base_gpe_number); } } } diff --git a/drivers/acpi/events/evgpeblk.c b/drivers/acpi/events/evgpeblk.c index e6c4d4c..73c058e 100644 --- a/drivers/acpi/events/evgpeblk.c +++ b/drivers/acpi/events/evgpeblk.c @@ -189,8 +189,8 @@ acpi_ev_delete_gpe_handlers(struct acpi_gpe_xrupt_info *gpe_xrupt_info, struct acpi_gpe_block_info *gpe_block) { struct acpi_gpe_event_info *gpe_event_info; - acpi_native_uint i; - acpi_native_uint j; + u32 i; + u32 j; ACPI_FUNCTION_TRACE(ev_delete_gpe_handlers); @@ -203,7 +203,8 @@ acpi_ev_delete_gpe_handlers(struct acpi_gpe_xrupt_info *gpe_xrupt_info, for (j = 0; j < ACPI_GPE_REGISTER_WIDTH; j++) { gpe_event_info = &gpe_block-> - event_info[(i * ACPI_GPE_REGISTER_WIDTH) + j]; + event_info[((acpi_size) i * + ACPI_GPE_REGISTER_WIDTH) + j]; if ((gpe_event_info->flags & ACPI_GPE_DISPATCH_MASK) == ACPI_GPE_DISPATCH_HANDLER) { @@ -744,8 +745,8 @@ acpi_ev_create_gpe_info_blocks(struct acpi_gpe_block_info *gpe_block) struct acpi_gpe_event_info *gpe_event_info = NULL; struct acpi_gpe_event_info *this_event; struct acpi_gpe_register_info *this_register; - acpi_native_uint i; - acpi_native_uint j; + u32 i; + u32 j; acpi_status status; ACPI_FUNCTION_TRACE(ev_create_gpe_info_blocks); @@ -983,8 +984,8 @@ acpi_ev_initialize_gpe_block(struct acpi_namespace_node *gpe_device, struct acpi_gpe_walk_info gpe_info; u32 wake_gpe_count; u32 gpe_enabled_count; - acpi_native_uint i; - acpi_native_uint j; + u32 i; + u32 j; ACPI_FUNCTION_TRACE(ev_initialize_gpe_block); @@ -1033,7 +1034,8 @@ acpi_ev_initialize_gpe_block(struct acpi_namespace_node *gpe_device, gpe_event_info = &gpe_block-> - event_info[(i * ACPI_GPE_REGISTER_WIDTH) + j]; + event_info[((acpi_size) i * + ACPI_GPE_REGISTER_WIDTH) + j]; if (((gpe_event_info->flags & ACPI_GPE_DISPATCH_MASK) == ACPI_GPE_DISPATCH_METHOD) diff --git a/drivers/acpi/events/evmisc.c b/drivers/acpi/events/evmisc.c index 2113e58..1d5670b 100644 --- a/drivers/acpi/events/evmisc.c +++ b/drivers/acpi/events/evmisc.c @@ -575,7 +575,7 @@ acpi_status acpi_ev_release_global_lock(void) void acpi_ev_terminate(void) { - acpi_native_uint i; + u32 i; acpi_status status; ACPI_FUNCTION_TRACE(ev_terminate); @@ -589,7 +589,7 @@ void acpi_ev_terminate(void) /* Disable all fixed events */ for (i = 0; i < ACPI_NUM_FIXED_EVENTS; i++) { - status = acpi_disable_event((u32) i, 0); + status = acpi_disable_event(i, 0); if (ACPI_FAILURE(status)) { ACPI_ERROR((AE_INFO, "Could not disable fixed event %d", diff --git a/drivers/acpi/events/evregion.c b/drivers/acpi/events/evregion.c index 5ab4c01..236fbd1 100644 --- a/drivers/acpi/events/evregion.c +++ b/drivers/acpi/events/evregion.c @@ -81,7 +81,7 @@ acpi_ev_install_handler(acpi_handle obj_handle, acpi_status acpi_ev_install_region_handlers(void) { acpi_status status; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_TRACE(ev_install_region_handlers); @@ -151,7 +151,7 @@ acpi_status acpi_ev_install_region_handlers(void) acpi_status acpi_ev_initialize_op_regions(void) { acpi_status status; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_TRACE(ev_initialize_op_regions); diff --git a/drivers/acpi/events/evrgnini.c b/drivers/acpi/events/evrgnini.c index 2e3d2c5..6b94b38 100644 --- a/drivers/acpi/events/evrgnini.c +++ b/drivers/acpi/events/evrgnini.c @@ -380,7 +380,7 @@ static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node) acpi_status status; struct acpica_device_id hid; struct acpi_compatible_id_list *cid; - acpi_native_uint i; + u32 i; /* * Get the _HID and check for a PCI Root Bridge diff --git a/drivers/acpi/executer/exconfig.c b/drivers/acpi/executer/exconfig.c index 39d7421..2a32c84 100644 --- a/drivers/acpi/executer/exconfig.c +++ b/drivers/acpi/executer/exconfig.c @@ -53,7 +53,7 @@ ACPI_MODULE_NAME("exconfig") /* Local prototypes */ static acpi_status -acpi_ex_add_table(acpi_native_uint table_index, +acpi_ex_add_table(u32 table_index, struct acpi_namespace_node *parent_node, union acpi_operand_object **ddb_handle); @@ -73,7 +73,7 @@ acpi_ex_add_table(acpi_native_uint table_index, ******************************************************************************/ static acpi_status -acpi_ex_add_table(acpi_native_uint table_index, +acpi_ex_add_table(u32 table_index, struct acpi_namespace_node *parent_node, union acpi_operand_object **ddb_handle) { @@ -96,7 +96,8 @@ acpi_ex_add_table(acpi_native_uint table_index, /* Install the new table into the local data structures */ - obj_desc->reference.object = ACPI_CAST_PTR(void, table_index); + obj_desc->reference.object = ACPI_CAST_PTR(void, + (unsigned long)table_index); /* Add the table to the namespace */ @@ -128,12 +129,12 @@ acpi_ex_load_table_op(struct acpi_walk_state *walk_state, { acpi_status status; union acpi_operand_object **operand = &walk_state->operands[0]; - acpi_native_uint table_index; struct acpi_namespace_node *parent_node; struct acpi_namespace_node *start_node; struct acpi_namespace_node *parameter_node = NULL; union acpi_operand_object *ddb_handle; struct acpi_table_header *table; + u32 table_index; ACPI_FUNCTION_TRACE(ex_load_table_op); @@ -280,7 +281,7 @@ acpi_ex_load_op(union acpi_operand_object *obj_desc, { union acpi_operand_object *ddb_handle; struct acpi_table_desc table_desc; - acpi_native_uint table_index; + u32 table_index; acpi_status status; u32 length; @@ -437,7 +438,7 @@ acpi_status acpi_ex_unload_table(union acpi_operand_object *ddb_handle) { acpi_status status = AE_OK; union acpi_operand_object *table_desc = ddb_handle; - acpi_native_uint table_index; + u32 table_index; struct acpi_table_header *table; ACPI_FUNCTION_TRACE(ex_unload_table); @@ -454,9 +455,9 @@ acpi_status acpi_ex_unload_table(union acpi_operand_object *ddb_handle) return_ACPI_STATUS(AE_BAD_PARAMETER); } - /* Get the table index from the ddb_handle */ + /* Get the table index from the ddb_handle (acpi_size for 64-bit case) */ - table_index = (acpi_native_uint) table_desc->reference.object; + table_index = (u32) (acpi_size) table_desc->reference.object; /* Invoke table handler if present */ diff --git a/drivers/acpi/executer/exconvrt.c b/drivers/acpi/executer/exconvrt.c index fd954b4..261d975 100644 --- a/drivers/acpi/executer/exconvrt.c +++ b/drivers/acpi/executer/exconvrt.c @@ -288,11 +288,11 @@ acpi_ex_convert_to_ascii(acpi_integer integer, u16 base, u8 * string, u8 data_width) { acpi_integer digit; - acpi_native_uint i; - acpi_native_uint j; - acpi_native_uint k = 0; - acpi_native_uint hex_length; - acpi_native_uint decimal_length; + u32 i; + u32 j; + u32 k = 0; + u32 hex_length; + u32 decimal_length; u32 remainder; u8 supress_zeros; @@ -348,7 +348,7 @@ acpi_ex_convert_to_ascii(acpi_integer integer, /* hex_length: 2 ascii hex chars per data byte */ - hex_length = (acpi_native_uint) ACPI_MUL_2(data_width); + hex_length = ACPI_MUL_2(data_width); for (i = 0, j = (hex_length - 1); i < hex_length; i++, j--) { /* Get one hex digit, most significant digits first */ diff --git a/drivers/acpi/executer/exdump.c b/drivers/acpi/executer/exdump.c index 74f1b22..976016f 100644 --- a/drivers/acpi/executer/exdump.c +++ b/drivers/acpi/executer/exdump.c @@ -775,7 +775,7 @@ acpi_ex_dump_operands(union acpi_operand_object **operands, u32 num_levels, char *note, char *module_name, u32 line_number) { - acpi_native_uint i; + u32 i; ACPI_FUNCTION_NAME(ex_dump_operands); diff --git a/drivers/acpi/executer/exfldio.c b/drivers/acpi/executer/exfldio.c index e336b5d..9ff9d1f 100644 --- a/drivers/acpi/executer/exfldio.c +++ b/drivers/acpi/executer/exfldio.c @@ -153,14 +153,15 @@ acpi_ex_setup_region(union acpi_operand_object *obj_desc, /* * Slack mode only: We will go ahead and allow access to this * field if it is within the region length rounded up to the next - * access width boundary. + * access width boundary. acpi_size cast for 64-bit compile. */ if (ACPI_ROUND_UP(rgn_desc->region.length, obj_desc->common_field. access_byte_width) >= - (obj_desc->common_field.base_byte_offset + - (acpi_native_uint) obj_desc->common_field. - access_byte_width + field_datum_byte_offset)) { + ((acpi_size) obj_desc->common_field. + base_byte_offset + + obj_desc->common_field.access_byte_width + + field_datum_byte_offset)) { return_ACPI_STATUS(AE_OK); } } diff --git a/drivers/acpi/executer/exmisc.c b/drivers/acpi/executer/exmisc.c index cc956a5..731414a 100644 --- a/drivers/acpi/executer/exmisc.c +++ b/drivers/acpi/executer/exmisc.c @@ -329,8 +329,8 @@ acpi_ex_do_concatenate(union acpi_operand_object *operand0, /* Result of two Strings is a String */ - return_desc = acpi_ut_create_string_object((acpi_size) - (operand0->string. + return_desc = acpi_ut_create_string_object(((acpi_size) + operand0->string. length + local_operand1-> string.length)); @@ -352,8 +352,8 @@ acpi_ex_do_concatenate(union acpi_operand_object *operand0, /* Result of two Buffers is a Buffer */ - return_desc = acpi_ut_create_buffer_object((acpi_size) - (operand0->buffer. + return_desc = acpi_ut_create_buffer_object(((acpi_size) + operand0->buffer. length + local_operand1-> buffer.length)); diff --git a/drivers/acpi/executer/exregion.c b/drivers/acpi/executer/exregion.c index 7cd8bb5..7a41c40 100644 --- a/drivers/acpi/executer/exregion.c +++ b/drivers/acpi/executer/exregion.c @@ -156,7 +156,7 @@ acpi_ex_system_memory_space_handler(u32 function, /* Create a new mapping starting at the address given */ mem_info->mapped_logical_address = - acpi_os_map_memory((acpi_native_uint) address, window_size); + acpi_os_map_memory((acpi_physical_address) address, window_size); if (!mem_info->mapped_logical_address) { ACPI_ERROR((AE_INFO, "Could not map memory at %8.8X%8.8X, size %X", diff --git a/drivers/acpi/namespace/nsdump.c b/drivers/acpi/namespace/nsdump.c index 5445751..904f951 100644 --- a/drivers/acpi/namespace/nsdump.c +++ b/drivers/acpi/namespace/nsdump.c @@ -73,7 +73,7 @@ acpi_ns_dump_one_device(acpi_handle obj_handle, void acpi_ns_print_pathname(u32 num_segments, char *pathname) { - acpi_native_uint i; + u32 i; ACPI_FUNCTION_NAME(ns_print_pathname); diff --git a/drivers/acpi/namespace/nsload.c b/drivers/acpi/namespace/nsload.c index 2c92f6c..a4a412b 100644 --- a/drivers/acpi/namespace/nsload.c +++ b/drivers/acpi/namespace/nsload.c @@ -71,8 +71,7 @@ static acpi_status acpi_ns_delete_subtree(acpi_handle start_handle); ******************************************************************************/ acpi_status -acpi_ns_load_table(acpi_native_uint table_index, - struct acpi_namespace_node *node) +acpi_ns_load_table(u32 table_index, struct acpi_namespace_node *node) { acpi_status status; diff --git a/drivers/acpi/namespace/nsparse.c b/drivers/acpi/namespace/nsparse.c index 46a79b0..a82271a 100644 --- a/drivers/acpi/namespace/nsparse.c +++ b/drivers/acpi/namespace/nsparse.c @@ -63,13 +63,13 @@ ACPI_MODULE_NAME("nsparse") * ******************************************************************************/ acpi_status -acpi_ns_one_complete_parse(acpi_native_uint pass_number, - acpi_native_uint table_index, - struct acpi_namespace_node * start_node) +acpi_ns_one_complete_parse(u32 pass_number, + u32 table_index, + struct acpi_namespace_node *start_node) { union acpi_parse_object *parse_root; acpi_status status; - acpi_native_uint aml_length; + u32 aml_length; u8 *aml_start; struct acpi_walk_state *walk_state; struct acpi_table_header *table; @@ -112,8 +112,8 @@ acpi_ns_one_complete_parse(acpi_native_uint pass_number, aml_start = (u8 *) table + sizeof(struct acpi_table_header); aml_length = table->length - sizeof(struct acpi_table_header); status = acpi_ds_init_aml_walk(walk_state, parse_root, NULL, - aml_start, (u32) aml_length, - NULL, (u8) pass_number); + aml_start, aml_length, NULL, + (u8) pass_number); } if (ACPI_FAILURE(status)) { @@ -158,8 +158,7 @@ acpi_ns_one_complete_parse(acpi_native_uint pass_number, ******************************************************************************/ acpi_status -acpi_ns_parse_table(acpi_native_uint table_index, - struct acpi_namespace_node *start_node) +acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node) { acpi_status status; diff --git a/drivers/acpi/namespace/nsutils.c b/drivers/acpi/namespace/nsutils.c index 64c0398..5b5619c 100644 --- a/drivers/acpi/namespace/nsutils.c +++ b/drivers/acpi/namespace/nsutils.c @@ -365,7 +365,7 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info) char *internal_name = info->internal_name; char *external_name = info->next_external_char; char *result = NULL; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_TRACE(ns_build_internal_name); @@ -400,12 +400,11 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info) result = &internal_name[i]; } else if (num_segments == 2) { internal_name[i] = AML_DUAL_NAME_PREFIX; - result = &internal_name[(acpi_native_uint) (i + 1)]; + result = &internal_name[(acpi_size) i + 1]; } else { internal_name[i] = AML_MULTI_NAME_PREFIX_OP; - internal_name[(acpi_native_uint) (i + 1)] = - (char)num_segments; - result = &internal_name[(acpi_native_uint) (i + 2)]; + internal_name[(acpi_size) i + 1] = (char)num_segments; + result = &internal_name[(acpi_size) i + 2]; } } @@ -531,12 +530,12 @@ acpi_ns_externalize_name(u32 internal_name_length, char *internal_name, u32 * converted_name_length, char **converted_name) { - acpi_native_uint names_index = 0; - acpi_native_uint num_segments = 0; - acpi_native_uint required_length; - acpi_native_uint prefix_length = 0; - acpi_native_uint i = 0; - acpi_native_uint j = 0; + u32 names_index = 0; + u32 num_segments = 0; + u32 required_length; + u32 prefix_length = 0; + u32 i = 0; + u32 j = 0; ACPI_FUNCTION_TRACE(ns_externalize_name); @@ -582,9 +581,8 @@ acpi_ns_externalize_name(u32 internal_name_length, /* 4-byte names */ names_index = prefix_length + 2; - num_segments = (acpi_native_uint) (u8) - internal_name[(acpi_native_uint) - (prefix_length + 1)]; + num_segments = (u8) + internal_name[(acpi_size) prefix_length + 1]; break; case AML_DUAL_NAME_PREFIX: diff --git a/drivers/acpi/namespace/nsxfeval.c b/drivers/acpi/namespace/nsxfeval.c index b4e135e..38be586 100644 --- a/drivers/acpi/namespace/nsxfeval.c +++ b/drivers/acpi/namespace/nsxfeval.c @@ -441,7 +441,7 @@ acpi_ns_get_device_callback(acpi_handle obj_handle, u32 flags; struct acpica_device_id hid; struct acpi_compatible_id_list *cid; - acpi_native_uint i; + u32 i; int found; status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); diff --git a/drivers/acpi/parser/psargs.c b/drivers/acpi/parser/psargs.c index e944637..d830b29 100644 --- a/drivers/acpi/parser/psargs.c +++ b/drivers/acpi/parser/psargs.c @@ -76,7 +76,7 @@ acpi_ps_get_next_package_length(struct acpi_parse_state *parser_state) { u8 *aml = parser_state->aml; u32 package_length = 0; - acpi_native_uint byte_count; + u32 byte_count; u8 byte_zero_mask = 0x3F; /* Default [0:5] */ ACPI_FUNCTION_TRACE(ps_get_next_package_length); @@ -86,7 +86,7 @@ acpi_ps_get_next_package_length(struct acpi_parse_state *parser_state) * used to encode the package length, either 0,1,2, or 3 */ byte_count = (aml[0] >> 6); - parser_state->aml += (byte_count + 1); + parser_state->aml += ((acpi_size) byte_count + 1); /* Get bytes 3, 2, 1 as needed */ diff --git a/drivers/acpi/parser/psxface.c b/drivers/acpi/parser/psxface.c index 5ee09e1..270469a 100644 --- a/drivers/acpi/parser/psxface.c +++ b/drivers/acpi/parser/psxface.c @@ -333,7 +333,7 @@ acpi_status acpi_ps_execute_method(struct acpi_evaluate_info *info) static void acpi_ps_update_parameter_list(struct acpi_evaluate_info *info, u16 action) { - acpi_native_uint i; + u32 i; if (info->parameters) { diff --git a/drivers/acpi/resources/rscalc.c b/drivers/acpi/resources/rscalc.c index 8a112d1..f61ebc6 100644 --- a/drivers/acpi/resources/rscalc.c +++ b/drivers/acpi/resources/rscalc.c @@ -73,7 +73,7 @@ acpi_rs_stream_option_length(u32 resource_length, u32 minimum_total_length); static u8 acpi_rs_count_set_bits(u16 bit_field) { - acpi_native_uint bits_set; + u8 bits_set; ACPI_FUNCTION_ENTRY(); @@ -84,7 +84,7 @@ static u8 acpi_rs_count_set_bits(u16 bit_field) bit_field &= (u16) (bit_field - 1); } - return ((u8) bits_set); + return bits_set; } /******************************************************************************* diff --git a/drivers/acpi/resources/rsmisc.c b/drivers/acpi/resources/rsmisc.c index de1ac38..96a6c03 100644 --- a/drivers/acpi/resources/rsmisc.c +++ b/drivers/acpi/resources/rsmisc.c @@ -82,7 +82,7 @@ acpi_rs_convert_aml_to_resource(struct acpi_resource *resource, ACPI_FUNCTION_TRACE(rs_convert_aml_to_resource); - if (((acpi_native_uint) resource) & 0x3) { + if (((acpi_size) resource) & 0x3) { /* Each internal resource struct is expected to be 32-bit aligned */ diff --git a/drivers/acpi/resources/rsutils.c b/drivers/acpi/resources/rsutils.c index 90ed83d..f7b3bcd 100644 --- a/drivers/acpi/resources/rsutils.c +++ b/drivers/acpi/resources/rsutils.c @@ -62,7 +62,7 @@ ACPI_MODULE_NAME("rsutils") ******************************************************************************/ u8 acpi_rs_decode_bitmask(u16 mask, u8 * list) { - acpi_native_uint i; + u8 i; u8 bit_count; ACPI_FUNCTION_ENTRY(); @@ -71,7 +71,7 @@ u8 acpi_rs_decode_bitmask(u16 mask, u8 * list) for (i = 0, bit_count = 0; mask; i++) { if (mask & 0x0001) { - list[bit_count] = (u8) i; + list[bit_count] = i; bit_count++; } @@ -96,8 +96,8 @@ u8 acpi_rs_decode_bitmask(u16 mask, u8 * list) u16 acpi_rs_encode_bitmask(u8 * list, u8 count) { - acpi_native_uint i; - acpi_native_uint mask; + u32 i; + u16 mask; ACPI_FUNCTION_ENTRY(); @@ -107,7 +107,7 @@ u16 acpi_rs_encode_bitmask(u8 * list, u8 count) mask |= (0x1 << list[i]); } - return ((u16) mask); + return mask; } /******************************************************************************* @@ -130,7 +130,7 @@ u16 acpi_rs_encode_bitmask(u8 * list, u8 count) void acpi_rs_move_data(void *destination, void *source, u16 item_count, u8 move_type) { - acpi_native_uint i; + u32 i; ACPI_FUNCTION_ENTRY(); diff --git a/drivers/acpi/tables/tbfadt.c b/drivers/acpi/tables/tbfadt.c index 949d411..a4a41ba 100644 --- a/drivers/acpi/tables/tbfadt.c +++ b/drivers/acpi/tables/tbfadt.c @@ -155,7 +155,7 @@ acpi_tb_init_generic_address(struct acpi_generic_address *generic_address, * ******************************************************************************/ -void acpi_tb_parse_fadt(acpi_native_uint table_index, u8 flags) +void acpi_tb_parse_fadt(u32 table_index, u8 flags) { u32 length; struct acpi_table_header *table; @@ -280,7 +280,7 @@ static void acpi_tb_convert_fadt(void) { u8 pm1_register_length; struct acpi_generic_address *target; - acpi_native_uint i; + u32 i; /* Update the local FADT table header length */ @@ -396,7 +396,7 @@ static void acpi_tb_validate_fadt(void) u32 *address32; struct acpi_generic_address *address64; u8 length; - acpi_native_uint i; + u32 i; /* Examine all of the 64-bit extended address fields (X fields) */ diff --git a/drivers/acpi/tables/tbfind.c b/drivers/acpi/tables/tbfind.c index 9ca3afc..531584d 100644 --- a/drivers/acpi/tables/tbfind.c +++ b/drivers/acpi/tables/tbfind.c @@ -65,10 +65,9 @@ ACPI_MODULE_NAME("tbfind") ******************************************************************************/ acpi_status acpi_tb_find_table(char *signature, - char *oem_id, - char *oem_table_id, acpi_native_uint * table_index) + char *oem_id, char *oem_table_id, u32 *table_index) { - acpi_native_uint i; + u32 i; acpi_status status; struct acpi_table_header header; diff --git a/drivers/acpi/tables/tbinstal.c b/drivers/acpi/tables/tbinstal.c index 5336ce8..b22185f 100644 --- a/drivers/acpi/tables/tbinstal.c +++ b/drivers/acpi/tables/tbinstal.c @@ -107,11 +107,10 @@ acpi_status acpi_tb_verify_table(struct acpi_table_desc *table_desc) ******************************************************************************/ acpi_status -acpi_tb_add_table(struct acpi_table_desc *table_desc, - acpi_native_uint * table_index) +acpi_tb_add_table(struct acpi_table_desc *table_desc, u32 *table_index) { - acpi_native_uint i; - acpi_native_uint length; + u32 i; + u32 length; acpi_status status = AE_OK; ACPI_FUNCTION_TRACE(tb_add_table); @@ -207,8 +206,8 @@ acpi_status acpi_tb_resize_root_table_list(void) /* Increase the Table Array size */ - tables = ACPI_ALLOCATE_ZEROED((acpi_gbl_root_table_list.size + - ACPI_ROOT_TABLE_SIZE_INCREMENT) + tables = ACPI_ALLOCATE_ZEROED(((acpi_size) acpi_gbl_root_table_list. + size + ACPI_ROOT_TABLE_SIZE_INCREMENT) * sizeof(struct acpi_table_desc)); if (!tables) { ACPI_ERROR((AE_INFO, @@ -220,7 +219,7 @@ acpi_status acpi_tb_resize_root_table_list(void) if (acpi_gbl_root_table_list.tables) { ACPI_MEMCPY(tables, acpi_gbl_root_table_list.tables, - acpi_gbl_root_table_list.size * + (acpi_size) acpi_gbl_root_table_list.size * sizeof(struct acpi_table_desc)); if (acpi_gbl_root_table_list.flags & ACPI_ROOT_ORIGIN_ALLOCATED) { @@ -253,7 +252,7 @@ acpi_status acpi_tb_resize_root_table_list(void) acpi_status acpi_tb_store_table(acpi_physical_address address, struct acpi_table_header *table, - u32 length, u8 flags, acpi_native_uint * table_index) + u32 length, u8 flags, u32 *table_index) { acpi_status status = AE_OK; @@ -334,7 +333,7 @@ void acpi_tb_delete_table(struct acpi_table_desc *table_desc) void acpi_tb_terminate(void) { - acpi_native_uint i; + u32 i; ACPI_FUNCTION_TRACE(tb_terminate); @@ -374,7 +373,7 @@ void acpi_tb_terminate(void) * ******************************************************************************/ -void acpi_tb_delete_namespace_by_owner(acpi_native_uint table_index) +void acpi_tb_delete_namespace_by_owner(u32 table_index) { acpi_owner_id owner_id; @@ -403,7 +402,7 @@ void acpi_tb_delete_namespace_by_owner(acpi_native_uint table_index) * ******************************************************************************/ -acpi_status acpi_tb_allocate_owner_id(acpi_native_uint table_index) +acpi_status acpi_tb_allocate_owner_id(u32 table_index) { acpi_status status = AE_BAD_PARAMETER; @@ -431,7 +430,7 @@ acpi_status acpi_tb_allocate_owner_id(acpi_native_uint table_index) * ******************************************************************************/ -acpi_status acpi_tb_release_owner_id(acpi_native_uint table_index) +acpi_status acpi_tb_release_owner_id(u32 table_index) { acpi_status status = AE_BAD_PARAMETER; @@ -462,8 +461,7 @@ acpi_status acpi_tb_release_owner_id(acpi_native_uint table_index) * ******************************************************************************/ -acpi_status -acpi_tb_get_owner_id(acpi_native_uint table_index, acpi_owner_id * owner_id) +acpi_status acpi_tb_get_owner_id(u32 table_index, acpi_owner_id *owner_id) { acpi_status status = AE_BAD_PARAMETER; @@ -490,7 +488,7 @@ acpi_tb_get_owner_id(acpi_native_uint table_index, acpi_owner_id * owner_id) * ******************************************************************************/ -u8 acpi_tb_is_table_loaded(acpi_native_uint table_index) +u8 acpi_tb_is_table_loaded(u32 table_index) { u8 is_loaded = FALSE; @@ -518,7 +516,7 @@ u8 acpi_tb_is_table_loaded(acpi_native_uint table_index) * ******************************************************************************/ -void acpi_tb_set_table_loaded_flag(acpi_native_uint table_index, u8 is_loaded) +void acpi_tb_set_table_loaded_flag(u32 table_index, u8 is_loaded) { (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); diff --git a/drivers/acpi/tables/tbutils.c b/drivers/acpi/tables/tbutils.c index bc019b9..0cc92ef 100644 --- a/drivers/acpi/tables/tbutils.c +++ b/drivers/acpi/tables/tbutils.c @@ -49,8 +49,8 @@ ACPI_MODULE_NAME("tbutils") /* Local prototypes */ static acpi_physical_address -acpi_tb_get_root_table_entry(u8 * table_entry, - acpi_native_uint table_entry_size); +acpi_tb_get_root_table_entry(u8 *table_entry, u32 table_entry_size); + /******************************************************************************* * * FUNCTION: acpi_tb_check_xsdt @@ -238,7 +238,7 @@ acpi_status acpi_tb_verify_checksum(struct acpi_table_header *table, u32 length) * ******************************************************************************/ -u8 acpi_tb_checksum(u8 * buffer, acpi_native_uint length) +u8 acpi_tb_checksum(u8 *buffer, u32 length) { u8 sum = 0; u8 *end = buffer + length; @@ -268,7 +268,7 @@ u8 acpi_tb_checksum(u8 * buffer, acpi_native_uint length) void acpi_tb_install_table(acpi_physical_address address, - u8 flags, char *signature, acpi_native_uint table_index) + u8 flags, char *signature, u32 table_index) { struct acpi_table_header *table; @@ -336,8 +336,7 @@ acpi_tb_install_table(acpi_physical_address address, ******************************************************************************/ static acpi_physical_address -acpi_tb_get_root_table_entry(u8 * table_entry, - acpi_native_uint table_entry_size) +acpi_tb_get_root_table_entry(u8 *table_entry, u32 table_entry_size) { u64 address64; @@ -395,8 +394,8 @@ acpi_status __init acpi_tb_parse_root_table(acpi_physical_address rsdp_address, u8 flags) { struct acpi_table_rsdp *rsdp; - acpi_native_uint table_entry_size; - acpi_native_uint i; + u32 table_entry_size; + u32 i; u32 table_count; struct acpi_table_header *table; acpi_physical_address address; diff --git a/drivers/acpi/tables/tbxface.c b/drivers/acpi/tables/tbxface.c index 0e31960..fd7770a 100644 --- a/drivers/acpi/tables/tbxface.c +++ b/drivers/acpi/tables/tbxface.c @@ -125,7 +125,7 @@ acpi_initialize_tables(struct acpi_table_desc * initial_table_array, /* Root Table Array has been statically allocated by the host */ ACPI_MEMSET(initial_table_array, 0, - initial_table_count * + (acpi_size) initial_table_count * sizeof(struct acpi_table_desc)); acpi_gbl_root_table_list.tables = initial_table_array; @@ -183,9 +183,9 @@ acpi_status acpi_reallocate_root_table(void) return_ACPI_STATUS(AE_SUPPORT); } - new_size = - (acpi_gbl_root_table_list.count + - ACPI_ROOT_TABLE_SIZE_INCREMENT) * sizeof(struct acpi_table_desc); + new_size = ((acpi_size) acpi_gbl_root_table_list.count + + ACPI_ROOT_TABLE_SIZE_INCREMENT) * + sizeof(struct acpi_table_desc); /* Create new array and copy the old array */ @@ -222,7 +222,7 @@ acpi_status acpi_reallocate_root_table(void) acpi_status acpi_load_table(struct acpi_table_header *table_ptr) { acpi_status status; - acpi_native_uint table_index; + u32 table_index; struct acpi_table_desc table_desc; if (!table_ptr) @@ -264,11 +264,10 @@ ACPI_EXPORT_SYMBOL(acpi_load_table) *****************************************************************************/ acpi_status acpi_get_table_header(char *signature, - acpi_native_uint instance, - struct acpi_table_header * out_table_header) + u32 instance, struct acpi_table_header *out_table_header) { - acpi_native_uint i; - acpi_native_uint j; + u32 i; + u32 j; struct acpi_table_header *header; /* Parameter validation */ @@ -378,10 +377,10 @@ ACPI_EXPORT_SYMBOL(acpi_unload_table_id) *****************************************************************************/ acpi_status acpi_get_table(char *signature, - acpi_native_uint instance, struct acpi_table_header **out_table) + u32 instance, struct acpi_table_header **out_table) { - acpi_native_uint i; - acpi_native_uint j; + u32 i; + u32 j; acpi_status status; /* Parameter validation */ @@ -435,8 +434,7 @@ ACPI_EXPORT_SYMBOL(acpi_get_table) * ******************************************************************************/ acpi_status -acpi_get_table_by_index(acpi_native_uint table_index, - struct acpi_table_header ** table) +acpi_get_table_by_index(u32 table_index, struct acpi_table_header **table) { acpi_status status; @@ -493,7 +491,7 @@ static acpi_status acpi_tb_load_namespace(void) { acpi_status status; struct acpi_table_header *table; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_TRACE(tb_load_namespace); diff --git a/drivers/acpi/tables/tbxfroot.c b/drivers/acpi/tables/tbxfroot.c index b8c0dfa..2d157e0 100644 --- a/drivers/acpi/tables/tbxfroot.c +++ b/drivers/acpi/tables/tbxfroot.c @@ -118,7 +118,7 @@ static acpi_status acpi_tb_validate_rsdp(struct acpi_table_rsdp *rsdp) * ******************************************************************************/ -acpi_status acpi_find_root_pointer(acpi_native_uint * table_address) +acpi_status acpi_find_root_pointer(acpi_size *table_address) { u8 *table_ptr; u8 *mem_rover; @@ -153,7 +153,7 @@ acpi_status acpi_find_root_pointer(acpi_native_uint * table_address) * 1b) Search EBDA paragraphs (EBDA is required to be a * minimum of 1_k length) */ - table_ptr = acpi_os_map_memory((acpi_native_uint) + table_ptr = acpi_os_map_memory((acpi_physical_address) physical_address, ACPI_EBDA_WINDOW_SIZE); if (!table_ptr) { diff --git a/drivers/acpi/utilities/utcopy.c b/drivers/acpi/utilities/utcopy.c index 655c290..53499ac 100644 --- a/drivers/acpi/utilities/utcopy.c +++ b/drivers/acpi/utilities/utcopy.c @@ -572,7 +572,7 @@ acpi_ut_copy_epackage_to_ipackage(union acpi_object *external_object, acpi_status status = AE_OK; union acpi_operand_object *package_object; union acpi_operand_object **package_elements; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_TRACE(ut_copy_epackage_to_ipackage); @@ -599,7 +599,7 @@ acpi_ut_copy_epackage_to_ipackage(union acpi_object *external_object, /* Truncate package and delete it */ - package_object->package.count = (u32) i; + package_object->package.count = i; package_elements[i] = NULL; acpi_ut_remove_reference(package_object); return_ACPI_STATUS(status); diff --git a/drivers/acpi/utilities/utdebug.c b/drivers/acpi/utilities/utdebug.c index f938f46..3919fe5 100644 --- a/drivers/acpi/utilities/utdebug.c +++ b/drivers/acpi/utilities/utdebug.c @@ -519,8 +519,8 @@ acpi_ut_ptr_exit(u32 line_number, void acpi_ut_dump_buffer2(u8 * buffer, u32 count, u32 display) { - acpi_native_uint i = 0; - acpi_native_uint j; + u32 i = 0; + u32 j; u32 temp32; u8 buf_char; @@ -539,7 +539,7 @@ void acpi_ut_dump_buffer2(u8 * buffer, u32 count, u32 display) /* Print current offset */ - acpi_os_printf("%6.4X: ", (u32) i); + acpi_os_printf("%6.4X: ", i); /* Print 16 hex chars */ @@ -549,7 +549,7 @@ void acpi_ut_dump_buffer2(u8 * buffer, u32 count, u32 display) /* Dump fill spaces */ acpi_os_printf("%*s", ((display * 2) + 1), " "); - j += (acpi_native_uint) display; + j += display; continue; } @@ -557,32 +557,38 @@ void acpi_ut_dump_buffer2(u8 * buffer, u32 count, u32 display) case DB_BYTE_DISPLAY: default: /* Default is BYTE display */ - acpi_os_printf("%02X ", buffer[i + j]); + acpi_os_printf("%02X ", + buffer[(acpi_size) i + j]); break; case DB_WORD_DISPLAY: - ACPI_MOVE_16_TO_32(&temp32, &buffer[i + j]); + ACPI_MOVE_16_TO_32(&temp32, + &buffer[(acpi_size) i + j]); acpi_os_printf("%04X ", temp32); break; case DB_DWORD_DISPLAY: - ACPI_MOVE_32_TO_32(&temp32, &buffer[i + j]); + ACPI_MOVE_32_TO_32(&temp32, + &buffer[(acpi_size) i + j]); acpi_os_printf("%08X ", temp32); break; case DB_QWORD_DISPLAY: - ACPI_MOVE_32_TO_32(&temp32, &buffer[i + j]); + ACPI_MOVE_32_TO_32(&temp32, + &buffer[(acpi_size) i + j]); acpi_os_printf("%08X", temp32); - ACPI_MOVE_32_TO_32(&temp32, &buffer[i + j + 4]); + ACPI_MOVE_32_TO_32(&temp32, + &buffer[(acpi_size) i + j + + 4]); acpi_os_printf("%08X ", temp32); break; } - j += (acpi_native_uint) display; + j += display; } /* @@ -596,7 +602,7 @@ void acpi_ut_dump_buffer2(u8 * buffer, u32 count, u32 display) return; } - buf_char = buffer[i + j]; + buf_char = buffer[(acpi_size) i + j]; if (ACPI_IS_PRINT(buf_char)) { acpi_os_printf("%c", buf_char); } else { diff --git a/drivers/acpi/utilities/utdelete.c b/drivers/acpi/utilities/utdelete.c index 1fbc351..c5c791a 100644 --- a/drivers/acpi/utilities/utdelete.c +++ b/drivers/acpi/utilities/utdelete.c @@ -442,7 +442,7 @@ acpi_ut_update_object_reference(union acpi_operand_object *object, u16 action) union acpi_generic_state *state_list = NULL; union acpi_operand_object *next_object = NULL; union acpi_generic_state *state; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_TRACE_PTR(ut_update_object_reference, object); diff --git a/drivers/acpi/utilities/uteval.c b/drivers/acpi/utilities/uteval.c index 7f1f634..352747e 100644 --- a/drivers/acpi/utilities/uteval.c +++ b/drivers/acpi/utilities/uteval.c @@ -97,7 +97,7 @@ acpi_status acpi_ut_osi_implementation(struct acpi_walk_state *walk_state) acpi_status status; union acpi_operand_object *string_desc; union acpi_operand_object *return_desc; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_TRACE(ut_osi_implementation); @@ -513,7 +513,7 @@ acpi_ut_execute_CID(struct acpi_namespace_node * device_node, u32 count; u32 size; struct acpi_compatible_id_list *cid_list; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_TRACE(ut_execute_CID); diff --git a/drivers/acpi/utilities/utmisc.c b/drivers/acpi/utilities/utmisc.c index 47354d9..6175ca5 100644 --- a/drivers/acpi/utilities/utmisc.c +++ b/drivers/acpi/utilities/utmisc.c @@ -161,9 +161,9 @@ u8 acpi_ut_is_aml_table(struct acpi_table_header *table) acpi_status acpi_ut_allocate_owner_id(acpi_owner_id * owner_id) { - acpi_native_uint i; - acpi_native_uint j; - acpi_native_uint k; + u32 i; + u32 j; + u32 k; acpi_status status; ACPI_FUNCTION_TRACE(ut_allocate_owner_id); @@ -269,7 +269,7 @@ void acpi_ut_release_owner_id(acpi_owner_id * owner_id_ptr) { acpi_owner_id owner_id = *owner_id_ptr; acpi_status status; - acpi_native_uint index; + u32 index; u32 bit; ACPI_FUNCTION_TRACE_U32(ut_release_owner_id, owner_id); @@ -589,7 +589,7 @@ acpi_ut_display_init_pathname(u8 type, * ******************************************************************************/ -u8 acpi_ut_valid_acpi_char(char character, acpi_native_uint position) +u8 acpi_ut_valid_acpi_char(char character, u32 position) { if (!((character >= 'A' && character <= 'Z') || @@ -624,7 +624,7 @@ u8 acpi_ut_valid_acpi_char(char character, acpi_native_uint position) u8 acpi_ut_valid_acpi_name(u32 name) { - acpi_native_uint i; + u32 i; ACPI_FUNCTION_ENTRY(); @@ -653,7 +653,7 @@ u8 acpi_ut_valid_acpi_name(u32 name) acpi_name acpi_ut_repair_name(char *name) { - acpi_native_uint i; + u32 i; char new_name[ACPI_NAME_SIZE]; for (i = 0; i < ACPI_NAME_SIZE; i++) { diff --git a/drivers/acpi/utilities/utobject.c b/drivers/acpi/utilities/utobject.c index e68466d..ede6a80 100644 --- a/drivers/acpi/utilities/utobject.c +++ b/drivers/acpi/utilities/utobject.c @@ -175,8 +175,8 @@ union acpi_operand_object *acpi_ut_create_package_object(u32 count) * Create the element array. Count+1 allows the array to be null * terminated. */ - package_elements = ACPI_ALLOCATE_ZEROED((acpi_size) - (count + 1) * sizeof(void *)); + package_elements = ACPI_ALLOCATE_ZEROED(((acpi_size) count + + 1) * sizeof(void *)); if (!package_elements) { acpi_ut_remove_reference(package_desc); return_PTR(NULL); diff --git a/include/acpi/acdispat.h b/include/acpi/acdispat.h index 910f018..21a73a1 100644 --- a/include/acpi/acdispat.h +++ b/include/acpi/acdispat.h @@ -221,7 +221,7 @@ acpi_ds_method_error(acpi_status status, struct acpi_walk_state *walk_state); * dsinit */ acpi_status -acpi_ds_initialize_objects(acpi_native_uint table_index, +acpi_ds_initialize_objects(u32 table_index, struct acpi_namespace_node *start_node); /* diff --git a/include/acpi/acglobal.h b/include/acpi/acglobal.h index 74ad971..15dda46 100644 --- a/include/acpi/acglobal.h +++ b/include/acpi/acglobal.h @@ -140,7 +140,7 @@ ACPI_EXTERN u32 acpi_gbl_trace_flags; */ ACPI_EXTERN struct acpi_internal_rsdt acpi_gbl_root_table_list; ACPI_EXTERN struct acpi_table_fadt acpi_gbl_FADT; -extern acpi_native_uint acpi_gbl_permanent_mmap; +extern u8 acpi_gbl_permanent_mmap; /* These addresses are calculated from FADT address values */ diff --git a/include/acpi/acmacros.h b/include/acpi/acmacros.h index 5ad6e39..d0494ea 100644 --- a/include/acpi/acmacros.h +++ b/include/acpi/acmacros.h @@ -80,12 +80,12 @@ */ #define ACPI_CAST_PTR(t, p) ((t *) (acpi_uintptr_t) (p)) #define ACPI_CAST_INDIRECT_PTR(t, p) ((t **) (acpi_uintptr_t) (p)) -#define ACPI_ADD_PTR(t,a,b) ACPI_CAST_PTR (t, (ACPI_CAST_PTR (u8,(a)) + (acpi_native_uint)(b))) -#define ACPI_PTR_DIFF(a,b) (acpi_native_uint) (ACPI_CAST_PTR (u8,(a)) - ACPI_CAST_PTR (u8,(b))) +#define ACPI_ADD_PTR(t, a, b) ACPI_CAST_PTR (t, (ACPI_CAST_PTR (u8,(a)) + (acpi_size)(b))) +#define ACPI_PTR_DIFF(a, b) (acpi_size) (ACPI_CAST_PTR (u8,(a)) - ACPI_CAST_PTR (u8,(b))) /* Pointer/Integer type conversions */ -#define ACPI_TO_POINTER(i) ACPI_ADD_PTR (void,(void *) NULL,(acpi_native_uint) i) +#define ACPI_TO_POINTER(i) ACPI_ADD_PTR (void, (void *) NULL, (acpi_size) i) #define ACPI_TO_INTEGER(p) ACPI_PTR_DIFF (p,(void *) NULL) #define ACPI_OFFSET(d,f) (acpi_size) ACPI_PTR_DIFF (&(((d *)0)->f),(void *) NULL) #define ACPI_PHYSADDR_TO_PTR(i) ACPI_TO_POINTER(i) @@ -296,22 +296,22 @@ struct acpi_integer_overlay { /* * Rounding macros (Power of two boundaries only) */ -#define ACPI_ROUND_DOWN(value,boundary) (((acpi_native_uint)(value)) & \ - (~(((acpi_native_uint) boundary)-1))) +#define ACPI_ROUND_DOWN(value, boundary) (((acpi_size)(value)) & \ + (~(((acpi_size) boundary)-1))) -#define ACPI_ROUND_UP(value,boundary) ((((acpi_native_uint)(value)) + \ - (((acpi_native_uint) boundary)-1)) & \ - (~(((acpi_native_uint) boundary)-1))) +#define ACPI_ROUND_UP(value, boundary) ((((acpi_size)(value)) + \ + (((acpi_size) boundary)-1)) & \ + (~(((acpi_size) boundary)-1))) -/* Note: sizeof(acpi_native_uint) evaluates to either 2, 4, or 8 */ +/* Note: sizeof(acpi_size) evaluates to either 4 or 8 (32- vs 64-bit mode) */ #define ACPI_ROUND_DOWN_TO_32BIT(a) ACPI_ROUND_DOWN(a,4) #define ACPI_ROUND_DOWN_TO_64BIT(a) ACPI_ROUND_DOWN(a,8) -#define ACPI_ROUND_DOWN_TO_NATIVE_WORD(a) ACPI_ROUND_DOWN(a,sizeof(acpi_native_uint)) +#define ACPI_ROUND_DOWN_TO_NATIVE_WORD(a) ACPI_ROUND_DOWN(a,sizeof(acpi_size)) #define ACPI_ROUND_UP_TO_32BIT(a) ACPI_ROUND_UP(a,4) #define ACPI_ROUND_UP_TO_64BIT(a) ACPI_ROUND_UP(a,8) -#define ACPI_ROUND_UP_TO_NATIVE_WORD(a) ACPI_ROUND_UP(a,sizeof(acpi_native_uint)) +#define ACPI_ROUND_UP_TO_NATIVE_WORD(a) ACPI_ROUND_UP(a,sizeof(acpi_size)) #define ACPI_ROUND_BITS_UP_TO_BYTES(a) ACPI_DIV_8((a) + 7) #define ACPI_ROUND_BITS_DOWN_TO_BYTES(a) ACPI_DIV_8((a)) @@ -322,7 +322,7 @@ struct acpi_integer_overlay { #define ACPI_ROUND_UP_TO(value,boundary) (((value) + ((boundary)-1)) / (boundary)) -#define ACPI_IS_MISALIGNED(value) (((acpi_native_uint)value) & (sizeof(acpi_native_uint)-1)) +#define ACPI_IS_MISALIGNED(value) (((acpi_size)value) & (sizeof(acpi_size)-1)) /* * Bitmask creation diff --git a/include/acpi/acnamesp.h b/include/acpi/acnamesp.h index 713b309..d622c30 100644 --- a/include/acpi/acnamesp.h +++ b/include/acpi/acnamesp.h @@ -86,8 +86,7 @@ acpi_status acpi_ns_initialize_devices(void); acpi_status acpi_ns_load_namespace(void); acpi_status -acpi_ns_load_table(acpi_native_uint table_index, - struct acpi_namespace_node *node); +acpi_ns_load_table(u32 table_index, struct acpi_namespace_node *node); /* * nswalk - walk the namespace @@ -108,12 +107,11 @@ struct acpi_namespace_node *acpi_ns_get_next_node(acpi_object_type type, struct * nsparse - table parsing */ acpi_status -acpi_ns_parse_table(acpi_native_uint table_index, - struct acpi_namespace_node *start_node); +acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node); acpi_status -acpi_ns_one_complete_parse(acpi_native_uint pass_number, - acpi_native_uint table_index, +acpi_ns_one_complete_parse(u32 pass_number, + u32 table_index, struct acpi_namespace_node *start_node); /* diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index d4a560d..3f93a6b 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -144,7 +144,7 @@ void acpi_os_release_mutex(acpi_mutex handle); void *acpi_os_allocate(acpi_size size); void __iomem *acpi_os_map_memory(acpi_physical_address where, - acpi_native_uint length); + acpi_size length); void acpi_os_unmap_memory(void __iomem * logical_address, acpi_size size); diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 2c3806e..bd95ecc 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -98,7 +98,7 @@ void acpi_free(void *address); */ acpi_status acpi_reallocate_root_table(void); -acpi_status acpi_find_root_pointer(acpi_native_uint * rsdp_address); +acpi_status acpi_find_root_pointer(acpi_size *rsdp_address); acpi_status acpi_load_tables(void); @@ -108,15 +108,15 @@ acpi_status acpi_unload_table_id(acpi_owner_id id); acpi_status acpi_get_table_header(acpi_string signature, - acpi_native_uint instance, + u32 instance, struct acpi_table_header *out_table_header); acpi_status acpi_get_table(acpi_string signature, - acpi_native_uint instance, struct acpi_table_header **out_table); + u32 instance, struct acpi_table_header **out_table); acpi_status -acpi_get_table_by_index(acpi_native_uint table_index, +acpi_get_table_by_index(u32 table_index, struct acpi_table_header **out_table); acpi_status diff --git a/include/acpi/acstruct.h b/include/acpi/acstruct.h index 818c24d..7980a26 100644 --- a/include/acpi/acstruct.h +++ b/include/acpi/acstruct.h @@ -142,7 +142,7 @@ struct acpi_init_walk_info { u16 package_init; u16 object_count; acpi_owner_id owner_id; - acpi_native_uint table_index; + u32 table_index; }; struct acpi_get_devices_info { diff --git a/include/acpi/actables.h b/include/acpi/actables.h index 4b36a55..0cbe1b9 100644 --- a/include/acpi/actables.h +++ b/include/acpi/actables.h @@ -49,7 +49,7 @@ acpi_status acpi_allocate_root_table(u32 initial_table_count); /* * tbfadt - FADT parse/convert/validate */ -void acpi_tb_parse_fadt(acpi_native_uint table_index, u8 flags); +void acpi_tb_parse_fadt(u32 table_index, u8 flags); void acpi_tb_create_local_fadt(struct acpi_table_header *table, u32 length); @@ -58,8 +58,7 @@ void acpi_tb_create_local_fadt(struct acpi_table_header *table, u32 length); */ acpi_status acpi_tb_find_table(char *signature, - char *oem_id, - char *oem_table_id, acpi_native_uint * table_index); + char *oem_id, char *oem_table_id, u32 *table_index); /* * tbinstal - Table removal and deletion @@ -69,30 +68,28 @@ acpi_status acpi_tb_resize_root_table_list(void); acpi_status acpi_tb_verify_table(struct acpi_table_desc *table_desc); acpi_status -acpi_tb_add_table(struct acpi_table_desc *table_desc, - acpi_native_uint * table_index); +acpi_tb_add_table(struct acpi_table_desc *table_desc, u32 *table_index); acpi_status acpi_tb_store_table(acpi_physical_address address, struct acpi_table_header *table, - u32 length, u8 flags, acpi_native_uint * table_index); + u32 length, u8 flags, u32 *table_index); void acpi_tb_delete_table(struct acpi_table_desc *table_desc); void acpi_tb_terminate(void); -void acpi_tb_delete_namespace_by_owner(acpi_native_uint table_index); +void acpi_tb_delete_namespace_by_owner(u32 table_index); -acpi_status acpi_tb_allocate_owner_id(acpi_native_uint table_index); +acpi_status acpi_tb_allocate_owner_id(u32 table_index); -acpi_status acpi_tb_release_owner_id(acpi_native_uint table_index); +acpi_status acpi_tb_release_owner_id(u32 table_index); -acpi_status -acpi_tb_get_owner_id(acpi_native_uint table_index, acpi_owner_id * owner_id); +acpi_status acpi_tb_get_owner_id(u32 table_index, acpi_owner_id *owner_id); -u8 acpi_tb_is_table_loaded(acpi_native_uint table_index); +u8 acpi_tb_is_table_loaded(u32 table_index); -void acpi_tb_set_table_loaded_flag(acpi_native_uint table_index, u8 is_loaded); +void acpi_tb_set_table_loaded_flag(u32 table_index, u8 is_loaded); /* * tbutils - table manager utilities @@ -103,14 +100,14 @@ void acpi_tb_print_table_header(acpi_physical_address address, struct acpi_table_header *header); -u8 acpi_tb_checksum(u8 * buffer, acpi_native_uint length); +u8 acpi_tb_checksum(u8 *buffer, u32 length); acpi_status acpi_tb_verify_checksum(struct acpi_table_header *table, u32 length); void acpi_tb_install_table(acpi_physical_address address, - u8 flags, char *signature, acpi_native_uint table_index); + u8 flags, char *signature, u32 table_index); acpi_status acpi_tb_parse_root_table(acpi_physical_address rsdp_address, u8 flags); diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index dfea2d4..4ea4f40 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -110,10 +110,10 @@ * usually used for memory allocation, efficient loop counters, and array * indexes. The types are similar to the size_t type in the C library and are * required because there is no C type that consistently represents the native - * data width. + * data width. ACPI_SIZE is needed because there is no guarantee that a + * kernel-level C library is present. * * ACPI_SIZE 16/32/64-bit unsigned value - * ACPI_NATIVE_UINT 16/32/64-bit unsigned value * ACPI_NATIVE_INT 16/32/64-bit signed value * */ @@ -147,9 +147,9 @@ typedef int INT32; /*! [End] no source code translation !*/ -typedef u64 acpi_native_uint; typedef s64 acpi_native_int; +typedef u64 acpi_size; typedef u64 acpi_io_address; typedef u64 acpi_physical_address; @@ -186,9 +186,9 @@ typedef int INT32; /*! [End] no source code translation !*/ -typedef u32 acpi_native_uint; typedef s32 acpi_native_int; +typedef u32 acpi_size; typedef u32 acpi_io_address; typedef u32 acpi_physical_address; @@ -202,10 +202,6 @@ typedef u32 acpi_physical_address; #error unknown ACPI_MACHINE_WIDTH #endif -/* Variable-width type, used instead of clib size_t */ - -typedef acpi_native_uint acpi_size; - /******************************************************************************* * * OS-dependent and compiler-dependent types @@ -219,7 +215,7 @@ typedef acpi_native_uint acpi_size; /* Value returned by acpi_os_get_thread_id */ #ifndef acpi_thread_id -#define acpi_thread_id acpi_native_uint +#define acpi_thread_id acpi_size #endif /* Object returned from acpi_os_create_lock */ @@ -231,7 +227,7 @@ typedef acpi_native_uint acpi_size; /* Flags for acpi_os_acquire_lock/acpi_os_release_lock */ #ifndef acpi_cpu_flags -#define acpi_cpu_flags acpi_native_uint +#define acpi_cpu_flags acpi_size #endif /* Object returned from acpi_os_create_cache */ diff --git a/include/acpi/acutils.h b/include/acpi/acutils.h index b42cadf..3f663a0 100644 --- a/include/acpi/acutils.h +++ b/include/acpi/acutils.h @@ -172,7 +172,7 @@ char *acpi_ut_strstr(char *string1, char *string2); void *acpi_ut_memcpy(void *dest, const void *src, acpi_size count); -void *acpi_ut_memset(void *dest, acpi_native_uint value, acpi_size count); +void *acpi_ut_memset(void *dest, u8 value, acpi_size count); int acpi_ut_to_upper(int c); @@ -476,7 +476,7 @@ u8 acpi_ut_valid_acpi_name(u32 name); acpi_name acpi_ut_repair_name(char *name); -u8 acpi_ut_valid_acpi_char(char character, acpi_native_uint position); +u8 acpi_ut_valid_acpi_char(char character, u32 position); acpi_status acpi_ut_strtoul64(char *string, u32 base, acpi_integer * ret_integer); -- cgit v0.10.2 From d97b4358da9bdaee5789c85415d770e36aebca52 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 10 Jun 2008 13:43:34 +0800 Subject: ACPICA: Removed unused include files from source files From lint. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/executer/excreate.c b/drivers/acpi/executer/excreate.c index 60e62c4..ad09696 100644 --- a/drivers/acpi/executer/excreate.c +++ b/drivers/acpi/executer/excreate.c @@ -45,8 +45,6 @@ #include #include #include -#include -#include #define _COMPONENT ACPI_EXECUTER ACPI_MODULE_NAME("excreate") -- cgit v0.10.2 From b52437641edf63cee2f2f73a189154989b4a7ff4 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 10 Jun 2008 13:44:48 +0800 Subject: ACPICA: Several lint changes, no functional changes Remove pointer cast warnings and fix for a debug printf. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/executer/exprep.c b/drivers/acpi/executer/exprep.c index 3a2f8cd..5d438c3 100644 --- a/drivers/acpi/executer/exprep.c +++ b/drivers/acpi/executer/exprep.c @@ -503,11 +503,11 @@ acpi_status acpi_ex_prep_field_value(struct acpi_create_field_info *info) */ second_desc = obj_desc->common.next_object; second_desc->extra.aml_start = - ((union acpi_parse_object *)(info->data_register_node))-> - named.data; + ACPI_CAST_PTR(union acpi_parse_object, + info->data_register_node)->named.data; second_desc->extra.aml_length = - ((union acpi_parse_object *)(info->data_register_node))-> - named.length; + ACPI_CAST_PTR(union acpi_parse_object, + info->data_register_node)->named.length; break; -- cgit v0.10.2 From 4b8ed631679070c183c8ae7519d2bdb9df124ae4 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 10 Jun 2008 13:55:53 +0800 Subject: ACPICA: Add const qualifier for appropriate string constants Mostly MODULE_NAME and printf format strings. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/executer/exdump.c b/drivers/acpi/executer/exdump.c index 976016f..c94638d 100644 --- a/drivers/acpi/executer/exdump.c +++ b/drivers/acpi/executer/exdump.c @@ -771,9 +771,10 @@ void acpi_ex_dump_operand(union acpi_operand_object *obj_desc, u32 depth) void acpi_ex_dump_operands(union acpi_operand_object **operands, acpi_interpreter_mode interpreter_mode, - char *ident, + const char *ident, u32 num_levels, - char *note, char *module_name, u32 line_number) + const char *note, + const char *module_name, u32 line_number) { u32 i; diff --git a/drivers/acpi/namespace/nsutils.c b/drivers/acpi/namespace/nsutils.c index 5b5619c..b0817e1 100644 --- a/drivers/acpi/namespace/nsutils.c +++ b/drivers/acpi/namespace/nsutils.c @@ -73,9 +73,9 @@ acpi_name acpi_ns_find_parent_name(struct acpi_namespace_node *node_to_search); ******************************************************************************/ void -acpi_ns_report_error(char *module_name, +acpi_ns_report_error(const char *module_name, u32 line_number, - char *internal_name, acpi_status lookup_status) + const char *internal_name, acpi_status lookup_status) { acpi_status status; u32 bad_name; @@ -130,11 +130,11 @@ acpi_ns_report_error(char *module_name, ******************************************************************************/ void -acpi_ns_report_method_error(char *module_name, +acpi_ns_report_method_error(const char *module_name, u32 line_number, - char *message, + const char *message, struct acpi_namespace_node *prefix_node, - char *path, acpi_status method_status) + const char *path, acpi_status method_status) { acpi_status status; struct acpi_namespace_node *node = prefix_node; @@ -167,7 +167,8 @@ acpi_ns_report_method_error(char *module_name, ******************************************************************************/ void -acpi_ns_print_node_pathname(struct acpi_namespace_node *node, char *message) +acpi_ns_print_node_pathname(struct acpi_namespace_node *node, + const char *message) { struct acpi_buffer buffer; acpi_status status; @@ -296,7 +297,7 @@ u32 acpi_ns_local(acpi_object_type type) void acpi_ns_get_internal_name_length(struct acpi_namestring_info *info) { - char *next_external_char; + const char *next_external_char; u32 i; ACPI_FUNCTION_ENTRY(); @@ -363,7 +364,7 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info) { u32 num_segments = info->num_segments; char *internal_name = info->internal_name; - char *external_name = info->next_external_char; + const char *external_name = info->next_external_char; char *result = NULL; u32 i; @@ -471,7 +472,8 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info) * *******************************************************************************/ -acpi_status acpi_ns_internalize_name(char *external_name, char **converted_name) +acpi_status +acpi_ns_internalize_name(const char *external_name, char **converted_name) { char *internal_name; struct acpi_namestring_info info; @@ -527,7 +529,7 @@ acpi_status acpi_ns_internalize_name(char *external_name, char **converted_name) acpi_status acpi_ns_externalize_name(u32 internal_name_length, - char *internal_name, + const char *internal_name, u32 * converted_name_length, char **converted_name) { u32 names_index = 0; @@ -821,7 +823,7 @@ u32 acpi_ns_opens_scope(acpi_object_type type) acpi_status acpi_ns_get_node(struct acpi_namespace_node *prefix_node, - char *pathname, + const char *pathname, u32 flags, struct acpi_namespace_node **return_node) { union acpi_generic_state scope_info; diff --git a/drivers/acpi/utilities/utalloc.c b/drivers/acpi/utilities/utalloc.c index ede0848..3dfb8a4 100644 --- a/drivers/acpi/utilities/utalloc.c +++ b/drivers/acpi/utilities/utalloc.c @@ -309,7 +309,8 @@ acpi_ut_initialize_buffer(struct acpi_buffer * buffer, * ******************************************************************************/ -void *acpi_ut_allocate(acpi_size size, u32 component, char *module, u32 line) +void *acpi_ut_allocate(acpi_size size, + u32 component, const char *module, u32 line) { void *allocation; @@ -353,7 +354,7 @@ void *acpi_ut_allocate(acpi_size size, u32 component, char *module, u32 line) ******************************************************************************/ void *acpi_ut_allocate_zeroed(acpi_size size, - u32 component, char *module, u32 line) + u32 component, const char *module, u32 line) { void *allocation; diff --git a/drivers/acpi/utilities/utdebug.c b/drivers/acpi/utilities/utdebug.c index 3919fe5..fd66ecb 100644 --- a/drivers/acpi/utilities/utdebug.c +++ b/drivers/acpi/utilities/utdebug.c @@ -157,7 +157,8 @@ void ACPI_INTERNAL_VAR_XFACE acpi_ut_debug_print(u32 requested_debug_level, u32 line_number, const char *function_name, - char *module_name, u32 component_id, char *format, ...) + const char *module_name, + u32 component_id, const char *format, ...) { acpi_thread_id thread_id; va_list args; @@ -228,7 +229,8 @@ void ACPI_INTERNAL_VAR_XFACE acpi_ut_debug_print_raw(u32 requested_debug_level, u32 line_number, const char *function_name, - char *module_name, u32 component_id, char *format, ...) + const char *module_name, + u32 component_id, const char *format, ...) { va_list args; @@ -261,7 +263,8 @@ ACPI_EXPORT_SYMBOL(acpi_ut_debug_print_raw) ******************************************************************************/ void acpi_ut_trace(u32 line_number, - const char *function_name, char *module_name, u32 component_id) + const char *function_name, + const char *module_name, u32 component_id) { acpi_gbl_nesting_level++; @@ -293,7 +296,7 @@ ACPI_EXPORT_SYMBOL(acpi_ut_trace) void acpi_ut_trace_ptr(u32 line_number, const char *function_name, - char *module_name, u32 component_id, void *pointer) + const char *module_name, u32 component_id, void *pointer) { acpi_gbl_nesting_level++; acpi_ut_track_stack_ptr(); @@ -324,7 +327,7 @@ acpi_ut_trace_ptr(u32 line_number, void acpi_ut_trace_str(u32 line_number, const char *function_name, - char *module_name, u32 component_id, char *string) + const char *module_name, u32 component_id, char *string) { acpi_gbl_nesting_level++; @@ -356,7 +359,7 @@ acpi_ut_trace_str(u32 line_number, void acpi_ut_trace_u32(u32 line_number, const char *function_name, - char *module_name, u32 component_id, u32 integer) + const char *module_name, u32 component_id, u32 integer) { acpi_gbl_nesting_level++; @@ -386,7 +389,8 @@ acpi_ut_trace_u32(u32 line_number, void acpi_ut_exit(u32 line_number, - const char *function_name, char *module_name, u32 component_id) + const char *function_name, + const char *module_name, u32 component_id) { acpi_ut_debug_print(ACPI_LV_FUNCTIONS, @@ -417,7 +421,8 @@ ACPI_EXPORT_SYMBOL(acpi_ut_exit) void acpi_ut_status_exit(u32 line_number, const char *function_name, - char *module_name, u32 component_id, acpi_status status) + const char *module_name, + u32 component_id, acpi_status status) { if (ACPI_SUCCESS(status)) { @@ -458,7 +463,8 @@ ACPI_EXPORT_SYMBOL(acpi_ut_status_exit) void acpi_ut_value_exit(u32 line_number, const char *function_name, - char *module_name, u32 component_id, acpi_integer value) + const char *module_name, + u32 component_id, acpi_integer value) { acpi_ut_debug_print(ACPI_LV_FUNCTIONS, @@ -490,7 +496,7 @@ ACPI_EXPORT_SYMBOL(acpi_ut_value_exit) void acpi_ut_ptr_exit(u32 line_number, const char *function_name, - char *module_name, u32 component_id, u8 * ptr) + const char *module_name, u32 component_id, u8 *ptr) { acpi_ut_debug_print(ACPI_LV_FUNCTIONS, diff --git a/drivers/acpi/utilities/utmisc.c b/drivers/acpi/utilities/utmisc.c index 6175ca5..f34be67 100644 --- a/drivers/acpi/utilities/utmisc.c +++ b/drivers/acpi/utilities/utmisc.c @@ -1020,7 +1020,7 @@ acpi_ut_walk_package_tree(union acpi_operand_object * source_object, ******************************************************************************/ void ACPI_INTERNAL_VAR_XFACE -acpi_ut_error(char *module_name, u32 line_number, char *format, ...) +acpi_ut_error(const char *module_name, u32 line_number, const char *format, ...) { va_list args; @@ -1033,8 +1033,8 @@ acpi_ut_error(char *module_name, u32 line_number, char *format, ...) } void ACPI_INTERNAL_VAR_XFACE -acpi_ut_exception(char *module_name, - u32 line_number, acpi_status status, char *format, ...) +acpi_ut_exception(const char *module_name, + u32 line_number, acpi_status status, const char *format, ...) { va_list args; @@ -1050,7 +1050,8 @@ acpi_ut_exception(char *module_name, EXPORT_SYMBOL(acpi_ut_exception); void ACPI_INTERNAL_VAR_XFACE -acpi_ut_warning(char *module_name, u32 line_number, char *format, ...) +acpi_ut_warning(const char *module_name, + u32 line_number, const char *format, ...) { va_list args; @@ -1063,7 +1064,7 @@ acpi_ut_warning(char *module_name, u32 line_number, char *format, ...) } void ACPI_INTERNAL_VAR_XFACE -acpi_ut_info(char *module_name, u32 line_number, char *format, ...) +acpi_ut_info(const char *module_name, u32 line_number, const char *format, ...) { va_list args; diff --git a/drivers/acpi/utilities/utobject.c b/drivers/acpi/utilities/utobject.c index ede6a80..e254844 100644 --- a/drivers/acpi/utilities/utobject.c +++ b/drivers/acpi/utilities/utobject.c @@ -83,7 +83,8 @@ acpi_ut_get_element_length(u8 object_type, * ******************************************************************************/ -union acpi_operand_object *acpi_ut_create_internal_object_dbg(char *module_name, +union acpi_operand_object *acpi_ut_create_internal_object_dbg(const char + *module_name, u32 line_number, u32 component_id, acpi_object_type @@ -347,7 +348,7 @@ u8 acpi_ut_valid_internal_object(void *object) * ******************************************************************************/ -void *acpi_ut_allocate_object_desc_dbg(char *module_name, +void *acpi_ut_allocate_object_desc_dbg(const char *module_name, u32 line_number, u32 component_id) { union acpi_operand_object *object; diff --git a/include/acpi/acinterp.h b/include/acpi/acinterp.h index e249ce5..accd120 100644 --- a/include/acpi/acinterp.h +++ b/include/acpi/acinterp.h @@ -367,9 +367,10 @@ void acpi_ex_dump_operand(union acpi_operand_object *obj_desc, u32 depth); void acpi_ex_dump_operands(union acpi_operand_object **operands, acpi_interpreter_mode interpreter_mode, - char *ident, + const char *ident, u32 num_levels, - char *note, char *module_name, u32 line_number); + const char *note, + const char *module_name, u32 line_number); #ifdef ACPI_FUTURE_USAGE void diff --git a/include/acpi/aclocal.h b/include/acpi/aclocal.h index 62d7468..b221c85 100644 --- a/include/acpi/aclocal.h +++ b/include/acpi/aclocal.h @@ -282,8 +282,8 @@ struct acpi_predefined_names { /* Info structure used to convert external<->internal namestrings */ struct acpi_namestring_info { - char *external_name; - char *next_external_char; + const char *external_name; + const char *next_external_char; char *internal_name; u32 length; u32 num_segments; diff --git a/include/acpi/acmacros.h b/include/acpi/acmacros.h index d0494ea..77439df 100644 --- a/include/acpi/acmacros.h +++ b/include/acpi/acmacros.h @@ -414,7 +414,7 @@ struct acpi_integer_overlay { * error messages. The __FILE__ macro is not very useful for this, because it * often includes the entire pathname to the module */ -#define ACPI_MODULE_NAME(name) static char ACPI_UNUSED_VAR _acpi_module_name[] = name; +#define ACPI_MODULE_NAME(name) static const char ACPI_UNUSED_VAR _acpi_module_name[] = name; #else #define ACPI_MODULE_NAME(name) #endif diff --git a/include/acpi/acnamesp.h b/include/acpi/acnamesp.h index d622c30..9ed70a0 100644 --- a/include/acpi/acnamesp.h +++ b/include/acpi/acnamesp.h @@ -199,7 +199,7 @@ acpi_ns_pattern_match(struct acpi_namespace_node *obj_node, char *search_for); acpi_status acpi_ns_get_node(struct acpi_namespace_node *prefix_node, - char *external_pathname, + const char *external_pathname, u32 flags, struct acpi_namespace_node **out_node); acpi_size acpi_ns_get_pathname_length(struct acpi_namespace_node *node); @@ -263,28 +263,30 @@ acpi_object_type acpi_ns_get_type(struct acpi_namespace_node *node); u32 acpi_ns_local(acpi_object_type type); void -acpi_ns_report_error(char *module_name, +acpi_ns_report_error(const char *module_name, u32 line_number, - char *internal_name, acpi_status lookup_status); + const char *internal_name, acpi_status lookup_status); void -acpi_ns_report_method_error(char *module_name, +acpi_ns_report_method_error(const char *module_name, u32 line_number, - char *message, + const char *message, struct acpi_namespace_node *node, - char *path, acpi_status lookup_status); + const char *path, acpi_status lookup_status); -void acpi_ns_print_node_pathname(struct acpi_namespace_node *node, char *msg); +void +acpi_ns_print_node_pathname(struct acpi_namespace_node *node, const char *msg); acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info); void acpi_ns_get_internal_name_length(struct acpi_namestring_info *info); -acpi_status acpi_ns_internalize_name(char *dotted_name, char **converted_name); +acpi_status +acpi_ns_internalize_name(const char *dotted_name, char **converted_name); acpi_status acpi_ns_externalize_name(u32 internal_name_length, - char *internal_name, + const char *internal_name, u32 * converted_name_length, char **converted_name); struct acpi_namespace_node *acpi_ns_map_handle_to_node(acpi_handle handle); diff --git a/include/acpi/acutils.h b/include/acpi/acutils.h index 3f663a0..69f8888 100644 --- a/include/acpi/acutils.h +++ b/include/acpi/acutils.h @@ -245,41 +245,45 @@ void acpi_ut_track_stack_ptr(void); void acpi_ut_trace(u32 line_number, - const char *function_name, char *module_name, u32 component_id); + const char *function_name, + const char *module_name, u32 component_id); void acpi_ut_trace_ptr(u32 line_number, const char *function_name, - char *module_name, u32 component_id, void *pointer); + const char *module_name, u32 component_id, void *pointer); void acpi_ut_trace_u32(u32 line_number, const char *function_name, - char *module_name, u32 component_id, u32 integer); + const char *module_name, u32 component_id, u32 integer); void acpi_ut_trace_str(u32 line_number, const char *function_name, - char *module_name, u32 component_id, char *string); + const char *module_name, u32 component_id, char *string); void acpi_ut_exit(u32 line_number, - const char *function_name, char *module_name, u32 component_id); + const char *function_name, + const char *module_name, u32 component_id); void acpi_ut_status_exit(u32 line_number, const char *function_name, - char *module_name, u32 component_id, acpi_status status); + const char *module_name, + u32 component_id, acpi_status status); void acpi_ut_value_exit(u32 line_number, const char *function_name, - char *module_name, u32 component_id, acpi_integer value); + const char *module_name, + u32 component_id, acpi_integer value); void acpi_ut_ptr_exit(u32 line_number, const char *function_name, - char *module_name, u32 component_id, u8 * ptr); + const char *module_name, u32 component_id, u8 *ptr); void acpi_ut_dump_buffer(u8 * buffer, u32 count, u32 display, u32 component_id); @@ -297,33 +301,35 @@ void ACPI_INTERNAL_VAR_XFACE acpi_ut_debug_print(u32 requested_debug_level, u32 line_number, const char *function_name, - char *module_name, - u32 component_id, char *format, ...) ACPI_PRINTF_LIKE(6); + const char *module_name, + u32 component_id, + const char *format, ...) ACPI_PRINTF_LIKE(6); void ACPI_INTERNAL_VAR_XFACE acpi_ut_debug_print_raw(u32 requested_debug_level, u32 line_number, const char *function_name, - char *module_name, + const char *module_name, u32 component_id, - char *format, ...) ACPI_PRINTF_LIKE(6); + const char *format, ...) ACPI_PRINTF_LIKE(6); void ACPI_INTERNAL_VAR_XFACE -acpi_ut_error(char *module_name, - u32 line_number, char *format, ...) ACPI_PRINTF_LIKE(3); +acpi_ut_error(const char *module_name, + u32 line_number, const char *format, ...) ACPI_PRINTF_LIKE(3); void ACPI_INTERNAL_VAR_XFACE -acpi_ut_exception(char *module_name, +acpi_ut_exception(const char *module_name, u32 line_number, - acpi_status status, char *format, ...) ACPI_PRINTF_LIKE(4); + acpi_status status, + const char *format, ...) ACPI_PRINTF_LIKE(4); void ACPI_INTERNAL_VAR_XFACE -acpi_ut_warning(char *module_name, - u32 line_number, char *format, ...) ACPI_PRINTF_LIKE(3); +acpi_ut_warning(const char *module_name, + u32 line_number, const char *format, ...) ACPI_PRINTF_LIKE(3); void ACPI_INTERNAL_VAR_XFACE -acpi_ut_info(char *module_name, - u32 line_number, char *format, ...) ACPI_PRINTF_LIKE(3); +acpi_ut_info(const char *module_name, + u32 line_number, const char *format, ...) ACPI_PRINTF_LIKE(3); /* * utdelete - Object deletion and reference counts @@ -376,13 +382,14 @@ acpi_ut_execute_sxds(struct acpi_namespace_node *device_node, u8 * highest); /* * utobject - internal object create/delete/cache routines */ -union acpi_operand_object *acpi_ut_create_internal_object_dbg(char *module_name, +union acpi_operand_object *acpi_ut_create_internal_object_dbg(const char + *module_name, u32 line_number, u32 component_id, acpi_object_type type); -void *acpi_ut_allocate_object_desc_dbg(char *module_name, +void *acpi_ut_allocate_object_desc_dbg(const char *module_name, u32 line_number, u32 component_id); #define acpi_ut_create_internal_object(t) acpi_ut_create_internal_object_dbg (_acpi_module_name,__LINE__,_COMPONENT,t) @@ -543,26 +550,29 @@ acpi_status acpi_ut_initialize_buffer(struct acpi_buffer *buffer, acpi_size required_length); -void *acpi_ut_allocate(acpi_size size, u32 component, char *module, u32 line); +void *acpi_ut_allocate(acpi_size size, + u32 component, const char *module, u32 line); void *acpi_ut_allocate_zeroed(acpi_size size, - u32 component, char *module, u32 line); + u32 component, const char *module, u32 line); #ifdef ACPI_DBG_TRACK_ALLOCATIONS void *acpi_ut_allocate_and_track(acpi_size size, - u32 component, char *module, u32 line); + u32 component, const char *module, u32 line); void *acpi_ut_allocate_zeroed_and_track(acpi_size size, - u32 component, char *module, u32 line); + u32 component, + const char *module, u32 line); void -acpi_ut_free_and_track(void *address, u32 component, char *module, u32 line); +acpi_ut_free_and_track(void *address, + u32 component, const char *module, u32 line); #ifdef ACPI_FUTURE_USAGE void acpi_ut_dump_allocation_info(void); #endif /* ACPI_FUTURE_USAGE */ -void acpi_ut_dump_allocations(u32 component, char *module); +void acpi_ut_dump_allocations(u32 component, const char *module); acpi_status acpi_ut_create_list(char *list_name, -- cgit v0.10.2 From b25d2a470bc9ffef4c34248952d914bd6fc0fcf6 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 10 Jun 2008 14:15:05 +0800 Subject: ACPICA: Update version to 20080514 Update version to 20080514 Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h index 28fe8ba..f81ba60 100644 --- a/include/acpi/acconfig.h +++ b/include/acpi/acconfig.h @@ -63,7 +63,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20080321 +#define ACPI_CA_VERSION 0x20080514 /* * OS name, used for the _OS object. The _OS object is essentially obsolete, -- cgit v0.10.2 From d0e184abc5983281ef189db2c759d65d56eb1b80 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 10 Jun 2008 14:16:47 +0800 Subject: ACPICA: Workaround for reversed _PRT entries from BIOS Some BIOSs erroneously reverse the _PRT SourceName and the SourceIndex. Detect and repair this problem. MS ACPI also allows and repairs this problem, thus ACPICA must also. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/resources/rscreate.c b/drivers/acpi/resources/rscreate.c index faddaee..70c84ec 100644 --- a/drivers/acpi/resources/rscreate.c +++ b/drivers/acpi/resources/rscreate.c @@ -285,6 +285,23 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object, } /* + * If the BIOS has erroneously reversed the _PRT source_name (index 2) + * and the source_index (index 3), fix it. _PRT is important enough to + * workaround this BIOS error. This also provides compatibility with + * other ACPI implementations. + */ + obj_desc = sub_object_list[3]; + if (!obj_desc + || (ACPI_GET_OBJECT_TYPE(obj_desc) != ACPI_TYPE_INTEGER)) { + sub_object_list[3] = sub_object_list[2]; + sub_object_list[2] = obj_desc; + + ACPI_WARNING((AE_INFO, + "(PRT[%X].Source) SourceName and SourceIndex are reversed, fixed", + index)); + } + + /* * 3) Third subobject: Dereference the PRT.source_name * The name may be unresolved (slack mode), so allow a null object */ -- cgit v0.10.2 From 19d0cfe9ddfdf7afa8d1765ab0bd2a7dd30e47c9 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 10 Jun 2008 15:54:40 +0800 Subject: ACPICA: Update DMAR and SRAT table definitions Synchronized tables with current specifications. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index f41d67f..1eb2973 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c @@ -156,10 +156,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity) num_memory_chunks++; - printk(KERN_DEBUG "Memory range %08lx to %08lx (type %x)" + printk(KERN_DEBUG "Memory range %08lx to %08lx" " in proximity domain %02x %s\n", start_pfn, end_pfn, - memory_affinity->memory_type, pxm, ((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ? "enabled and removable" : "enabled" ) ); diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 658e5f3..cb9864e 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -120,10 +120,10 @@ acpi_table_print_srat_entry(struct acpi_subtable_header *header) struct acpi_srat_mem_affinity *p = (struct acpi_srat_mem_affinity *)header; ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "SRAT Memory (0x%lx length 0x%lx type 0x%x) in proximity domain %d %s%s\n", + "SRAT Memory (0x%lx length 0x%lx) in proximity domain %d %s%s\n", (unsigned long)p->base_address, (unsigned long)p->length, - p->memory_type, p->proximity_domain, + p->proximity_domain, (p->flags & ACPI_SRAT_MEM_ENABLED)? "enabled" : "disabled", (p->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)? diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 9af239b..dfb0fb5 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -300,6 +300,7 @@ struct acpi_table_dbgp { /******************************************************************************* * * DMAR - DMA Remapping table + * From "Intel Virtualization Technology for Directed I/O", Sept. 2007 * ******************************************************************************/ @@ -382,6 +383,20 @@ struct acpi_dmar_reserved_memory { #define ACPI_DMAR_ALLOW_ALL (1) + +/* 2: Root Port ATS Capability Reporting Structure */ + +struct acpi_dmar_atsr { + struct acpi_dmar_header header; + u8 flags; + u8 reserved; + u16 segment; +}; + +/* Flags */ + +#define ACPI_DMAR_ALL_PORTS (1) + /******************************************************************************* * * ECDT - Embedded Controller Boot Resources Table @@ -1156,9 +1171,9 @@ struct acpi_srat_mem_affinity { u16 reserved; /* Reserved, must be zero */ u64 base_address; u64 length; - u32 memory_type; /* See acpi_address_range_id */ + u32 reserved1; u32 flags; - u64 reserved1; /* Reserved, must be zero */ + u64 reserved2; /* Reserved, must be zero */ }; /* Flags */ -- cgit v0.10.2 From 75e5b5fb778646a93d98adb1ca697435362d2856 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 10 Jun 2008 14:19:32 +0800 Subject: ACPICA: Update disassembler for DMAR table changes Now supports the 2007 intel Virtualization Technology for Directed I/O specification. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/include/acpi/acdisasm.h b/include/acpi/acdisasm.h index 788f887..f53faca 100644 --- a/include/acpi/acdisasm.h +++ b/include/acpi/acdisasm.h @@ -162,6 +162,7 @@ extern struct acpi_dmtable_info acpi_dm_table_info_dmar_hdr[]; extern struct acpi_dmtable_info acpi_dm_table_info_dmar_scope[]; extern struct acpi_dmtable_info acpi_dm_table_info_dmar0[]; extern struct acpi_dmtable_info acpi_dm_table_info_dmar1[]; +extern struct acpi_dmtable_info acpi_dm_table_info_dmar2[]; extern struct acpi_dmtable_info acpi_dm_table_info_ecdt[]; extern struct acpi_dmtable_info acpi_dm_table_info_einj[]; extern struct acpi_dmtable_info acpi_dm_table_info_einj0[]; diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index dfb0fb5..d38f9be2 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -311,6 +311,10 @@ struct acpi_table_dmar { u8 reserved[10]; }; +/* Flags */ + +#define ACPI_DMAR_INTR_REMAP (1) + /* DMAR subtable header */ struct acpi_dmar_header { -- cgit v0.10.2 From 5a1a57efeb152d6b8a3b2a20f6b192d074e919ec Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 10 Jun 2008 14:21:05 +0800 Subject: ACPICA: Fix for invalid large array index on 64-bit systems This problem was introduced in 20080514 as a result of the elimination of the acpi_native_uint type. Code uses a negative array index, which should be eliminated. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/executer/exdump.c b/drivers/acpi/executer/exdump.c index c94638d..f337c3f 100644 --- a/drivers/acpi/executer/exdump.c +++ b/drivers/acpi/executer/exdump.c @@ -776,7 +776,7 @@ acpi_ex_dump_operands(union acpi_operand_object **operands, const char *note, const char *module_name, u32 line_number) { - u32 i; + s32 i; ACPI_FUNCTION_NAME(ex_dump_operands); -- cgit v0.10.2 From 71d993e115706a4108bdc7e3cb3cf25309f17aa6 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 10 Jun 2008 14:25:05 +0800 Subject: ACPICA: Cleanup debug operand dump mechanism Eliminated unnecessary operands; eliminated use of negative index in loop. Operands now displayed in correct order, not backwards. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/dispatcher/dsopcode.c b/drivers/acpi/dispatcher/dsopcode.c index ac0bfb1..6a81c44 100644 --- a/drivers/acpi/dispatcher/dsopcode.c +++ b/drivers/acpi/dispatcher/dsopcode.c @@ -691,12 +691,6 @@ acpi_ds_eval_buffer_field_operands(struct acpi_walk_state *walk_state, status = acpi_ex_resolve_operands(op->common.aml_opcode, ACPI_WALK_OPERANDS, walk_state); - - ACPI_DUMP_OPERANDS(ACPI_WALK_OPERANDS, ACPI_IMODE_EXECUTE, - acpi_ps_get_opcode_name(op->common.aml_opcode), - walk_state->num_operands, - "after AcpiExResolveOperands"); - if (ACPI_FAILURE(status)) { ACPI_ERROR((AE_INFO, "(%s) bad operand(s) (%X)", acpi_ps_get_opcode_name(op->common.aml_opcode), @@ -785,10 +779,6 @@ acpi_ds_eval_region_operands(struct acpi_walk_state *walk_state, return_ACPI_STATUS(status); } - ACPI_DUMP_OPERANDS(ACPI_WALK_OPERANDS, ACPI_IMODE_EXECUTE, - acpi_ps_get_opcode_name(op->common.aml_opcode), - 1, "after AcpiExResolveOperands"); - obj_desc = acpi_ns_get_attached_object(node); if (!obj_desc) { return_ACPI_STATUS(AE_NOT_EXIST); @@ -882,10 +872,6 @@ acpi_ds_eval_table_region_operands(struct acpi_walk_state *walk_state, return_ACPI_STATUS(status); } - ACPI_DUMP_OPERANDS(ACPI_WALK_OPERANDS, ACPI_IMODE_EXECUTE, - acpi_ps_get_opcode_name(op->common.aml_opcode), - 1, "after AcpiExResolveOperands"); - operand = &walk_state->operands[0]; /* Find the ACPI table */ @@ -1091,10 +1077,8 @@ acpi_ds_eval_bank_field_operands(struct acpi_walk_state *walk_state, return_ACPI_STATUS(status); } - ACPI_DUMP_OPERANDS(ACPI_WALK_OPERANDS, ACPI_IMODE_EXECUTE, - acpi_ps_get_opcode_name(op->common.aml_opcode), - 1, "after AcpiExResolveOperands"); - + ACPI_DUMP_OPERANDS(ACPI_WALK_OPERANDS, + acpi_ps_get_opcode_name(op->common.aml_opcode), 1); /* * Get the bank_value operand and save it * (at Top of stack) diff --git a/drivers/acpi/dispatcher/dswexec.c b/drivers/acpi/dispatcher/dswexec.c index b246b96..b5072fa 100644 --- a/drivers/acpi/dispatcher/dswexec.c +++ b/drivers/acpi/dispatcher/dswexec.c @@ -408,14 +408,6 @@ acpi_status acpi_ds_exec_end_op(struct acpi_walk_state *walk_state) [walk_state-> num_operands - 1]), walk_state); - if (ACPI_SUCCESS(status)) { - ACPI_DUMP_OPERANDS(ACPI_WALK_OPERANDS, - ACPI_IMODE_EXECUTE, - acpi_ps_get_opcode_name - (walk_state->opcode), - walk_state->num_operands, - "after ExResolveOperands"); - } } if (ACPI_SUCCESS(status)) { diff --git a/drivers/acpi/executer/exdump.c b/drivers/acpi/executer/exdump.c index f337c3f..2be2e2b 100644 --- a/drivers/acpi/executer/exdump.c +++ b/drivers/acpi/executer/exdump.c @@ -580,25 +580,22 @@ void acpi_ex_dump_operand(union acpi_operand_object *obj_desc, u32 depth) case ACPI_TYPE_BUFFER: - acpi_os_printf("Buffer len %X @ %p\n", + acpi_os_printf("Buffer length %.2X @ %p\n", obj_desc->buffer.length, obj_desc->buffer.pointer); - length = obj_desc->buffer.length; - if (length > 64) { - length = 64; - } - /* Debug only -- dump the buffer contents */ if (obj_desc->buffer.pointer) { - acpi_os_printf("Buffer Contents: "); - - for (index = 0; index < length; index++) { - acpi_os_printf(" %02x", - obj_desc->buffer.pointer[index]); + length = obj_desc->buffer.length; + if (length > 128) { + length = 128; } - acpi_os_printf("\n"); + + acpi_os_printf + ("Buffer Contents: (displaying length 0x%.2X)\n", + length); + ACPI_DUMP_BUFFER(obj_desc->buffer.pointer, length); } break; @@ -756,55 +753,42 @@ void acpi_ex_dump_operand(union acpi_operand_object *obj_desc, u32 depth) * * FUNCTION: acpi_ex_dump_operands * - * PARAMETERS: Operands - Operand list - * interpreter_mode - Load or Exec - * Ident - Identification - * num_levels - # of stack entries to dump above line - * Note - Output notation - * module_name - Caller's module name - * line_number - Caller's invocation line number + * PARAMETERS: Operands - A list of Operand objects + * opcode_name - AML opcode name + * num_operands - Operand count for this opcode * - * DESCRIPTION: Dump the object stack + * DESCRIPTION: Dump the operands associated with the opcode * ******************************************************************************/ void acpi_ex_dump_operands(union acpi_operand_object **operands, - acpi_interpreter_mode interpreter_mode, - const char *ident, - u32 num_levels, - const char *note, - const char *module_name, u32 line_number) + const char *opcode_name, u32 num_operands) { - s32 i; - ACPI_FUNCTION_NAME(ex_dump_operands); - if (!ident) { - ident = "?"; - } - - if (!note) { - note = "?"; + if (!opcode_name) { + opcode_name = "UNKNOWN"; } ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "************* Operand Stack Contents (Opcode [%s], %d Operands)\n", - ident, num_levels)); + "**** Start operand dump for opcode [%s], %d operands\n", + opcode_name, num_operands)); - if (num_levels == 0) { - num_levels = 1; + if (num_operands == 0) { + num_operands = 1; } - /* Dump the operand stack starting at the top */ + /* Dump the individual operands */ - for (i = 0; num_levels > 0; i--, num_levels--) { - acpi_ex_dump_operand(operands[i], 0); + while (num_operands) { + acpi_ex_dump_operand(*operands, 0); + operands++; + num_operands--; } ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "************* Operand Stack dump from %s(%d), %s\n", - module_name, line_number, note)); + "**** End operand dump for [%s]\n", opcode_name)); return; } diff --git a/drivers/acpi/executer/exresop.c b/drivers/acpi/executer/exresop.c index 73e29e5..54085f1 100644 --- a/drivers/acpi/executer/exresop.c +++ b/drivers/acpi/executer/exresop.c @@ -698,5 +698,9 @@ acpi_ex_resolve_operands(u16 opcode, } } + ACPI_DUMP_OPERANDS(walk_state->operands, + acpi_ps_get_opcode_name(opcode), + walk_state->num_operands); + return_ACPI_STATUS(status); } diff --git a/drivers/acpi/executer/exstore.c b/drivers/acpi/executer/exstore.c index 76c875b..38b55e3 100644 --- a/drivers/acpi/executer/exstore.c +++ b/drivers/acpi/executer/exstore.c @@ -343,12 +343,6 @@ acpi_ex_store(union acpi_operand_object *source_desc, acpi_ut_get_object_type_name(dest_desc), dest_desc)); - ACPI_DUMP_STACK_ENTRY(source_desc); - ACPI_DUMP_STACK_ENTRY(dest_desc); - ACPI_DUMP_OPERANDS(&dest_desc, ACPI_IMODE_EXECUTE, "ExStore", - 2, - "Target is not a Reference or Constant object"); - return_ACPI_STATUS(AE_AML_OPERAND_TYPE); } diff --git a/drivers/acpi/namespace/nsdump.c b/drivers/acpi/namespace/nsdump.c index 904f951..0ab2200 100644 --- a/drivers/acpi/namespace/nsdump.c +++ b/drivers/acpi/namespace/nsdump.c @@ -515,12 +515,12 @@ acpi_ns_dump_one_object(acpi_handle obj_handle, if (obj_type > ACPI_TYPE_LOCAL_MAX) { acpi_os_printf - ("(Ptr to ACPI Object type %X [UNKNOWN])\n", + ("(Pointer to ACPI Object type %.2X [UNKNOWN])\n", obj_type); bytes_to_dump = 32; } else { acpi_os_printf - ("(Ptr to ACPI Object type %X [%s])\n", + ("(Pointer to ACPI Object type %.2X [%s])\n", obj_type, acpi_ut_get_type_name(obj_type)); bytes_to_dump = sizeof(union acpi_operand_object); diff --git a/include/acpi/acinterp.h b/include/acpi/acinterp.h index accd120..e8db7a3 100644 --- a/include/acpi/acinterp.h +++ b/include/acpi/acinterp.h @@ -366,11 +366,7 @@ void acpi_ex_dump_operand(union acpi_operand_object *obj_desc, u32 depth); void acpi_ex_dump_operands(union acpi_operand_object **operands, - acpi_interpreter_mode interpreter_mode, - const char *ident, - u32 num_levels, - const char *note, - const char *module_name, u32 line_number); + const char *opcode_name, u32 num_opcodes); #ifdef ACPI_FUTURE_USAGE void diff --git a/include/acpi/acmacros.h b/include/acpi/acmacros.h index 77439df..57ab9e9 100644 --- a/include/acpi/acmacros.h +++ b/include/acpi/acmacros.h @@ -597,7 +597,7 @@ struct acpi_integer_overlay { /* Stack and buffer dumping */ #define ACPI_DUMP_STACK_ENTRY(a) acpi_ex_dump_operand((a),0) -#define ACPI_DUMP_OPERANDS(a,b,c,d,e) acpi_ex_dump_operands(a,b,c,d,e,_acpi_module_name,__LINE__) +#define ACPI_DUMP_OPERANDS(a,b,c) acpi_ex_dump_operands(a,b,c) #define ACPI_DUMP_ENTRY(a,b) acpi_ns_dump_entry (a,b) #define ACPI_DUMP_PATHNAME(a,b,c,d) acpi_ns_dump_pathname(a,b,c,d) @@ -633,7 +633,7 @@ struct acpi_integer_overlay { #define ACPI_FUNCTION_VALUE_EXIT(s) do { } while(0) #define ACPI_FUNCTION_ENTRY() do { } while(0) #define ACPI_DUMP_STACK_ENTRY(a) do { } while(0) -#define ACPI_DUMP_OPERANDS(a,b,c,d,e) do { } while(0) +#define ACPI_DUMP_OPERANDS(a,b,c) do { } while(0) #define ACPI_DUMP_ENTRY(a,b) do { } while(0) #define ACPI_DUMP_TABLES(a,b) do { } while(0) #define ACPI_DUMP_PATHNAME(a,b,c,d) do { } while(0) -- cgit v0.10.2 From fd0a43276dc986e186eb27e5755a18e97e07a7eb Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 10 Jun 2008 14:27:55 +0800 Subject: ACPICA: Cleanup of _PRT parsing code Removed extraneous else clauses, other general cleanup. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/resources/rscreate.c b/drivers/acpi/resources/rscreate.c index 70c84ec..7804a8c 100644 --- a/drivers/acpi/resources/rscreate.c +++ b/drivers/acpi/resources/rscreate.c @@ -181,9 +181,9 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object, } /* - * Loop through the ACPI_INTERNAL_OBJECTS - Each object - * should be a package that in turn contains an - * acpi_integer Address, a u8 Pin, a Name and a u8 source_index. + * Loop through the ACPI_INTERNAL_OBJECTS - Each object should be a + * package that in turn contains an acpi_integer Address, a u8 Pin, + * a Name, and a u8 source_index. */ top_object_list = package_object->package.elements; number_of_elements = package_object->package.count; @@ -240,9 +240,7 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object, /* 1) First subobject: Dereference the PRT.Address */ obj_desc = sub_object_list[0]; - if (ACPI_GET_OBJECT_TYPE(obj_desc) == ACPI_TYPE_INTEGER) { - user_prt->address = obj_desc->integer.value; - } else { + if (ACPI_GET_OBJECT_TYPE(obj_desc) != ACPI_TYPE_INTEGER) { ACPI_ERROR((AE_INFO, "(PRT[%X].Address) Need Integer, found %s", index, @@ -250,12 +248,12 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object, return_ACPI_STATUS(AE_BAD_DATA); } + user_prt->address = obj_desc->integer.value; + /* 2) Second subobject: Dereference the PRT.Pin */ obj_desc = sub_object_list[1]; - if (ACPI_GET_OBJECT_TYPE(obj_desc) == ACPI_TYPE_INTEGER) { - user_prt->pin = (u32) obj_desc->integer.value; - } else { + if (ACPI_GET_OBJECT_TYPE(obj_desc) != ACPI_TYPE_INTEGER) { ACPI_ERROR((AE_INFO, "(PRT[%X].Pin) Need Integer, found %s", index, @@ -284,6 +282,8 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object, } } + user_prt->pin = (u32) obj_desc->integer.value; + /* * If the BIOS has erroneously reversed the _PRT source_name (index 2) * and the source_index (index 3), fix it. _PRT is important enough to @@ -381,9 +381,7 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object, /* 4) Fourth subobject: Dereference the PRT.source_index */ obj_desc = sub_object_list[source_index_index]; - if (ACPI_GET_OBJECT_TYPE(obj_desc) == ACPI_TYPE_INTEGER) { - user_prt->source_index = (u32) obj_desc->integer.value; - } else { + if (ACPI_GET_OBJECT_TYPE(obj_desc) != ACPI_TYPE_INTEGER) { ACPI_ERROR((AE_INFO, "(PRT[%X].SourceIndex) Need Integer, found %s", index, @@ -391,6 +389,8 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object, return_ACPI_STATUS(AE_BAD_DATA); } + user_prt->source_index = (u32) obj_desc->integer.value; + /* Point to the next union acpi_operand_object in the top level package */ top_object_list++; -- cgit v0.10.2 From b53ce3f7186e2fc561f02085b5021df10d715ce2 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 10 Jun 2008 14:30:04 +0800 Subject: ACPICA: Fix mutex debug code for wrong loop termination value Loop was terminating one iteration early, missing one of the debugger handshake mutexes. Linn Crosetto. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/utilities/utmutex.c b/drivers/acpi/utilities/utmutex.c index f7d602b..7331dde 100644 --- a/drivers/acpi/utilities/utmutex.c +++ b/drivers/acpi/utilities/utmutex.c @@ -218,7 +218,7 @@ acpi_status acpi_ut_acquire_mutex(acpi_mutex_handle mutex_id) * the mutex ordering rule. This indicates a coding error somewhere in * the ACPI subsystem code. */ - for (i = mutex_id; i < ACPI_MAX_MUTEX; i++) { + for (i = mutex_id; i < ACPI_NUM_MUTEX; i++) { if (acpi_gbl_mutex_info[i].thread_id == this_thread_id) { if (i == mutex_id) { ACPI_ERROR((AE_INFO, @@ -315,7 +315,7 @@ acpi_status acpi_ut_release_mutex(acpi_mutex_handle mutex_id) * ordering rule. This indicates a coding error somewhere in * the ACPI subsystem code. */ - for (i = mutex_id; i < ACPI_MAX_MUTEX; i++) { + for (i = mutex_id; i < ACPI_NUM_MUTEX; i++) { if (acpi_gbl_mutex_info[i].thread_id == this_thread_id) { if (i == mutex_id) { continue; -- cgit v0.10.2 From 9c9f6d052dc6f469431461a97d49cf9c5558b8ad Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 10 Jun 2008 14:37:53 +0800 Subject: ACPICA: Update version to 20080609 Update version to 20080609. Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h index f81ba60..4eb75a8 100644 --- a/include/acpi/acconfig.h +++ b/include/acpi/acconfig.h @@ -63,7 +63,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20080514 +#define ACPI_CA_VERSION 0x20080609 /* * OS name, used for the _OS object. The _OS object is essentially obsolete, -- cgit v0.10.2 From 71b58cbb0c30d1f78636a48c4721529449d6ea37 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 20 Jun 2008 09:42:47 +0800 Subject: ACPI: Enhance /sys/firmware/interrupts to allow enable/disable/clear from user-space Allow users to enable/disable/clear a specific & valid GPE/Fixed Event in user space. This is useful for debugging, especially for some interrupt storm issues. All wakeup GPEs are disabled and they can not be enabled at runtime, and we mark them as invalid. All GPEs that don't have a _Lxx/_Exx method are marked as invalid. All Fixed Events that don't have an event handler are marked as invalid and they can't be enabled until an event handler is registered. Signed-off-by: Zhang Rui Signed-off-by: Ling Ming Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi index 9470ed9..f27be7d 100644 --- a/Documentation/ABI/testing/sysfs-firmware-acpi +++ b/Documentation/ABI/testing/sysfs-firmware-acpi @@ -29,46 +29,46 @@ Description: $ cd /sys/firmware/acpi/interrupts $ grep . * - error:0 - ff_gbl_lock:0 - ff_pmtimer:0 - ff_pwr_btn:0 - ff_rt_clk:0 - ff_slp_btn:0 - gpe00:0 - gpe01:0 - gpe02:0 - gpe03:0 - gpe04:0 - gpe05:0 - gpe06:0 - gpe07:0 - gpe08:0 - gpe09:174 - gpe0A:0 - gpe0B:0 - gpe0C:0 - gpe0D:0 - gpe0E:0 - gpe0F:0 - gpe10:0 - gpe11:60 - gpe12:0 - gpe13:0 - gpe14:0 - gpe15:0 - gpe16:0 - gpe17:0 - gpe18:0 - gpe19:7 - gpe1A:0 - gpe1B:0 - gpe1C:0 - gpe1D:0 - gpe1E:0 - gpe1F:0 - gpe_all:241 - sci:241 + error: 0 + ff_gbl_lock: 0 enable + ff_pmtimer: 0 invalid + ff_pwr_btn: 0 enable + ff_rt_clk: 2 disable + ff_slp_btn: 0 invalid + gpe00: 0 invalid + gpe01: 0 enable + gpe02: 108 enable + gpe03: 0 invalid + gpe04: 0 invalid + gpe05: 0 invalid + gpe06: 0 enable + gpe07: 0 enable + gpe08: 0 invalid + gpe09: 0 invalid + gpe0A: 0 invalid + gpe0B: 0 invalid + gpe0C: 0 invalid + gpe0D: 0 invalid + gpe0E: 0 invalid + gpe0F: 0 invalid + gpe10: 0 invalid + gpe11: 0 invalid + gpe12: 0 invalid + gpe13: 0 invalid + gpe14: 0 invalid + gpe15: 0 invalid + gpe16: 0 invalid + gpe17: 1084 enable + gpe18: 0 enable + gpe19: 0 invalid + gpe1A: 0 invalid + gpe1B: 0 invalid + gpe1C: 0 invalid + gpe1D: 0 invalid + gpe1E: 0 invalid + gpe1F: 0 invalid + gpe_all: 1192 + sci: 1194 sci - The total number of times the ACPI SCI has claimed an interrupt. @@ -89,6 +89,13 @@ Description: error - an interrupt that can't be accounted for above. + invalid: it's either a wakeup GPE or a GPE/Fixed Event that + doesn't have an event handler. + + disable: the GPE/Fixed Event is valid but disabled. + + enable: the GPE/Fixed Event is valid and enabled. + Root has permission to clear any of these counters. Eg. # echo 0 > gpe11 @@ -97,3 +104,43 @@ Description: None of these counters has an effect on the function of the system, they are simply statistics. + + Besides this, user can also write specific strings to these files + to enable/disable/clear ACPI interrupts in user space, which can be + used to debug some ACPI interrupt storm issues. + + Note that only writting to VALID GPE/Fixed Event is allowed, + i.e. user can only change the status of runtime GPE and + Fixed Event with event handler installed. + + Let's take power button fixed event for example, please kill acpid + and other user space applications so that the machine won't shutdown + when pressing the power button. + # cat ff_pwr_btn + 0 + # press the power button for 3 times; + # cat ff_pwr_btn + 3 + # echo disable > ff_pwr_btn + # cat ff_pwr_btn + disable + # press the power button for 3 times; + # cat ff_pwr_btn + disable + # echo enable > ff_pwr_btn + # cat ff_pwr_btn + 4 + /* + * this is because the status bit is set even if the enable bit is cleared, + * and it triggers an ACPI fixed event when the enable bit is set again + */ + # press the power button for 3 times; + # cat ff_pwr_btn + 7 + # echo disable > ff_pwr_btn + # press the power button for 3 times; + # echo clear > ff_pwr_btn /* clear the status bit */ + # echo disable > ff_pwr_btn + # cat ff_pwr_btn + 7 + diff --git a/drivers/acpi/events/evxfevnt.c b/drivers/acpi/events/evxfevnt.c index 99a7502..73bfd6b 100644 --- a/drivers/acpi/events/evxfevnt.c +++ b/drivers/acpi/events/evxfevnt.c @@ -472,7 +472,6 @@ acpi_status acpi_clear_gpe(acpi_handle gpe_device, u32 gpe_number, u32 flags) } ACPI_EXPORT_SYMBOL(acpi_clear_gpe) -#ifdef ACPI_FUTURE_USAGE /******************************************************************************* * * FUNCTION: acpi_get_event_status @@ -489,6 +488,7 @@ ACPI_EXPORT_SYMBOL(acpi_clear_gpe) acpi_status acpi_get_event_status(u32 event, acpi_event_status * event_status) { acpi_status status = AE_OK; + u32 value; ACPI_FUNCTION_TRACE(acpi_get_event_status); @@ -506,7 +506,20 @@ acpi_status acpi_get_event_status(u32 event, acpi_event_status * event_status) status = acpi_get_register(acpi_gbl_fixed_event_info[event]. - status_register_id, event_status); + enable_register_id, &value); + if (ACPI_FAILURE(status)) + return_ACPI_STATUS(status); + + *event_status = value; + + status = + acpi_get_register(acpi_gbl_fixed_event_info[event]. + status_register_id, &value); + if (ACPI_FAILURE(status)) + return_ACPI_STATUS(status); + + if (value) + *event_status |= ACPI_EVENT_FLAG_SET; return_ACPI_STATUS(status); } @@ -566,7 +579,6 @@ acpi_get_gpe_status(acpi_handle gpe_device, } ACPI_EXPORT_SYMBOL(acpi_get_gpe_status) -#endif /* ACPI_FUTURE_USAGE */ /******************************************************************************* * * FUNCTION: acpi_install_gpe_block diff --git a/drivers/acpi/hardware/hwgpe.c b/drivers/acpi/hardware/hwgpe.c index 58347d6..0b80db9 100644 --- a/drivers/acpi/hardware/hwgpe.c +++ b/drivers/acpi/hardware/hwgpe.c @@ -186,7 +186,6 @@ acpi_status acpi_hw_clear_gpe(struct acpi_gpe_event_info * gpe_event_info) * ******************************************************************************/ -#ifdef ACPI_FUTURE_USAGE acpi_status acpi_hw_get_gpe_status(struct acpi_gpe_event_info * gpe_event_info, acpi_event_status * event_status) @@ -246,7 +245,6 @@ acpi_hw_get_gpe_status(struct acpi_gpe_event_info * gpe_event_info, unlock_and_exit: return (status); } -#endif /* ACPI_FUTURE_USAGE */ /****************************************************************************** * diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c index 5bd2dec..d8e3f15 100644 --- a/drivers/acpi/system.c +++ b/drivers/acpi/system.c @@ -167,7 +167,13 @@ static int acpi_system_sysfs_init(void) #define COUNT_ERROR 2 /* other */ #define NUM_COUNTERS_EXTRA 3 -static u32 *all_counters; +#define ACPI_EVENT_VALID 0x01 +struct event_counter { + u32 count; + u32 flags; +}; + +static struct event_counter *all_counters; static u32 num_gpes; static u32 num_counters; static struct attribute **all_attrs; @@ -202,9 +208,44 @@ static int count_num_gpes(void) return count; } +static int get_gpe_device(int index, acpi_handle *handle) +{ + struct acpi_gpe_xrupt_info *gpe_xrupt_info; + struct acpi_gpe_block_info *gpe_block; + acpi_cpu_flags flags; + struct acpi_namespace_node *node; + + flags = acpi_os_acquire_lock(acpi_gbl_gpe_lock); + + gpe_xrupt_info = acpi_gbl_gpe_xrupt_list_head; + while (gpe_xrupt_info) { + gpe_block = gpe_xrupt_info->gpe_block_list_head; + node = gpe_block->node; + while (gpe_block) { + index -= gpe_block->register_count * + ACPI_GPE_REGISTER_WIDTH; + if (index < 0) { + acpi_os_release_lock(acpi_gbl_gpe_lock, flags); + /* return NULL if it's FADT GPE */ + if (node->type != ACPI_TYPE_DEVICE) + *handle = NULL; + else + *handle = node; + return 0; + } + node = gpe_block->node; + gpe_block = gpe_block->next; + } + gpe_xrupt_info = gpe_xrupt_info->next; + } + acpi_os_release_lock(acpi_gbl_gpe_lock, flags); + + return -ENODEV; +} + static void delete_gpe_attr_array(void) { - u32 *tmp = all_counters; + struct event_counter *tmp = all_counters; all_counters = NULL; kfree(tmp); @@ -230,9 +271,10 @@ void acpi_os_gpe_count(u32 gpe_number) return; if (gpe_number < num_gpes) - all_counters[gpe_number]++; + all_counters[gpe_number].count++; else - all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_ERROR]++; + all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_ERROR]. + count++; return; } @@ -243,44 +285,144 @@ void acpi_os_fixed_event_count(u32 event_number) return; if (event_number < ACPI_NUM_FIXED_EVENTS) - all_counters[num_gpes + event_number]++; + all_counters[num_gpes + event_number].count++; else - all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_ERROR]++; + all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_ERROR]. + count++; return; } +static int get_status(u32 index, acpi_event_status *status, acpi_handle *handle) +{ + int result = 0; + + if (index >= num_gpes + ACPI_NUM_FIXED_EVENTS) + goto end; + + if (index < num_gpes) { + result = get_gpe_device(index, handle); + if (result) { + ACPI_EXCEPTION((AE_INFO, AE_NOT_FOUND, + "Invalid GPE 0x%x\n", index)); + goto end; + } + result = acpi_get_gpe_status(*handle, index, + ACPI_NOT_ISR, status); + } else if (index < (num_gpes + ACPI_NUM_FIXED_EVENTS)) + result = acpi_get_event_status(index - num_gpes, status); + + /* + * sleep/power button GPE/Fixed Event is enabled after acpi_system_init, + * check the status at runtime and mark it as valid once it's enabled + */ + if (!result && (*status & ACPI_EVENT_FLAG_ENABLED)) + all_counters[index].flags |= ACPI_EVENT_VALID; +end: + return result; +} + static ssize_t counter_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI] = + int index = attr - counter_attrs; + int size; + acpi_handle handle; + acpi_event_status status; + int result = 0; + + all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI].count = acpi_irq_handled; - all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_GPE] = + all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_GPE].count = acpi_gpe_count; - return sprintf(buf, "%d\n", all_counters[attr - counter_attrs]); + size = sprintf(buf, "%8d", all_counters[index].count); + + /* "gpe_all" or "sci" */ + if (index >= num_gpes + ACPI_NUM_FIXED_EVENTS) + goto end; + + result = get_status(index, &status, &handle); + if (result) + goto end; + + if (!(all_counters[index].flags & ACPI_EVENT_VALID)) + size += sprintf(buf + size, " invalid"); + else if (status & ACPI_EVENT_FLAG_ENABLED) + size += sprintf(buf + size, " enable"); + else + size += sprintf(buf + size, " disable"); + +end: + size += sprintf(buf + size, "\n"); + return result ? result : size; } /* * counter_set() sets the specified counter. * setting the total "sci" file to any value clears all counters. + * enable/disable/clear a gpe/fixed event in user space. */ static ssize_t counter_set(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t size) { int index = attr - counter_attrs; + acpi_event_status status; + acpi_handle handle; + int result = 0; if (index == num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI) { int i; for (i = 0; i < num_counters; ++i) - all_counters[i] = 0; + all_counters[i].count = 0; acpi_gpe_count = 0; acpi_irq_handled = 0; + goto end; + } + /* show the event status for both GPEs and Fixed Events */ + result = get_status(index, &status, &handle); + if (result) + goto end; + + if (!(all_counters[index].flags & ACPI_EVENT_VALID)) { + ACPI_DEBUG_PRINT((ACPI_DB_WARN, + "Can not change Invalid GPE/Fixed Event status\n")); + return -EINVAL; + } + + if (index < num_gpes) { + if (!strcmp(buf, "disable\n") && + (status & ACPI_EVENT_FLAG_ENABLED)) + result = acpi_disable_gpe(handle, index, ACPI_NOT_ISR); + else if (!strcmp(buf, "enable\n") && + !(status & ACPI_EVENT_FLAG_ENABLED)) + result = acpi_enable_gpe(handle, index, ACPI_NOT_ISR); + else if (!strcmp(buf, "clear\n") && + (status & ACPI_EVENT_FLAG_SET)) + result = acpi_clear_gpe(handle, index, ACPI_NOT_ISR); + else + all_counters[index].count = strtoul(buf, NULL, 0); + } else if (index < num_gpes + ACPI_NUM_FIXED_EVENTS) { + int event = index - num_gpes; + if (!strcmp(buf, "disable\n") && + (status & ACPI_EVENT_FLAG_ENABLED)) + result = acpi_disable_event(event, ACPI_NOT_ISR); + else if (!strcmp(buf, "enable\n") && + !(status & ACPI_EVENT_FLAG_ENABLED)) + result = acpi_enable_event(event, ACPI_NOT_ISR); + else if (!strcmp(buf, "clear\n") && + (status & ACPI_EVENT_FLAG_SET)) + result = acpi_clear_event(event); + else + all_counters[index].count = strtoul(buf, NULL, 0); } else - all_counters[index] = strtoul(buf, NULL, 0); + all_counters[index].count = strtoul(buf, NULL, 0); - return size; + if (ACPI_FAILURE(result)) + result = -EINVAL; +end: + return result ? result : size; } void acpi_irq_stats_init(void) @@ -298,7 +440,8 @@ void acpi_irq_stats_init(void) if (all_attrs == NULL) return; - all_counters = kzalloc(sizeof(u32) * (num_counters), GFP_KERNEL); + all_counters = kzalloc(sizeof(struct event_counter) * (num_counters), + GFP_KERNEL); if (all_counters == NULL) goto fail; diff --git a/include/acpi/achware.h b/include/acpi/achware.h index 45e9859..97a72b1 100644 --- a/include/acpi/achware.h +++ b/include/acpi/achware.h @@ -102,11 +102,9 @@ acpi_status acpi_hw_clear_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, struct acpi_gpe_block_info *gpe_block); -#ifdef ACPI_FUTURE_USAGE acpi_status acpi_hw_get_gpe_status(struct acpi_gpe_event_info *gpe_event_info, acpi_event_status * event_status); -#endif /* ACPI_FUTURE_USAGE */ acpi_status acpi_hw_disable_all_gpes(void); diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index bd95ecc..94d94e1 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -248,9 +248,7 @@ acpi_status acpi_disable_event(u32 event, u32 flags); acpi_status acpi_clear_event(u32 event); -#ifdef ACPI_FUTURE_USAGE acpi_status acpi_get_event_status(u32 event, acpi_event_status * event_status); -#endif /* ACPI_FUTURE_USAGE */ acpi_status acpi_set_gpe_type(acpi_handle gpe_device, u32 gpe_number, u8 type); @@ -260,12 +258,10 @@ acpi_status acpi_disable_gpe(acpi_handle gpe_device, u32 gpe_number, u32 flags); acpi_status acpi_clear_gpe(acpi_handle gpe_device, u32 gpe_number, u32 flags); -#ifdef ACPI_FUTURE_USAGE acpi_status acpi_get_gpe_status(acpi_handle gpe_device, u32 gpe_number, u32 flags, acpi_event_status * event_status); -#endif /* ACPI_FUTURE_USAGE */ acpi_status acpi_install_gpe_block(acpi_handle gpe_device, -- cgit v0.10.2 From 5b53496a5ad79e91052f72761a7c5516b069bc99 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 17 Jun 2008 14:39:59 +0800 Subject: ACPI: Disable the C2C3_FFH access mode HW has no MWAIT support 991528d7348667924176f3e29addea0675298944 (ACPI: Processor native C-states using MWAIT) started passing C2C3_FFH to _PDC to tell the BIOS that Linux supports MWAIT for deep C-states. However, we should first double check with the hardware that it actually supports MWAIT before potentially exposing a BIOS bug of an MWAIT _CST on HW that doesn't support MWAIT. Signed-off-by: Zhao Yakui Signed-off-by: Li Shaohua Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index de2d2e4..7c074ee 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -56,6 +56,12 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_ACPI)) buf[2] |= ACPI_PDC_T_FFH; + /* + * If mwait/monitor is unsupported, C2/C3_FFH will be disabled + */ + if (!cpu_has(c, X86_FEATURE_MWAIT)) + buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); + obj->type = ACPI_TYPE_BUFFER; obj->buffer.length = 12; obj->buffer.pointer = (u8 *) buf; -- cgit v0.10.2 From c1e3b377ad48febba6f91b8ae42c44ee4d4ab45e Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 24 Jun 2008 17:58:53 +0800 Subject: ACPI: Create "idle=halt" bootparam "idle=halt" limits the idle loop to using the halt instruction. No MWAIT, no IO accesses, no C-states deeper than C1. If something is broken in the idle code, "idle=halt" is a less severe workaround than "idle=poll" which disables all power savings. Signed-off-by: Zhao Yakui Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 312fe77..65db7f4 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -818,7 +818,7 @@ and is between 256 and 4096 characters. It is defined in the file See Documentation/ide/ide.txt. idle= [X86] - Format: idle=poll or idle=mwait + Format: idle=poll or idle=mwait, idle=halt Poll forces a polling idle loop that can slightly improves the performance of waking up a idle CPU, but will use a lot of power and make the system run hot. Not recommended. @@ -826,6 +826,8 @@ and is between 256 and 4096 characters. It is defined in the file to not use it because it doesn't save as much power as a normal idle loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same as idle=poll. + idle=halt. Halt is forced to be used for CPU idle. + In such case C2/C3 won't be used again. ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem Claim all unknown PCI IDE storage controllers. diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index fabaf08..612b3c4 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -55,6 +55,8 @@ void (*ia64_mark_idle)(int); unsigned long boot_option_idle_override = 0; EXPORT_SYMBOL(boot_option_idle_override); +unsigned long idle_halt; +EXPORT_SYMBOL(idle_halt); void ia64_do_show_stack (struct unw_frame_info *info, void *arg) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7dceea9..7fc7294 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -7,6 +7,10 @@ #include #include #include +#include + +unsigned long idle_halt; +EXPORT_SYMBOL(idle_halt); struct kmem_cache *task_xstate_cachep; @@ -325,7 +329,18 @@ static int __init idle_setup(char *str) pm_idle = poll_idle; } else if (!strcmp(str, "mwait")) force_mwait = 1; - else + else if (!strcmp(str, "halt")) { + /* + * When the boot option of idle=halt is added, halt is + * forced to be used for CPU idle. In such case CPU C2/C3 + * won't be used again. + * To continue to load the CPU idle driver, don't touch + * the boot_option_idle_override. + */ + pm_idle = default_idle; + idle_halt = 1; + return 0; + } else return -1; boot_option_idle_override = 1; diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 0fc310e..c75c7ac 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -41,6 +41,7 @@ #include #include #include +#include /* * Include the apic definitions for x86 to have the APIC timer related defines @@ -57,6 +58,7 @@ #include #include +#include #define ACPI_PROCESSOR_COMPONENT 0x01000000 #define ACPI_PROCESSOR_CLASS "processor" @@ -955,6 +957,17 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) } else { continue; } + if (cx.type == ACPI_STATE_C1 && idle_halt) { + /* + * In most cases the C1 space_id obtained from + * _CST object is FIXED_HARDWARE access mode. + * But when the option of idle=halt is added, + * the entry_method type should be changed from + * CSTATE_FFH to CSTATE_HALT. + */ + cx.entry_method = ACPI_CSTATE_HALT; + snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT"); + } } else { snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI IOPORT 0x%x", cx.address); @@ -1780,6 +1793,15 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr, return 0; if (!first_run) { + if (idle_halt) { + /* + * When the boot option of "idle=halt" is added, halt + * is used for CPU IDLE. + * In such case C2/C3 is meaningless. So the max_cstate + * is set to one. + */ + max_cstate = 1; + } dmi_check_system(processor_power_dmi_table); max_cstate = acpi_processor_cstate_check(max_cstate); if (max_cstate < ACPI_C_STATES_MAX) diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index 6aff126..f36e28a 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h @@ -763,6 +763,7 @@ prefetchw (const void *x) #define spin_lock_prefetch(x) prefetchw(x) extern unsigned long boot_option_idle_override; +extern unsigned long idle_halt; #endif /* !__ASSEMBLY__ */ diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 7f73827..bc22162 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -727,6 +727,7 @@ extern int force_mwait; extern void select_idle_routine(const struct cpuinfo_x86 *c); extern unsigned long boot_option_idle_override; +extern unsigned long idle_halt; extern void enable_sep_cpu(void); extern int sysenter_setup(void); -- cgit v0.10.2 From da5e09a1b3e5a9fc0b15a3feb64e921ccc55ba74 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 24 Jun 2008 18:01:09 +0800 Subject: ACPI : Create "idle=nomwait" bootparam "idle=nomwait" disables the use of the MWAIT instruction from both C1 (C1_FFH) and deeper (C2C3_FFH) C-states. When MWAIT is unavailable, the BIOS and OS generally negotiate to use the HALT instruction for C1, and use IO accesses for deeper C-states. This option is useful for power and performance comparisons, and also to work around BIOS bugs where broken MWAIT support is advertised. http://bugzilla.kernel.org/show_bug.cgi?id=10807 http://bugzilla.kernel.org/show_bug.cgi?id=10914 Signed-off-by: Zhao Yakui Signed-off-by: Li Shaohua Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 65db7f4..5e497d1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -818,7 +818,7 @@ and is between 256 and 4096 characters. It is defined in the file See Documentation/ide/ide.txt. idle= [X86] - Format: idle=poll or idle=mwait, idle=halt + Format: idle=poll or idle=mwait, idle=halt, idle=nomwait Poll forces a polling idle loop that can slightly improves the performance of waking up a idle CPU, but will use a lot of power and make the system run hot. Not recommended. @@ -828,6 +828,7 @@ and is between 256 and 4096 characters. It is defined in the file as idle=poll. idle=halt. Halt is forced to be used for CPU idle. In such case C2/C3 won't be used again. + idle=nomwait. Disable mwait for CPU C-states ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem Claim all unknown PCI IDE storage controllers. diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 612b3c4..3ab8373 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -57,6 +57,8 @@ unsigned long boot_option_idle_override = 0; EXPORT_SYMBOL(boot_option_idle_override); unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); +unsigned long idle_nomwait; +EXPORT_SYMBOL(idle_nomwait); void ia64_do_show_stack (struct unw_frame_info *info, void *arg) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7fc7294..4d629c6 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -11,6 +11,8 @@ unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); +unsigned long idle_nomwait; +EXPORT_SYMBOL(idle_nomwait); struct kmem_cache *task_xstate_cachep; @@ -340,6 +342,15 @@ static int __init idle_setup(char *str) pm_idle = default_idle; idle_halt = 1; return 0; + } else if (!strcmp(str, "nomwait")) { + /* + * If the boot option of "idle=nomwait" is added, + * it means that mwait will be disabled for CPU C2/C3 + * states. In such case it won't touch the variable + * of boot_option_idle_override. + */ + idle_nomwait = 1; + return 0; } else return -1; diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 9a803f8..4e1bb89 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -265,7 +265,20 @@ static int acpi_processor_set_pdc(struct acpi_processor *pr) if (!pdc_in) return status; + if (idle_nomwait) { + /* + * If mwait is disabled for CPU C-states, the C2C3_FFH access + * mode will be disabled in the parameter of _PDC object. + * Of course C1_FFH access mode will also be disabled. + */ + union acpi_object *obj; + u32 *buffer = NULL; + obj = pdc_in->pointer; + buffer = (u32 *)(obj->buffer.pointer); + buffer[2] &= ~(ACPI_PDC_C_C2C3_FFH | ACPI_PDC_C_C1_FFH); + + } status = acpi_evaluate_object(pr->handle, "_PDC", pdc_in, NULL); if (ACPI_FAILURE(status)) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index c75c7ac..d592dbb 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -957,13 +957,17 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) } else { continue; } - if (cx.type == ACPI_STATE_C1 && idle_halt) { + if (cx.type == ACPI_STATE_C1 && + (idle_halt || idle_nomwait)) { /* * In most cases the C1 space_id obtained from * _CST object is FIXED_HARDWARE access mode. * But when the option of idle=halt is added, * the entry_method type should be changed from * CSTATE_FFH to CSTATE_HALT. + * When the option of idle=nomwait is added, + * the C1 entry_method type should be + * CSTATE_HALT. */ cx.entry_method = ACPI_CSTATE_HALT; snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT"); diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index f36e28a..f88fa05 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h @@ -764,6 +764,7 @@ prefetchw (const void *x) extern unsigned long boot_option_idle_override; extern unsigned long idle_halt; +extern unsigned long idle_nomwait; #endif /* !__ASSEMBLY__ */ diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index bc22162..55402d2 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -728,6 +728,7 @@ extern void select_idle_routine(const struct cpuinfo_x86 *c); extern unsigned long boot_option_idle_override; extern unsigned long idle_halt; +extern unsigned long idle_nomwait; extern void enable_sep_cpu(void); extern int sysenter_setup(void); -- cgit v0.10.2 From 2a2a64714d9c40f7705c4de1e79a5b855c7211a9 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 24 Jun 2008 18:02:57 +0800 Subject: ACPI: Disable MWAIT via DMI on broken Compal board If a system matches in this DMI table, Linux will disable MWAIT support for idle. ie. "idle=nomwait" is automatically invoked and C1_FFH and C2C3_FFH access mode are disabled. http://bugzilla.kernel.org/show_bug.cgi?id=10807 http://bugzilla.kernel.org/show_bug.cgi?id=10914 Signed-off-by: Zhao Yakui Signed-off-by: Li Shaohua Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 4e1bb89..ec0f2d5 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -120,6 +120,29 @@ static const struct file_operations acpi_processor_info_fops = { DEFINE_PER_CPU(struct acpi_processor *, processors); struct acpi_processor_errata errata __read_mostly; +static int set_no_mwait(const struct dmi_system_id *id) +{ + printk(KERN_NOTICE PREFIX "%s detected - " + "disable mwait for CPU C-stetes\n", id->ident); + idle_nomwait = 1; + return 0; +} + +static struct dmi_system_id __cpuinitdata processor_idle_dmi_table[] = { + { + set_no_mwait, "IFL91 board", { + DMI_MATCH(DMI_BIOS_VENDOR, "COMPAL"), + DMI_MATCH(DMI_SYS_VENDOR, "ZEPTO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "3215W"), + DMI_MATCH(DMI_BOARD_NAME, "IFL91") }, NULL}, + { + set_no_mwait, "Extensa 5220", { + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), + DMI_MATCH(DMI_SYS_VENDOR, "ACER"), + DMI_MATCH(DMI_PRODUCT_VERSION, "0100"), + DMI_MATCH(DMI_BOARD_NAME, "Columbia") }, NULL}, + {}, +}; /* -------------------------------------------------------------------------- Errata Handling @@ -1100,6 +1123,11 @@ static int __init acpi_processor_init(void) return -ENOMEM; acpi_processor_dir->owner = THIS_MODULE; + /* + * Check whether the system is DMI table. If yes, OSPM + * should not use mwait for CPU-states. + */ + dmi_check_system(processor_idle_dmi_table); result = cpuidle_register_driver(&acpi_idle_driver); if (result < 0) goto out_proc; -- cgit v0.10.2 From 65573ee72e34e767a135a0841ac5380a2be1a390 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 27 Jun 2008 11:22:46 -0400 Subject: ACPI: Zhang Rui maintains ACPI THERMAL and FAN Reflect that Zhang-Rui has been the sub-maintainer for ACPI THERMAL and FAN for some time now. Also, the Chinese custom is to speak family name first, so rather than "Rui Zhang", write "Zhang Rui", as he does on e-mail. Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/MAINTAINERS b/MAINTAINERS index ec5c8ee..4e49e9c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -239,8 +239,8 @@ W: http://www.lesswatts.org/projects/acpi/ S: Supported ACPI FAN DRIVER -P: Len Brown -M: len.brown@intel.com +P: Zhang Rui +M: rui.zhang@intel.com L: linux-acpi@vger.kernel.org W: http://www.lesswatts.org/projects/acpi/ S: Supported @@ -252,14 +252,14 @@ L: pcihpd-discuss@lists.sourceforge.net S: Supported ACPI THERMAL DRIVER -P: Len Brown -M: len.brown@intel.com +P: Zhang Rui +M: rui.zhang@intel.com L: linux-acpi@vger.kernel.org W: http://www.lesswatts.org/projects/acpi/ S: Supported ACPI VIDEO DRIVER -P: Rui Zhang +P: Zhang Rui M: rui.zhang@intel.com L: linux-acpi@vger.kernel.org W: http://www.lesswatts.org/projects/acpi/ -- cgit v0.10.2 From 8aa863b8ca0855378e3409b5e55ad57a856f6412 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 27 Jun 2008 11:31:50 -0400 Subject: ACPI: Andi Kleen maintains the ACPI sub-system ...while Len is on sabbatical from Intel Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/MAINTAINERS b/MAINTAINERS index 4e49e9c..633bda6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -216,8 +216,8 @@ W: http://code.google.com/p/aceracpi S: Maintained ACPI -P: Len Brown -M: len.brown@intel.com +P: Andi Kleen +M: ak@linux.intel.com M: lenb@kernel.org L: linux-acpi@vger.kernel.org W: http://www.lesswatts.org/projects/acpi/ -- cgit v0.10.2 From 469778c1740fcf3113498b6fdf4559bdec25c58f Mon Sep 17 00:00:00 2001 From: Julia Jomantaite Date: Mon, 23 Jun 2008 22:50:42 +0100 Subject: ACPI: video: fix brightness allocation Fix use of uninitialized device->brightness. Signed-off-by: Julia Jomantaite Signed-off-by: Andi Kleen Acked-by: Zhang Rui Signed-off-by: Len Brown diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index d089c45..64c8893 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -631,6 +631,76 @@ acpi_video_bus_DOS(struct acpi_video_bus *video, int bios_flag, int lcd_flag) * device : video output device (LCD, CRT, ..) * * Return Value: + * Maximum brightness level + * + * Allocate and initialize device->brightness. + */ + +static int +acpi_video_init_brightness(struct acpi_video_device *device) +{ + union acpi_object *obj = NULL; + int i, max_level = 0, count = 0; + union acpi_object *o; + struct acpi_video_device_brightness *br = NULL; + + if (!ACPI_SUCCESS(acpi_video_device_lcd_query_levels(device, &obj))) { + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Could not query available " + "LCD brightness level\n")); + goto out; + } + + if (obj->package.count < 2) + goto out; + + br = kzalloc(sizeof(*br), GFP_KERNEL); + if (!br) { + printk(KERN_ERR "can't allocate memory\n"); + goto out; + } + + br->levels = kmalloc(obj->package.count * sizeof *(br->levels), + GFP_KERNEL); + if (!br->levels) + goto out_free; + + for (i = 0; i < obj->package.count; i++) { + o = (union acpi_object *)&obj->package.elements[i]; + if (o->type != ACPI_TYPE_INTEGER) { + printk(KERN_ERR PREFIX "Invalid data\n"); + continue; + } + br->levels[count] = (u32) o->integer.value; + + if (br->levels[count] > max_level) + max_level = br->levels[count]; + count++; + } + + if (count < 2) + goto out_free_levels; + + br->count = count; + device->brightness = br; + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "found %d brightness levels\n", count)); + kfree(obj); + return max_level; + +out_free_levels: + kfree(br->levels); +out_free: + kfree(br); +out: + device->brightness = NULL; + kfree(obj); + return 0; +} + +/* + * Arg: + * device : video output device (LCD, CRT, ..) + * + * Return Value: * None * * Find out all required AML methods defined under the output @@ -640,10 +710,7 @@ acpi_video_bus_DOS(struct acpi_video_bus *video, int bios_flag, int lcd_flag) static void acpi_video_device_find_cap(struct acpi_video_device *device) { acpi_handle h_dummy1; - int i; u32 max_level = 0; - union acpi_object *obj = NULL; - struct acpi_video_device_brightness *br = NULL; memset(&device->cap, 0, sizeof(device->cap)); @@ -672,53 +739,7 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device) device->cap._DSS = 1; } - if (ACPI_SUCCESS(acpi_video_device_lcd_query_levels(device, &obj))) { - - if (obj->package.count >= 2) { - int count = 0; - union acpi_object *o; - - br = kzalloc(sizeof(*br), GFP_KERNEL); - if (!br) { - printk(KERN_ERR "can't allocate memory\n"); - } else { - br->levels = kmalloc(obj->package.count * - sizeof *(br->levels), GFP_KERNEL); - if (!br->levels) - goto out; - - for (i = 0; i < obj->package.count; i++) { - o = (union acpi_object *)&obj->package. - elements[i]; - if (o->type != ACPI_TYPE_INTEGER) { - printk(KERN_ERR PREFIX "Invalid data\n"); - continue; - } - br->levels[count] = (u32) o->integer.value; - - if (br->levels[count] > max_level) - max_level = br->levels[count]; - count++; - } - out: - if (count < 2) { - kfree(br->levels); - kfree(br); - } else { - br->count = count; - device->brightness = br; - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "found %d brightness levels\n", - count)); - } - } - } - - } else { - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Could not query available LCD brightness level\n")); - } - - kfree(obj); + max_level = acpi_video_init_brightness(device); if (device->cap._BCL && device->cap._BCM && device->cap._BQC && max_level > 0){ int result; @@ -1695,6 +1716,8 @@ static void acpi_video_switch_brightness(struct acpi_video_device *device, int event) { unsigned long level_current, level_next; + if (!device->brightness) + return; acpi_video_device_lcd_get_level_current(device, &level_current); level_next = acpi_video_get_next_level(device, level_current, event); acpi_video_device_lcd_set_level(device, level_next); -- cgit v0.10.2 From b897f46cf7941fff8130ccdaf85f39528bff6a51 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:56:52 -0600 Subject: PNP: add detail to debug resource dump In the debug resource dump, decode the flags and indicate when a resource is disabled or has been automatically assigned. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/pnp/support.c b/drivers/pnp/support.c index 95b076c..7f594cc 100644 --- a/drivers/pnp/support.c +++ b/drivers/pnp/support.c @@ -63,28 +63,46 @@ void dbg_pnp_show_resources(struct pnp_dev *dev, char *desc) for (i = 0; i < PNP_MAX_IRQ; i++) { res = pnp_get_resource(dev, IORESOURCE_IRQ, i); if (res && !(res->flags & IORESOURCE_UNSET)) - dev_dbg(&dev->dev, " irq %lld flags %#lx\n", - (unsigned long long) res->start, res->flags); + dev_dbg(&dev->dev, " irq %lld flags %#lx%s%s\n", + (unsigned long long) res->start, res->flags, + res->flags & IORESOURCE_DISABLED ? + " DISABLED" : "", + res->flags & IORESOURCE_AUTO ? + " AUTO" : ""); } for (i = 0; i < PNP_MAX_DMA; i++) { res = pnp_get_resource(dev, IORESOURCE_DMA, i); if (res && !(res->flags & IORESOURCE_UNSET)) - dev_dbg(&dev->dev, " dma %lld flags %#lx\n", - (unsigned long long) res->start, res->flags); + dev_dbg(&dev->dev, " dma %lld flags %#lx%s%s\n", + (unsigned long long) res->start, res->flags, + res->flags & IORESOURCE_DISABLED ? + " DISABLED" : "", + res->flags & IORESOURCE_AUTO ? + " AUTO" : ""); } for (i = 0; i < PNP_MAX_PORT; i++) { res = pnp_get_resource(dev, IORESOURCE_IO, i); if (res && !(res->flags & IORESOURCE_UNSET)) - dev_dbg(&dev->dev, " io %#llx-%#llx flags %#lx\n", + dev_dbg(&dev->dev, " io %#llx-%#llx flags %#lx" + "%s%s\n", (unsigned long long) res->start, - (unsigned long long) res->end, res->flags); + (unsigned long long) res->end, res->flags, + res->flags & IORESOURCE_DISABLED ? + " DISABLED" : "", + res->flags & IORESOURCE_AUTO ? + " AUTO" : ""); } for (i = 0; i < PNP_MAX_MEM; i++) { res = pnp_get_resource(dev, IORESOURCE_MEM, i); if (res && !(res->flags & IORESOURCE_UNSET)) - dev_dbg(&dev->dev, " mem %#llx-%#llx flags %#lx\n", + dev_dbg(&dev->dev, " mem %#llx-%#llx flags %#lx" + "%s%s\n", (unsigned long long) res->start, - (unsigned long long) res->end, res->flags); + (unsigned long long) res->end, res->flags, + res->flags & IORESOURCE_DISABLED ? + " DISABLED" : "", + res->flags & IORESOURCE_AUTO ? + " AUTO" : ""); } #endif } -- cgit v0.10.2 From 87e4acf3ebc02c9d0a2f7a37b655c49176c4d765 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:56:53 -0600 Subject: PNP: remove pnp_resource.index We used pnp_resource.index to keep track of which ISAPNP configuration register a resource should be written to. We needed this only to handle the case where a register is disabled but a subsequent register in the same set is enabled. Rather than explicitly maintaining the pnp_resource.index, this patch adds a resource every time we read an ISAPNP configuration register and marks the resource as IORESOURCE_DISABLED when appropriate. This makes the position in the pnp_resource_table always correspond to the config register index. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h index 886dac8..6f60097 100644 --- a/drivers/pnp/base.h +++ b/drivers/pnp/base.h @@ -57,7 +57,6 @@ struct pnp_resource *pnp_get_pnp_resource(struct pnp_dev *dev, struct pnp_resource { struct resource res; - unsigned int index; /* ISAPNP config register index */ }; struct pnp_resource_table { diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index 5695a79..3f8007a 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -320,7 +320,6 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr, const char *ubuf, size_t count) { struct pnp_dev *dev = to_pnp_dev(dmdev); - struct pnp_resource *pnp_res; char *buf = (void *)ubuf; int retval = 0; resource_size_t start, end; @@ -368,7 +367,6 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr, goto done; } if (!strnicmp(buf, "set", 3)) { - int nport = 0, nmem = 0, nirq = 0, ndma = 0; if (dev->active) goto done; buf += 3; @@ -391,10 +389,7 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr, end = simple_strtoul(buf, &buf, 0); } else end = start; - pnp_res = pnp_add_io_resource(dev, start, end, - 0); - if (pnp_res) - pnp_res->index = nport++; + pnp_add_io_resource(dev, start, end, 0); continue; } if (!strnicmp(buf, "mem", 3)) { @@ -411,10 +406,7 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr, end = simple_strtoul(buf, &buf, 0); } else end = start; - pnp_res = pnp_add_mem_resource(dev, start, end, - 0); - if (pnp_res) - pnp_res->index = nmem++; + pnp_add_mem_resource(dev, start, end, 0); continue; } if (!strnicmp(buf, "irq", 3)) { @@ -422,9 +414,7 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr, while (isspace(*buf)) ++buf; start = simple_strtoul(buf, &buf, 0); - pnp_res = pnp_add_irq_resource(dev, start, 0); - if (pnp_res) - pnp_res->index = nirq++; + pnp_add_irq_resource(dev, start, 0); continue; } if (!strnicmp(buf, "dma", 3)) { @@ -432,9 +422,7 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr, while (isspace(*buf)) ++buf; start = simple_strtoul(buf, &buf, 0); - pnp_res = pnp_add_dma_resource(dev, start, 0); - if (pnp_res) - pnp_res->index = ndma++; + pnp_add_dma_resource(dev, start, 0); continue; } break; diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c index f1bccdb..752b51f 100644 --- a/drivers/pnp/isapnp/core.c +++ b/drivers/pnp/isapnp/core.c @@ -928,7 +928,6 @@ EXPORT_SYMBOL(isapnp_write_byte); static int isapnp_get_resources(struct pnp_dev *dev) { - struct pnp_resource *pnp_res; int i, ret; dev_dbg(&dev->dev, "get resources\n"); @@ -940,35 +939,23 @@ static int isapnp_get_resources(struct pnp_dev *dev) for (i = 0; i < ISAPNP_MAX_PORT; i++) { ret = isapnp_read_word(ISAPNP_CFG_PORT + (i << 1)); - if (ret) { - pnp_res = pnp_add_io_resource(dev, ret, ret, 0); - if (pnp_res) - pnp_res->index = i; - } + pnp_add_io_resource(dev, ret, ret, + ret == 0 ? IORESOURCE_DISABLED : 0); } for (i = 0; i < ISAPNP_MAX_MEM; i++) { ret = isapnp_read_word(ISAPNP_CFG_MEM + (i << 3)) << 8; - if (ret) { - pnp_res = pnp_add_mem_resource(dev, ret, ret, 0); - if (pnp_res) - pnp_res->index = i; - } + pnp_add_mem_resource(dev, ret, ret, + ret == 0 ? IORESOURCE_DISABLED : 0); } for (i = 0; i < ISAPNP_MAX_IRQ; i++) { ret = isapnp_read_word(ISAPNP_CFG_IRQ + (i << 1)) >> 8; - if (ret) { - pnp_res = pnp_add_irq_resource(dev, ret, 0); - if (pnp_res) - pnp_res->index = i; - } + pnp_add_irq_resource(dev, ret, + ret == 0 ? IORESOURCE_DISABLED : 0); } for (i = 0; i < ISAPNP_MAX_DMA; i++) { ret = isapnp_read_byte(ISAPNP_CFG_DMA + i); - if (ret != 4) { - pnp_res = pnp_add_dma_resource(dev, ret, 0); - if (pnp_res) - pnp_res->index = i; - } + pnp_add_dma_resource(dev, ret, + ret == 4 ? IORESOURCE_DISABLED : 0); } __end: @@ -978,62 +965,49 @@ __end: static int isapnp_set_resources(struct pnp_dev *dev) { - struct pnp_resource *pnp_res; struct resource *res; - int tmp, index; + int tmp; dev_dbg(&dev->dev, "set resources\n"); isapnp_cfg_begin(dev->card->number, dev->number); dev->active = 1; for (tmp = 0; tmp < ISAPNP_MAX_PORT; tmp++) { - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_IO, tmp); - if (!pnp_res) - continue; - res = &pnp_res->res; - if (pnp_resource_valid(res)) { - index = pnp_res->index; + res = pnp_get_resource(dev, IORESOURCE_IO, tmp); + if (res && pnp_resource_valid(res) && + !(res->flags & IORESOURCE_DISABLED)) { dev_dbg(&dev->dev, " set io %d to %#llx\n", - index, (unsigned long long) res->start); - isapnp_write_word(ISAPNP_CFG_PORT + (index << 1), + tmp, (unsigned long long) res->start); + isapnp_write_word(ISAPNP_CFG_PORT + (tmp << 1), res->start); } } for (tmp = 0; tmp < ISAPNP_MAX_IRQ; tmp++) { - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_IRQ, tmp); - if (!pnp_res) - continue; - res = &pnp_res->res; - if (pnp_resource_valid(res)) { + res = pnp_get_resource(dev, IORESOURCE_IRQ, tmp); + if (res && pnp_resource_valid(res) && + !(res->flags & IORESOURCE_DISABLED)) { int irq = res->start; if (irq == 2) irq = 9; - index = pnp_res->index; - dev_dbg(&dev->dev, " set irq %d to %d\n", index, irq); - isapnp_write_byte(ISAPNP_CFG_IRQ + (index << 1), irq); + dev_dbg(&dev->dev, " set irq %d to %d\n", tmp, irq); + isapnp_write_byte(ISAPNP_CFG_IRQ + (tmp << 1), irq); } } for (tmp = 0; tmp < ISAPNP_MAX_DMA; tmp++) { - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_DMA, tmp); - if (!pnp_res) - continue; - res = &pnp_res->res; - if (pnp_resource_valid(res)) { - index = pnp_res->index; + res = pnp_get_resource(dev, IORESOURCE_DMA, tmp); + if (res && pnp_resource_valid(res) && + !(res->flags & IORESOURCE_DISABLED)) { dev_dbg(&dev->dev, " set dma %d to %lld\n", - index, (unsigned long long) res->start); - isapnp_write_byte(ISAPNP_CFG_DMA + index, res->start); + tmp, (unsigned long long) res->start); + isapnp_write_byte(ISAPNP_CFG_DMA + tmp, res->start); } } for (tmp = 0; tmp < ISAPNP_MAX_MEM; tmp++) { - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_MEM, tmp); - if (!pnp_res) - continue; - res = &pnp_res->res; - if (pnp_resource_valid(res)) { - index = pnp_res->index; + res = pnp_get_resource(dev, IORESOURCE_MEM, tmp); + if (res && pnp_resource_valid(res) && + !(res->flags & IORESOURCE_DISABLED)) { dev_dbg(&dev->dev, " set mem %d to %#llx\n", - index, (unsigned long long) res->start); - isapnp_write_word(ISAPNP_CFG_MEM + (index << 3), + tmp, (unsigned long long) res->start); + isapnp_write_word(ISAPNP_CFG_MEM + (tmp << 3), (res->start >> 8) & 0xffff); } } diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c index bea0914..90bd9cb 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -40,7 +40,6 @@ static int pnp_assign_port(struct pnp_dev *dev, struct pnp_port *rule, int idx) } /* set the initial values */ - pnp_res->index = idx; res->flags |= rule->flags | IORESOURCE_IO; res->flags &= ~IORESOURCE_UNSET; @@ -90,7 +89,6 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx) } /* set the initial values */ - pnp_res->index = idx; res->flags |= rule->flags | IORESOURCE_MEM; res->flags &= ~IORESOURCE_UNSET; @@ -155,7 +153,6 @@ static int pnp_assign_irq(struct pnp_dev *dev, struct pnp_irq *rule, int idx) } /* set the initial values */ - pnp_res->index = idx; res->flags |= rule->flags | IORESOURCE_IRQ; res->flags &= ~IORESOURCE_UNSET; @@ -214,7 +211,6 @@ static void pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx) } /* set the initial values */ - pnp_res->index = idx; res->flags |= rule->flags | IORESOURCE_DMA; res->flags &= ~IORESOURCE_UNSET; -- cgit v0.10.2 From 940e98dbc616f1df7b63b73858a966969baf261d Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:56:54 -0600 Subject: PNP: add pnp_resource_type() internal interface Given a struct resource, this returns the type (IO, MEM, IRQ, DMA). Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h index 6f60097..4c2a625 100644 --- a/drivers/pnp/base.h +++ b/drivers/pnp/base.h @@ -46,6 +46,7 @@ int pnp_check_dma(struct pnp_dev *dev, struct resource *res); void dbg_pnp_show_resources(struct pnp_dev *dev, char *desc); void pnp_init_resource(struct resource *res); +int pnp_resource_type(struct resource *res); struct pnp_resource *pnp_get_pnp_resource(struct pnp_dev *dev, unsigned int type, unsigned int num); diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index 390b500..cce341f 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -499,6 +499,12 @@ int pnp_check_dma(struct pnp_dev *dev, struct resource *res) #endif } +int pnp_resource_type(struct resource *res) +{ + return res->flags & (IORESOURCE_IO | IORESOURCE_MEM | + IORESOURCE_IRQ | IORESOURCE_DMA); +} + struct pnp_resource *pnp_get_pnp_resource(struct pnp_dev *dev, unsigned int type, unsigned int num) { -- cgit v0.10.2 From 9fdee4e02e3b214cde8e4f3beb6776075a3d08a7 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:56:55 -0600 Subject: PNP: add pnp_resource_type_name() helper function This patch adds a "pnp_resource_type_name(struct resource *)" that returns the string resource type. This will be used by the sysfs "show resources" function and the debug resource dump function. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h index 4c2a625..c913158 100644 --- a/drivers/pnp/base.h +++ b/drivers/pnp/base.h @@ -43,6 +43,7 @@ int pnp_check_mem(struct pnp_dev *dev, struct resource *res); int pnp_check_irq(struct pnp_dev *dev, struct resource *res); int pnp_check_dma(struct pnp_dev *dev, struct resource *res); +char *pnp_resource_type_name(struct resource *res); void dbg_pnp_show_resources(struct pnp_dev *dev, char *desc); void pnp_init_resource(struct resource *res); diff --git a/drivers/pnp/support.c b/drivers/pnp/support.c index 7f594cc..eb07345 100644 --- a/drivers/pnp/support.c +++ b/drivers/pnp/support.c @@ -52,6 +52,21 @@ void pnp_eisa_id_to_string(u32 id, char *str) str[7] = '\0'; } +char *pnp_resource_type_name(struct resource *res) +{ + switch (pnp_resource_type(res)) { + case IORESOURCE_IO: + return "io"; + case IORESOURCE_MEM: + return "mem"; + case IORESOURCE_IRQ: + return "irq"; + case IORESOURCE_DMA: + return "dma"; + } + return NULL; +} + void dbg_pnp_show_resources(struct pnp_dev *dev, char *desc) { #ifdef DEBUG -- cgit v0.10.2 From 20bfdbba7212d19613b93dcea93f26cb65af91fe Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:56:56 -0600 Subject: PNP: make pnp_{port,mem,etc}_start(), et al work for invalid resources Some callers use pnp_port_start() and similar functions without making sure the resource is valid. This patch makes us fall back to returning the initial values if the resource is not valid or not even present. This mostly preserves the previous behavior, where we would just return the initial values set by pnp_init_resource_table(). The original 2.6.25 code didn't range-check the "bar", so it would return garbage if the bar exceeded the table size. This code returns sensible values instead. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 63b128d..8b607ae 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -40,19 +40,31 @@ static inline resource_size_t pnp_resource_len(struct resource *res) static inline resource_size_t pnp_port_start(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_IO, bar)->start; + struct resource *res = pnp_get_resource(dev, IORESOURCE_IO, bar); + + if (pnp_resource_valid(res)) + return res->start; + return 0; } static inline resource_size_t pnp_port_end(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_IO, bar)->end; + struct resource *res = pnp_get_resource(dev, IORESOURCE_IO, bar); + + if (pnp_resource_valid(res)) + return res->end; + return 0; } static inline unsigned long pnp_port_flags(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_IO, bar)->flags; + struct resource *res = pnp_get_resource(dev, IORESOURCE_IO, bar); + + if (pnp_resource_valid(res)) + return res->flags; + return IORESOURCE_IO | IORESOURCE_AUTO | IORESOURCE_UNSET; } static inline int pnp_port_valid(struct pnp_dev *dev, unsigned int bar) @@ -63,25 +75,41 @@ static inline int pnp_port_valid(struct pnp_dev *dev, unsigned int bar) static inline resource_size_t pnp_port_len(struct pnp_dev *dev, unsigned int bar) { - return pnp_resource_len(pnp_get_resource(dev, IORESOURCE_IO, bar)); + struct resource *res = pnp_get_resource(dev, IORESOURCE_IO, bar); + + if (pnp_resource_valid(res)) + return pnp_resource_len(res); + return 0; } static inline resource_size_t pnp_mem_start(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_MEM, bar)->start; + struct resource *res = pnp_get_resource(dev, IORESOURCE_MEM, bar); + + if (pnp_resource_valid(res)) + return res->start; + return 0; } static inline resource_size_t pnp_mem_end(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_MEM, bar)->end; + struct resource *res = pnp_get_resource(dev, IORESOURCE_MEM, bar); + + if (pnp_resource_valid(res)) + return res->end; + return 0; } static inline unsigned long pnp_mem_flags(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_MEM, bar)->flags; + struct resource *res = pnp_get_resource(dev, IORESOURCE_MEM, bar); + + if (pnp_resource_valid(res)) + return res->flags; + return IORESOURCE_MEM | IORESOURCE_AUTO | IORESOURCE_UNSET; } static inline int pnp_mem_valid(struct pnp_dev *dev, unsigned int bar) @@ -92,18 +120,30 @@ static inline int pnp_mem_valid(struct pnp_dev *dev, unsigned int bar) static inline resource_size_t pnp_mem_len(struct pnp_dev *dev, unsigned int bar) { - return pnp_resource_len(pnp_get_resource(dev, IORESOURCE_MEM, bar)); + struct resource *res = pnp_get_resource(dev, IORESOURCE_MEM, bar); + + if (pnp_resource_valid(res)) + return pnp_resource_len(res); + return 0; } static inline resource_size_t pnp_irq(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_IRQ, bar)->start; + struct resource *res = pnp_get_resource(dev, IORESOURCE_IRQ, bar); + + if (pnp_resource_valid(res)) + return res->start; + return -1; } static inline unsigned long pnp_irq_flags(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_IRQ, bar)->flags; + struct resource *res = pnp_get_resource(dev, IORESOURCE_IRQ, bar); + + if (pnp_resource_valid(res)) + return res->flags; + return IORESOURCE_IRQ | IORESOURCE_AUTO | IORESOURCE_UNSET; } static inline int pnp_irq_valid(struct pnp_dev *dev, unsigned int bar) @@ -114,12 +154,20 @@ static inline int pnp_irq_valid(struct pnp_dev *dev, unsigned int bar) static inline resource_size_t pnp_dma(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_DMA, bar)->start; + struct resource *res = pnp_get_resource(dev, IORESOURCE_DMA, bar); + + if (pnp_resource_valid(res)) + return res->start; + return -1; } static inline unsigned long pnp_dma_flags(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_DMA, bar)->flags; + struct resource *res = pnp_get_resource(dev, IORESOURCE_DMA, bar); + + if (pnp_resource_valid(res)) + return res->flags; + return IORESOURCE_DMA | IORESOURCE_AUTO | IORESOURCE_UNSET; } static inline int pnp_dma_valid(struct pnp_dev *dev, unsigned int bar) -- cgit v0.10.2 From aee3ad815dd291a7193ab01da0f1a30c84d00061 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:56:57 -0600 Subject: PNP: replace pnp_resource_table with dynamically allocated resources PNP used to have a fixed-size pnp_resource_table for tracking the resources used by a device. This table often overflowed, so we've had to increase the table size, which wastes memory because most devices have very few resources. This patch replaces the table with a linked list of resources where the entries are allocated on demand. This removes messages like these: pnpacpi: exceeded the max number of IO resources 00:01: too many I/O port resources References: http://bugzilla.kernel.org/show_bug.cgi?id=9535 http://bugzilla.kernel.org/show_bug.cgi?id=9740 http://lkml.org/lkml/2007/11/30/110 This patch also changes the way PNP uses the IORESOURCE_UNSET, IORESOURCE_AUTO, and IORESOURCE_DISABLED flags. Prior to this patch, the pnp_resource_table entries used the flags like this: IORESOURCE_UNSET This table entry is unused and available for use. When this flag is set, we shouldn't look at anything else in the resource structure. This flag is set when a resource table entry is initialized. IORESOURCE_AUTO This resource was assigned automatically by pnp_assign_{io,mem,etc}(). This flag is set when a resource table entry is initialized and cleared whenever we discover a resource setting by reading an ISAPNP config register, parsing a PNPBIOS resource data stream, parsing an ACPI _CRS list, or interpreting a sysfs "set" command. Resources marked IORESOURCE_AUTO are reinitialized and marked as IORESOURCE_UNSET by pnp_clean_resource_table() in these cases: - before we attempt to assign resources automatically, - if we fail to assign resources automatically, - after disabling a device IORESOURCE_DISABLED Set by pnp_assign_{io,mem,etc}() when automatic assignment fails. Also set by PNPBIOS and PNPACPI for: - invalid IRQs or GSI registration failures - invalid DMA channels - I/O ports above 0x10000 - mem ranges with negative length After this patch, there is no pnp_resource_table, and the resource list entries use the flags like this: IORESOURCE_UNSET This flag is no longer used in PNP. Instead of keeping IORESOURCE_UNSET entries in the resource list, we remove entries from the list and free them. IORESOURCE_AUTO No change in meaning: it still means the resource was assigned automatically by pnp_assign_{port,mem,etc}(), but these functions now set the bit explicitly. We still "clean" a device's resource list in the same places, but rather than reinitializing IORESOURCE_AUTO entries, we just remove them from the list. Note that IORESOURCE_AUTO entries are always at the end of the list, so removing them doesn't reorder other list entries. This is because non-IORESOURCE_AUTO entries are added by the ISAPNP, PNPBIOS, or PNPACPI "get resources" methods and by the sysfs "set" command. In each of these cases, we completely free the resource list first. IORESOURCE_DISABLED In addition to the cases where we used to set this flag, ISAPNP now adds an IORESOURCE_DISABLED resource when it reads a configuration register with a "disabled" value. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h index c913158..1667ac3 100644 --- a/drivers/pnp/base.h +++ b/drivers/pnp/base.h @@ -46,27 +46,15 @@ int pnp_check_dma(struct pnp_dev *dev, struct resource *res); char *pnp_resource_type_name(struct resource *res); void dbg_pnp_show_resources(struct pnp_dev *dev, char *desc); -void pnp_init_resource(struct resource *res); +void pnp_free_resources(struct pnp_dev *dev); int pnp_resource_type(struct resource *res); -struct pnp_resource *pnp_get_pnp_resource(struct pnp_dev *dev, - unsigned int type, unsigned int num); - -#define PNP_MAX_PORT 40 -#define PNP_MAX_MEM 24 -#define PNP_MAX_IRQ 2 -#define PNP_MAX_DMA 2 - struct pnp_resource { + struct list_head list; struct resource res; }; -struct pnp_resource_table { - struct pnp_resource port[PNP_MAX_PORT]; - struct pnp_resource mem[PNP_MAX_MEM]; - struct pnp_resource dma[PNP_MAX_DMA]; - struct pnp_resource irq[PNP_MAX_IRQ]; -}; +void pnp_free_resource(struct pnp_resource *pnp_res); struct pnp_resource *pnp_add_irq_resource(struct pnp_dev *dev, int irq, int flags); diff --git a/drivers/pnp/core.c b/drivers/pnp/core.c index 20771b7..7182da9 100644 --- a/drivers/pnp/core.c +++ b/drivers/pnp/core.c @@ -99,6 +99,21 @@ static void pnp_free_ids(struct pnp_dev *dev) } } +void pnp_free_resource(struct pnp_resource *pnp_res) +{ + list_del(&pnp_res->list); + kfree(pnp_res); +} + +void pnp_free_resources(struct pnp_dev *dev) +{ + struct pnp_resource *pnp_res, *tmp; + + list_for_each_entry_safe(pnp_res, tmp, &dev->resources, list) { + pnp_free_resource(pnp_res); + } +} + static void pnp_release_device(struct device *dmdev) { struct pnp_dev *dev = to_pnp_dev(dmdev); @@ -106,7 +121,7 @@ static void pnp_release_device(struct device *dmdev) pnp_free_option(dev->independent); pnp_free_option(dev->dependent); pnp_free_ids(dev); - kfree(dev->res); + pnp_free_resources(dev); kfree(dev); } @@ -119,12 +134,7 @@ struct pnp_dev *pnp_alloc_dev(struct pnp_protocol *protocol, int id, char *pnpid if (!dev) return NULL; - dev->res = kzalloc(sizeof(struct pnp_resource_table), GFP_KERNEL); - if (!dev->res) { - kfree(dev); - return NULL; - } - + INIT_LIST_HEAD(&dev->resources); dev->protocol = protocol; dev->number = id; dev->dma_mask = DMA_24BIT_MASK; @@ -140,7 +150,6 @@ struct pnp_dev *pnp_alloc_dev(struct pnp_protocol *protocol, int id, char *pnpid dev_id = pnp_add_id(dev, pnpid); if (!dev_id) { - kfree(dev->res); kfree(dev); return NULL; } diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index 3f8007a..7fc86bb 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -269,46 +269,38 @@ static ssize_t pnp_show_current_resources(struct device *dmdev, pnp_printf(buffer, "disabled\n"); for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_IO, i)); i++) { - if (pnp_resource_valid(res)) { - pnp_printf(buffer, "io"); - if (res->flags & IORESOURCE_DISABLED) - pnp_printf(buffer, " disabled\n"); - else - pnp_printf(buffer, " 0x%llx-0x%llx\n", - (unsigned long long) res->start, - (unsigned long long) res->end); - } + pnp_printf(buffer, "io"); + if (res->flags & IORESOURCE_DISABLED) + pnp_printf(buffer, " disabled\n"); + else + pnp_printf(buffer, " 0x%llx-0x%llx\n", + (unsigned long long) res->start, + (unsigned long long) res->end); } for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_MEM, i)); i++) { - if (pnp_resource_valid(res)) { - pnp_printf(buffer, "mem"); - if (res->flags & IORESOURCE_DISABLED) - pnp_printf(buffer, " disabled\n"); - else - pnp_printf(buffer, " 0x%llx-0x%llx\n", - (unsigned long long) res->start, - (unsigned long long) res->end); - } + pnp_printf(buffer, "mem"); + if (res->flags & IORESOURCE_DISABLED) + pnp_printf(buffer, " disabled\n"); + else + pnp_printf(buffer, " 0x%llx-0x%llx\n", + (unsigned long long) res->start, + (unsigned long long) res->end); } for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_IRQ, i)); i++) { - if (pnp_resource_valid(res)) { - pnp_printf(buffer, "irq"); - if (res->flags & IORESOURCE_DISABLED) - pnp_printf(buffer, " disabled\n"); - else - pnp_printf(buffer, " %lld\n", - (unsigned long long) res->start); - } + pnp_printf(buffer, "irq"); + if (res->flags & IORESOURCE_DISABLED) + pnp_printf(buffer, " disabled\n"); + else + pnp_printf(buffer, " %lld\n", + (unsigned long long) res->start); } for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_DMA, i)); i++) { - if (pnp_resource_valid(res)) { - pnp_printf(buffer, "dma"); - if (res->flags & IORESOURCE_DISABLED) - pnp_printf(buffer, " disabled\n"); - else - pnp_printf(buffer, " %lld\n", - (unsigned long long) res->start); - } + pnp_printf(buffer, "dma"); + if (res->flags & IORESOURCE_DISABLED) + pnp_printf(buffer, " disabled\n"); + else + pnp_printf(buffer, " %lld\n", + (unsigned long long) res->start); } ret = (buffer->curr - buf); kfree(buffer); diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c index 752b51f..ca4457e 100644 --- a/drivers/pnp/isapnp/core.c +++ b/drivers/pnp/isapnp/core.c @@ -973,8 +973,7 @@ static int isapnp_set_resources(struct pnp_dev *dev) dev->active = 1; for (tmp = 0; tmp < ISAPNP_MAX_PORT; tmp++) { res = pnp_get_resource(dev, IORESOURCE_IO, tmp); - if (res && pnp_resource_valid(res) && - !(res->flags & IORESOURCE_DISABLED)) { + if (pnp_resource_enabled(res)) { dev_dbg(&dev->dev, " set io %d to %#llx\n", tmp, (unsigned long long) res->start); isapnp_write_word(ISAPNP_CFG_PORT + (tmp << 1), @@ -983,8 +982,7 @@ static int isapnp_set_resources(struct pnp_dev *dev) } for (tmp = 0; tmp < ISAPNP_MAX_IRQ; tmp++) { res = pnp_get_resource(dev, IORESOURCE_IRQ, tmp); - if (res && pnp_resource_valid(res) && - !(res->flags & IORESOURCE_DISABLED)) { + if (pnp_resource_enabled(res)) { int irq = res->start; if (irq == 2) irq = 9; @@ -994,8 +992,7 @@ static int isapnp_set_resources(struct pnp_dev *dev) } for (tmp = 0; tmp < ISAPNP_MAX_DMA; tmp++) { res = pnp_get_resource(dev, IORESOURCE_DMA, tmp); - if (res && pnp_resource_valid(res) && - !(res->flags & IORESOURCE_DISABLED)) { + if (pnp_resource_enabled(res)) { dev_dbg(&dev->dev, " set dma %d to %lld\n", tmp, (unsigned long long) res->start); isapnp_write_byte(ISAPNP_CFG_DMA + tmp, res->start); @@ -1003,8 +1000,7 @@ static int isapnp_set_resources(struct pnp_dev *dev) } for (tmp = 0; tmp < ISAPNP_MAX_MEM; tmp++) { res = pnp_get_resource(dev, IORESOURCE_MEM, tmp); - if (res && pnp_resource_valid(res) && - !(res->flags & IORESOURCE_DISABLED)) { + if (pnp_resource_enabled(res)) { dev_dbg(&dev->dev, " set mem %d to %#llx\n", tmp, (unsigned long long) res->start); isapnp_write_word(ISAPNP_CFG_MEM + (tmp << 3), diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c index 90bd9cb..165b624 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -19,40 +19,30 @@ DEFINE_MUTEX(pnp_res_mutex); static int pnp_assign_port(struct pnp_dev *dev, struct pnp_port *rule, int idx) { - struct pnp_resource *pnp_res; - struct resource *res; + struct resource *res, local_res; - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_IO, idx); - if (!pnp_res) { - dev_err(&dev->dev, "too many I/O port resources\n"); - /* pretend we were successful so at least the manager won't try again */ - return 1; - } - - res = &pnp_res->res; - - /* check if this resource has been manually set, if so skip */ - if (!(res->flags & IORESOURCE_AUTO)) { + res = pnp_get_resource(dev, IORESOURCE_IO, idx); + if (res) { dev_dbg(&dev->dev, " io %d already set to %#llx-%#llx " "flags %#lx\n", idx, (unsigned long long) res->start, (unsigned long long) res->end, res->flags); return 1; } - /* set the initial values */ - res->flags |= rule->flags | IORESOURCE_IO; - res->flags &= ~IORESOURCE_UNSET; + res = &local_res; + res->flags = rule->flags | IORESOURCE_AUTO; + res->start = 0; + res->end = 0; if (!rule->size) { res->flags |= IORESOURCE_DISABLED; dev_dbg(&dev->dev, " io %d disabled\n", idx); - return 1; /* skip disabled resource requests */ + goto __add; } res->start = rule->min; res->end = res->start + rule->size - 1; - /* run through until pnp_check_port is happy */ while (!pnp_check_port(dev, res)) { res->start += rule->align; res->end = res->start + rule->size - 1; @@ -61,38 +51,29 @@ static int pnp_assign_port(struct pnp_dev *dev, struct pnp_port *rule, int idx) return 0; } } - dev_dbg(&dev->dev, " assign io %d %#llx-%#llx\n", idx, - (unsigned long long) res->start, (unsigned long long) res->end); + +__add: + pnp_add_io_resource(dev, res->start, res->end, res->flags); return 1; } static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx) { - struct pnp_resource *pnp_res; - struct resource *res; + struct resource *res, local_res; - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_MEM, idx); - if (!pnp_res) { - dev_err(&dev->dev, "too many memory resources\n"); - /* pretend we were successful so at least the manager won't try again */ - return 1; - } - - res = &pnp_res->res; - - /* check if this resource has been manually set, if so skip */ - if (!(res->flags & IORESOURCE_AUTO)) { + res = pnp_get_resource(dev, IORESOURCE_MEM, idx); + if (res) { dev_dbg(&dev->dev, " mem %d already set to %#llx-%#llx " "flags %#lx\n", idx, (unsigned long long) res->start, (unsigned long long) res->end, res->flags); return 1; } - /* set the initial values */ - res->flags |= rule->flags | IORESOURCE_MEM; - res->flags &= ~IORESOURCE_UNSET; + res = &local_res; + res->flags = rule->flags | IORESOURCE_AUTO; + res->start = 0; + res->end = 0; - /* convert pnp flags to standard Linux flags */ if (!(rule->flags & IORESOURCE_MEM_WRITEABLE)) res->flags |= IORESOURCE_READONLY; if (rule->flags & IORESOURCE_MEM_CACHEABLE) @@ -105,13 +86,12 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx) if (!rule->size) { res->flags |= IORESOURCE_DISABLED; dev_dbg(&dev->dev, " mem %d disabled\n", idx); - return 1; /* skip disabled resource requests */ + goto __add; } res->start = rule->min; res->end = res->start + rule->size - 1; - /* run through until pnp_check_mem is happy */ while (!pnp_check_mem(dev, res)) { res->start += rule->align; res->end = res->start + rule->size - 1; @@ -120,15 +100,15 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx) return 0; } } - dev_dbg(&dev->dev, " assign mem %d %#llx-%#llx\n", idx, - (unsigned long long) res->start, (unsigned long long) res->end); + +__add: + pnp_add_mem_resource(dev, res->start, res->end, res->flags); return 1; } static int pnp_assign_irq(struct pnp_dev *dev, struct pnp_irq *rule, int idx) { - struct pnp_resource *pnp_res; - struct resource *res; + struct resource *res, local_res; int i; /* IRQ priority: this table is good for i386 */ @@ -136,58 +116,48 @@ static int pnp_assign_irq(struct pnp_dev *dev, struct pnp_irq *rule, int idx) 5, 10, 11, 12, 9, 14, 15, 7, 3, 4, 13, 0, 1, 6, 8, 2 }; - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_IRQ, idx); - if (!pnp_res) { - dev_err(&dev->dev, "too many IRQ resources\n"); - /* pretend we were successful so at least the manager won't try again */ - return 1; - } - - res = &pnp_res->res; - - /* check if this resource has been manually set, if so skip */ - if (!(res->flags & IORESOURCE_AUTO)) { + res = pnp_get_resource(dev, IORESOURCE_IRQ, idx); + if (res) { dev_dbg(&dev->dev, " irq %d already set to %d flags %#lx\n", idx, (int) res->start, res->flags); return 1; } - /* set the initial values */ - res->flags |= rule->flags | IORESOURCE_IRQ; - res->flags &= ~IORESOURCE_UNSET; + res = &local_res; + res->flags = rule->flags | IORESOURCE_AUTO; + res->start = -1; + res->end = -1; if (bitmap_empty(rule->map, PNP_IRQ_NR)) { res->flags |= IORESOURCE_DISABLED; dev_dbg(&dev->dev, " irq %d disabled\n", idx); - return 1; /* skip disabled resource requests */ + goto __add; } /* TBD: need check for >16 IRQ */ res->start = find_next_bit(rule->map, PNP_IRQ_NR, 16); if (res->start < PNP_IRQ_NR) { res->end = res->start; - dev_dbg(&dev->dev, " assign irq %d %d\n", idx, - (int) res->start); - return 1; + goto __add; } for (i = 0; i < 16; i++) { if (test_bit(xtab[i], rule->map)) { res->start = res->end = xtab[i]; - if (pnp_check_irq(dev, res)) { - dev_dbg(&dev->dev, " assign irq %d %d\n", idx, - (int) res->start); - return 1; - } + if (pnp_check_irq(dev, res)) + goto __add; } } dev_dbg(&dev->dev, " couldn't assign irq %d\n", idx); return 0; + +__add: + pnp_add_irq_resource(dev, res->start, res->flags); + return 1; } static void pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx) { - struct pnp_resource *pnp_res; - struct resource *res; + struct resource *res, local_res; int i; /* DMA priority: this table is good for i386 */ @@ -195,127 +165,47 @@ static void pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx) 1, 3, 5, 6, 7, 0, 2, 4 }; - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_DMA, idx); - if (!pnp_res) { - dev_err(&dev->dev, "too many DMA resources\n"); - return; - } - - res = &pnp_res->res; - - /* check if this resource has been manually set, if so skip */ - if (!(res->flags & IORESOURCE_AUTO)) { + res = pnp_get_resource(dev, IORESOURCE_DMA, idx); + if (res) { dev_dbg(&dev->dev, " dma %d already set to %d flags %#lx\n", idx, (int) res->start, res->flags); return; } - /* set the initial values */ - res->flags |= rule->flags | IORESOURCE_DMA; - res->flags &= ~IORESOURCE_UNSET; + res = &local_res; + res->flags = rule->flags | IORESOURCE_AUTO; + res->start = -1; + res->end = -1; for (i = 0; i < 8; i++) { if (rule->map & (1 << xtab[i])) { res->start = res->end = xtab[i]; - if (pnp_check_dma(dev, res)) { - dev_dbg(&dev->dev, " assign dma %d %d\n", idx, - (int) res->start); - return; - } + if (pnp_check_dma(dev, res)) + goto __add; } } #ifdef MAX_DMA_CHANNELS res->start = res->end = MAX_DMA_CHANNELS; #endif - res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED; + res->flags |= IORESOURCE_DISABLED; dev_dbg(&dev->dev, " disable dma %d\n", idx); -} -void pnp_init_resource(struct resource *res) -{ - unsigned long type; - - type = res->flags & (IORESOURCE_IO | IORESOURCE_MEM | - IORESOURCE_IRQ | IORESOURCE_DMA); - - res->name = NULL; - res->flags = type | IORESOURCE_AUTO | IORESOURCE_UNSET; - if (type == IORESOURCE_IRQ || type == IORESOURCE_DMA) { - res->start = -1; - res->end = -1; - } else { - res->start = 0; - res->end = 0; - } +__add: + pnp_add_dma_resource(dev, res->start, res->flags); } -/** - * pnp_init_resources - Resets a resource table to default values. - * @table: pointer to the desired resource table - */ void pnp_init_resources(struct pnp_dev *dev) { - struct resource *res; - int idx; - - for (idx = 0; idx < PNP_MAX_IRQ; idx++) { - res = &dev->res->irq[idx].res; - res->flags = IORESOURCE_IRQ; - pnp_init_resource(res); - } - for (idx = 0; idx < PNP_MAX_DMA; idx++) { - res = &dev->res->dma[idx].res; - res->flags = IORESOURCE_DMA; - pnp_init_resource(res); - } - for (idx = 0; idx < PNP_MAX_PORT; idx++) { - res = &dev->res->port[idx].res; - res->flags = IORESOURCE_IO; - pnp_init_resource(res); - } - for (idx = 0; idx < PNP_MAX_MEM; idx++) { - res = &dev->res->mem[idx].res; - res->flags = IORESOURCE_MEM; - pnp_init_resource(res); - } + pnp_free_resources(dev); } -/** - * pnp_clean_resources - clears resources that were not manually set - * @res: the resources to clean - */ static void pnp_clean_resource_table(struct pnp_dev *dev) { - struct resource *res; - int idx; - - for (idx = 0; idx < PNP_MAX_IRQ; idx++) { - res = &dev->res->irq[idx].res; - if (res->flags & IORESOURCE_AUTO) { - res->flags = IORESOURCE_IRQ; - pnp_init_resource(res); - } - } - for (idx = 0; idx < PNP_MAX_DMA; idx++) { - res = &dev->res->dma[idx].res; - if (res->flags & IORESOURCE_AUTO) { - res->flags = IORESOURCE_DMA; - pnp_init_resource(res); - } - } - for (idx = 0; idx < PNP_MAX_PORT; idx++) { - res = &dev->res->port[idx].res; - if (res->flags & IORESOURCE_AUTO) { - res->flags = IORESOURCE_IO; - pnp_init_resource(res); - } - } - for (idx = 0; idx < PNP_MAX_MEM; idx++) { - res = &dev->res->mem[idx].res; - if (res->flags & IORESOURCE_AUTO) { - res->flags = IORESOURCE_MEM; - pnp_init_resource(res); - } + struct pnp_resource *pnp_res, *tmp; + + list_for_each_entry_safe(pnp_res, tmp, &dev->resources, list) { + if (pnp_res->res.flags & IORESOURCE_AUTO) + pnp_free_resource(pnp_res); } } diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index 46c791a..9a45c25 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -806,6 +806,13 @@ static void pnpacpi_encode_irq(struct pnp_dev *dev, struct acpi_resource_irq *irq = &resource->data.irq; int triggering, polarity, shareable; + if (!pnp_resource_enabled(p)) { + irq->interrupt_count = 0; + dev_dbg(&dev->dev, " encode irq (%s)\n", + p ? "disabled" : "missing"); + return; + } + decode_irq_flags(dev, p->flags, &triggering, &polarity, &shareable); irq->triggering = triggering; irq->polarity = polarity; @@ -828,6 +835,13 @@ static void pnpacpi_encode_ext_irq(struct pnp_dev *dev, struct acpi_resource_extended_irq *extended_irq = &resource->data.extended_irq; int triggering, polarity, shareable; + if (!pnp_resource_enabled(p)) { + extended_irq->interrupt_count = 0; + dev_dbg(&dev->dev, " encode extended irq (%s)\n", + p ? "disabled" : "missing"); + return; + } + decode_irq_flags(dev, p->flags, &triggering, &polarity, &shareable); extended_irq->producer_consumer = ACPI_CONSUMER; extended_irq->triggering = triggering; @@ -848,6 +862,13 @@ static void pnpacpi_encode_dma(struct pnp_dev *dev, { struct acpi_resource_dma *dma = &resource->data.dma; + if (!pnp_resource_enabled(p)) { + dma->channel_count = 0; + dev_dbg(&dev->dev, " encode dma (%s)\n", + p ? "disabled" : "missing"); + return; + } + /* Note: pnp_assign_dma will copy pnp_dma->flags into p->flags */ switch (p->flags & IORESOURCE_DMA_SPEED_MASK) { case IORESOURCE_DMA_TYPEA: @@ -889,17 +910,21 @@ static void pnpacpi_encode_io(struct pnp_dev *dev, { struct acpi_resource_io *io = &resource->data.io; - /* Note: pnp_assign_port will copy pnp_port->flags into p->flags */ - io->io_decode = (p->flags & PNP_PORT_FLAG_16BITADDR) ? - ACPI_DECODE_16 : ACPI_DECODE_10; - io->minimum = p->start; - io->maximum = p->end; - io->alignment = 0; /* Correct? */ - io->address_length = p->end - p->start + 1; - - dev_dbg(&dev->dev, " encode io %#llx-%#llx decode %#x\n", - (unsigned long long) p->start, (unsigned long long) p->end, - io->io_decode); + if (pnp_resource_enabled(p)) { + /* Note: pnp_assign_port copies pnp_port->flags into p->flags */ + io->io_decode = (p->flags & PNP_PORT_FLAG_16BITADDR) ? + ACPI_DECODE_16 : ACPI_DECODE_10; + io->minimum = p->start; + io->maximum = p->end; + io->alignment = 0; /* Correct? */ + io->address_length = p->end - p->start + 1; + } else { + io->minimum = 0; + io->address_length = 0; + } + + dev_dbg(&dev->dev, " encode io %#x-%#x decode %#x\n", io->minimum, + io->minimum + io->address_length - 1, io->io_decode); } static void pnpacpi_encode_fixed_io(struct pnp_dev *dev, @@ -908,11 +933,16 @@ static void pnpacpi_encode_fixed_io(struct pnp_dev *dev, { struct acpi_resource_fixed_io *fixed_io = &resource->data.fixed_io; - fixed_io->address = p->start; - fixed_io->address_length = p->end - p->start + 1; + if (pnp_resource_enabled(p)) { + fixed_io->address = p->start; + fixed_io->address_length = p->end - p->start + 1; + } else { + fixed_io->address = 0; + fixed_io->address_length = 0; + } - dev_dbg(&dev->dev, " encode fixed_io %#llx-%#llx\n", - (unsigned long long) p->start, (unsigned long long) p->end); + dev_dbg(&dev->dev, " encode fixed_io %#x-%#x\n", fixed_io->address, + fixed_io->address + fixed_io->address_length - 1); } static void pnpacpi_encode_mem24(struct pnp_dev *dev, @@ -921,17 +951,22 @@ static void pnpacpi_encode_mem24(struct pnp_dev *dev, { struct acpi_resource_memory24 *memory24 = &resource->data.memory24; - /* Note: pnp_assign_mem will copy pnp_mem->flags into p->flags */ - memory24->write_protect = - (p->flags & IORESOURCE_MEM_WRITEABLE) ? - ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY; - memory24->minimum = p->start; - memory24->maximum = p->end; - memory24->alignment = 0; - memory24->address_length = p->end - p->start + 1; - - dev_dbg(&dev->dev, " encode mem24 %#llx-%#llx write_protect %#x\n", - (unsigned long long) p->start, (unsigned long long) p->end, + if (pnp_resource_enabled(p)) { + /* Note: pnp_assign_mem copies pnp_mem->flags into p->flags */ + memory24->write_protect = p->flags & IORESOURCE_MEM_WRITEABLE ? + ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY; + memory24->minimum = p->start; + memory24->maximum = p->end; + memory24->alignment = 0; + memory24->address_length = p->end - p->start + 1; + } else { + memory24->minimum = 0; + memory24->address_length = 0; + } + + dev_dbg(&dev->dev, " encode mem24 %#x-%#x write_protect %#x\n", + memory24->minimum, + memory24->minimum + memory24->address_length - 1, memory24->write_protect); } @@ -941,16 +976,21 @@ static void pnpacpi_encode_mem32(struct pnp_dev *dev, { struct acpi_resource_memory32 *memory32 = &resource->data.memory32; - memory32->write_protect = - (p->flags & IORESOURCE_MEM_WRITEABLE) ? - ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY; - memory32->minimum = p->start; - memory32->maximum = p->end; - memory32->alignment = 0; - memory32->address_length = p->end - p->start + 1; + if (pnp_resource_enabled(p)) { + memory32->write_protect = p->flags & IORESOURCE_MEM_WRITEABLE ? + ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY; + memory32->minimum = p->start; + memory32->maximum = p->end; + memory32->alignment = 0; + memory32->address_length = p->end - p->start + 1; + } else { + memory32->minimum = 0; + memory32->alignment = 0; + } - dev_dbg(&dev->dev, " encode mem32 %#llx-%#llx write_protect %#x\n", - (unsigned long long) p->start, (unsigned long long) p->end, + dev_dbg(&dev->dev, " encode mem32 %#x-%#x write_protect %#x\n", + memory32->minimum, + memory32->minimum + memory32->address_length - 1, memory32->write_protect); } @@ -960,15 +1000,20 @@ static void pnpacpi_encode_fixed_mem32(struct pnp_dev *dev, { struct acpi_resource_fixed_memory32 *fixed_memory32 = &resource->data.fixed_memory32; - fixed_memory32->write_protect = - (p->flags & IORESOURCE_MEM_WRITEABLE) ? - ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY; - fixed_memory32->address = p->start; - fixed_memory32->address_length = p->end - p->start + 1; + if (pnp_resource_enabled(p)) { + fixed_memory32->write_protect = + p->flags & IORESOURCE_MEM_WRITEABLE ? + ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY; + fixed_memory32->address = p->start; + fixed_memory32->address_length = p->end - p->start + 1; + } else { + fixed_memory32->address = 0; + fixed_memory32->address_length = 0; + } - dev_dbg(&dev->dev, " encode fixed_mem32 %#llx-%#llx " - "write_protect %#x\n", - (unsigned long long) p->start, (unsigned long long) p->end, + dev_dbg(&dev->dev, " encode fixed_mem32 %#x-%#x write_protect %#x\n", + fixed_memory32->address, + fixed_memory32->address + fixed_memory32->address_length - 1, fixed_memory32->write_protect); } diff --git a/drivers/pnp/pnpbios/rsparser.c b/drivers/pnp/pnpbios/rsparser.c index 5ff9a4c..01f0c3d 100644 --- a/drivers/pnp/pnpbios/rsparser.c +++ b/drivers/pnp/pnpbios/rsparser.c @@ -526,8 +526,16 @@ len_err: static void pnpbios_encode_mem(struct pnp_dev *dev, unsigned char *p, struct resource *res) { - unsigned long base = res->start; - unsigned long len = res->end - res->start + 1; + unsigned long base; + unsigned long len; + + if (pnp_resource_enabled(res)) { + base = res->start; + len = res->end - res->start + 1; + } else { + base = 0; + len = 0; + } p[4] = (base >> 8) & 0xff; p[5] = ((base >> 8) >> 8) & 0xff; @@ -536,15 +544,22 @@ static void pnpbios_encode_mem(struct pnp_dev *dev, unsigned char *p, p[10] = (len >> 8) & 0xff; p[11] = ((len >> 8) >> 8) & 0xff; - dev_dbg(&dev->dev, " encode mem %#llx-%#llx\n", - (unsigned long long) res->start, (unsigned long long) res->end); + dev_dbg(&dev->dev, " encode mem %#lx-%#lx\n", base, base + len - 1); } static void pnpbios_encode_mem32(struct pnp_dev *dev, unsigned char *p, struct resource *res) { - unsigned long base = res->start; - unsigned long len = res->end - res->start + 1; + unsigned long base; + unsigned long len; + + if (pnp_resource_enabled(res)) { + base = res->start; + len = res->end - res->start + 1; + } else { + base = 0; + len = 0; + } p[4] = base & 0xff; p[5] = (base >> 8) & 0xff; @@ -559,15 +574,22 @@ static void pnpbios_encode_mem32(struct pnp_dev *dev, unsigned char *p, p[18] = (len >> 16) & 0xff; p[19] = (len >> 24) & 0xff; - dev_dbg(&dev->dev, " encode mem32 %#llx-%#llx\n", - (unsigned long long) res->start, (unsigned long long) res->end); + dev_dbg(&dev->dev, " encode mem32 %#lx-%#lx\n", base, base + len - 1); } static void pnpbios_encode_fixed_mem32(struct pnp_dev *dev, unsigned char *p, struct resource *res) { - unsigned long base = res->start; - unsigned long len = res->end - res->start + 1; + unsigned long base; + unsigned long len; + + if (pnp_resource_enabled(res)) { + base = res->start; + len = res->end - res->start + 1; + } else { + base = 0; + len = 0; + } p[4] = base & 0xff; p[5] = (base >> 8) & 0xff; @@ -578,40 +600,54 @@ static void pnpbios_encode_fixed_mem32(struct pnp_dev *dev, unsigned char *p, p[10] = (len >> 16) & 0xff; p[11] = (len >> 24) & 0xff; - dev_dbg(&dev->dev, " encode fixed_mem32 %#llx-%#llx\n", - (unsigned long long) res->start, (unsigned long long) res->end); + dev_dbg(&dev->dev, " encode fixed_mem32 %#lx-%#lx\n", base, + base + len - 1); } static void pnpbios_encode_irq(struct pnp_dev *dev, unsigned char *p, struct resource *res) { - unsigned long map = 0; + unsigned long map; + + if (pnp_resource_enabled(res)) + map = 1 << res->start; + else + map = 0; - map = 1 << res->start; p[1] = map & 0xff; p[2] = (map >> 8) & 0xff; - dev_dbg(&dev->dev, " encode irq %llu\n", - (unsigned long long)res->start); + dev_dbg(&dev->dev, " encode irq mask %#lx\n", map); } static void pnpbios_encode_dma(struct pnp_dev *dev, unsigned char *p, struct resource *res) { - unsigned long map = 0; + unsigned long map; + + if (pnp_resource_enabled(res)) + map = 1 << res->start; + else + map = 0; - map = 1 << res->start; p[1] = map & 0xff; - dev_dbg(&dev->dev, " encode dma %llu\n", - (unsigned long long)res->start); + dev_dbg(&dev->dev, " encode dma mask %#lx\n", map); } static void pnpbios_encode_port(struct pnp_dev *dev, unsigned char *p, struct resource *res) { - unsigned long base = res->start; - unsigned long len = res->end - res->start + 1; + unsigned long base; + unsigned long len; + + if (pnp_resource_enabled(res)) { + base = res->start; + len = res->end - res->start + 1; + } else { + base = 0; + len = 0; + } p[2] = base & 0xff; p[3] = (base >> 8) & 0xff; @@ -619,8 +655,7 @@ static void pnpbios_encode_port(struct pnp_dev *dev, unsigned char *p, p[5] = (base >> 8) & 0xff; p[7] = len & 0xff; - dev_dbg(&dev->dev, " encode io %#llx-%#llx\n", - (unsigned long long) res->start, (unsigned long long) res->end); + dev_dbg(&dev->dev, " encode io %#lx-%#lx\n", base, base + len - 1); } static void pnpbios_encode_fixed_port(struct pnp_dev *dev, unsigned char *p, @@ -629,12 +664,20 @@ static void pnpbios_encode_fixed_port(struct pnp_dev *dev, unsigned char *p, unsigned long base = res->start; unsigned long len = res->end - res->start + 1; + if (pnp_resource_enabled(res)) { + base = res->start; + len = res->end - res->start + 1; + } else { + base = 0; + len = 0; + } + p[1] = base & 0xff; p[2] = (base >> 8) & 0xff; p[3] = len & 0xff; - dev_dbg(&dev->dev, " encode fixed_io %#llx-%#llx\n", - (unsigned long long) res->start, (unsigned long long) res->end); + dev_dbg(&dev->dev, " encode fixed_io %#lx-%#lx\n", base, + base + len - 1); } static unsigned char *pnpbios_encode_allocated_resource_data(struct pnp_dev diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c index 1ff3bb5..21acb54 100644 --- a/drivers/pnp/quirks.c +++ b/drivers/pnp/quirks.c @@ -248,8 +248,7 @@ static void quirk_system_pci_resources(struct pnp_dev *dev) for (j = 0; (res = pnp_get_resource(dev, IORESOURCE_MEM, j)); j++) { - if (res->flags & IORESOURCE_UNSET || - (res->start == 0 && res->end == 0)) + if (res->start == 0 && res->end == 0) continue; pnp_start = res->start; diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index cce341f..0797a77 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -237,7 +237,7 @@ void pnp_free_option(struct pnp_option *option) !((*(enda) < *(startb)) || (*(endb) < *(starta))) #define cannot_compare(flags) \ -((flags) & (IORESOURCE_UNSET | IORESOURCE_DISABLED)) +((flags) & IORESOURCE_DISABLED) int pnp_check_port(struct pnp_dev *dev, struct resource *res) { @@ -505,81 +505,31 @@ int pnp_resource_type(struct resource *res) IORESOURCE_IRQ | IORESOURCE_DMA); } -struct pnp_resource *pnp_get_pnp_resource(struct pnp_dev *dev, - unsigned int type, unsigned int num) -{ - struct pnp_resource_table *res = dev->res; - - switch (type) { - case IORESOURCE_IO: - if (num >= PNP_MAX_PORT) - return NULL; - return &res->port[num]; - case IORESOURCE_MEM: - if (num >= PNP_MAX_MEM) - return NULL; - return &res->mem[num]; - case IORESOURCE_IRQ: - if (num >= PNP_MAX_IRQ) - return NULL; - return &res->irq[num]; - case IORESOURCE_DMA: - if (num >= PNP_MAX_DMA) - return NULL; - return &res->dma[num]; - } - return NULL; -} - struct resource *pnp_get_resource(struct pnp_dev *dev, unsigned int type, unsigned int num) { struct pnp_resource *pnp_res; + struct resource *res; - pnp_res = pnp_get_pnp_resource(dev, type, num); - if (pnp_res) - return &pnp_res->res; - + list_for_each_entry(pnp_res, &dev->resources, list) { + res = &pnp_res->res; + if (pnp_resource_type(res) == type && num-- == 0) + return res; + } return NULL; } EXPORT_SYMBOL(pnp_get_resource); -static struct pnp_resource *pnp_new_resource(struct pnp_dev *dev, int type) +static struct pnp_resource *pnp_new_resource(struct pnp_dev *dev) { struct pnp_resource *pnp_res; - int i; - switch (type) { - case IORESOURCE_IO: - for (i = 0; i < PNP_MAX_PORT; i++) { - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_IO, i); - if (pnp_res && !pnp_resource_valid(&pnp_res->res)) - return pnp_res; - } - break; - case IORESOURCE_MEM: - for (i = 0; i < PNP_MAX_MEM; i++) { - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_MEM, i); - if (pnp_res && !pnp_resource_valid(&pnp_res->res)) - return pnp_res; - } - break; - case IORESOURCE_IRQ: - for (i = 0; i < PNP_MAX_IRQ; i++) { - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_IRQ, i); - if (pnp_res && !pnp_resource_valid(&pnp_res->res)) - return pnp_res; - } - break; - case IORESOURCE_DMA: - for (i = 0; i < PNP_MAX_DMA; i++) { - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_DMA, i); - if (pnp_res && !pnp_resource_valid(&pnp_res->res)) - return pnp_res; - } - break; - } - return NULL; + pnp_res = kzalloc(sizeof(struct pnp_resource), GFP_KERNEL); + if (!pnp_res) + return NULL; + + list_add_tail(&pnp_res->list, &dev->resources); + return pnp_res; } struct pnp_resource *pnp_add_irq_resource(struct pnp_dev *dev, int irq, @@ -589,7 +539,7 @@ struct pnp_resource *pnp_add_irq_resource(struct pnp_dev *dev, int irq, struct resource *res; static unsigned char warned; - pnp_res = pnp_new_resource(dev, IORESOURCE_IRQ); + pnp_res = pnp_new_resource(dev); if (!pnp_res) { if (!warned) { dev_err(&dev->dev, "can't add resource for IRQ %d\n", @@ -615,7 +565,7 @@ struct pnp_resource *pnp_add_dma_resource(struct pnp_dev *dev, int dma, struct resource *res; static unsigned char warned; - pnp_res = pnp_new_resource(dev, IORESOURCE_DMA); + pnp_res = pnp_new_resource(dev); if (!pnp_res) { if (!warned) { dev_err(&dev->dev, "can't add resource for DMA %d\n", @@ -642,7 +592,7 @@ struct pnp_resource *pnp_add_io_resource(struct pnp_dev *dev, struct resource *res; static unsigned char warned; - pnp_res = pnp_new_resource(dev, IORESOURCE_IO); + pnp_res = pnp_new_resource(dev); if (!pnp_res) { if (!warned) { dev_err(&dev->dev, "can't add resource for IO " @@ -671,7 +621,7 @@ struct pnp_resource *pnp_add_mem_resource(struct pnp_dev *dev, struct resource *res; static unsigned char warned; - pnp_res = pnp_new_resource(dev, IORESOURCE_MEM); + pnp_res = pnp_new_resource(dev); if (!pnp_res) { if (!warned) { dev_err(&dev->dev, "can't add resource for MEM " diff --git a/drivers/pnp/support.c b/drivers/pnp/support.c index eb07345..1566e4a 100644 --- a/drivers/pnp/support.c +++ b/drivers/pnp/support.c @@ -16,6 +16,10 @@ */ int pnp_is_active(struct pnp_dev *dev) { + /* + * I don't think this is very reliable because pnp_disable_dev() + * only clears out auto-assigned resources. + */ if (!pnp_port_start(dev, 0) && pnp_port_len(dev, 0) <= 1 && !pnp_mem_start(dev, 0) && pnp_mem_len(dev, 0) <= 1 && pnp_irq(dev, 0) == -1 && pnp_dma(dev, 0) == -1) @@ -70,54 +74,41 @@ char *pnp_resource_type_name(struct resource *res) void dbg_pnp_show_resources(struct pnp_dev *dev, char *desc) { #ifdef DEBUG + char buf[128]; + int len = 0; + struct pnp_resource *pnp_res; struct resource *res; - int i; dev_dbg(&dev->dev, "current resources: %s\n", desc); + list_for_each_entry(pnp_res, &dev->resources, list) { + res = &pnp_res->res; - for (i = 0; i < PNP_MAX_IRQ; i++) { - res = pnp_get_resource(dev, IORESOURCE_IRQ, i); - if (res && !(res->flags & IORESOURCE_UNSET)) - dev_dbg(&dev->dev, " irq %lld flags %#lx%s%s\n", - (unsigned long long) res->start, res->flags, - res->flags & IORESOURCE_DISABLED ? - " DISABLED" : "", - res->flags & IORESOURCE_AUTO ? - " AUTO" : ""); - } - for (i = 0; i < PNP_MAX_DMA; i++) { - res = pnp_get_resource(dev, IORESOURCE_DMA, i); - if (res && !(res->flags & IORESOURCE_UNSET)) - dev_dbg(&dev->dev, " dma %lld flags %#lx%s%s\n", - (unsigned long long) res->start, res->flags, - res->flags & IORESOURCE_DISABLED ? - " DISABLED" : "", - res->flags & IORESOURCE_AUTO ? - " AUTO" : ""); - } - for (i = 0; i < PNP_MAX_PORT; i++) { - res = pnp_get_resource(dev, IORESOURCE_IO, i); - if (res && !(res->flags & IORESOURCE_UNSET)) - dev_dbg(&dev->dev, " io %#llx-%#llx flags %#lx" - "%s%s\n", - (unsigned long long) res->start, - (unsigned long long) res->end, res->flags, - res->flags & IORESOURCE_DISABLED ? - " DISABLED" : "", - res->flags & IORESOURCE_AUTO ? - " AUTO" : ""); - } - for (i = 0; i < PNP_MAX_MEM; i++) { - res = pnp_get_resource(dev, IORESOURCE_MEM, i); - if (res && !(res->flags & IORESOURCE_UNSET)) - dev_dbg(&dev->dev, " mem %#llx-%#llx flags %#lx" - "%s%s\n", - (unsigned long long) res->start, - (unsigned long long) res->end, res->flags, - res->flags & IORESOURCE_DISABLED ? - " DISABLED" : "", - res->flags & IORESOURCE_AUTO ? - " AUTO" : ""); + len += snprintf(buf + len, sizeof(buf) - len, " %-3s ", + pnp_resource_type_name(res)); + + if (res->flags & IORESOURCE_DISABLED) { + dev_dbg(&dev->dev, "%sdisabled\n", buf); + continue; + } + + switch (pnp_resource_type(res)) { + case IORESOURCE_IO: + case IORESOURCE_MEM: + len += snprintf(buf + len, sizeof(buf) - len, + "%#llx-%#llx flags %#lx", + (unsigned long long) res->start, + (unsigned long long) res->end, + res->flags); + break; + case IORESOURCE_IRQ: + case IORESOURCE_DMA: + len += snprintf(buf + len, sizeof(buf) - len, + "%lld flags %#lx", + (unsigned long long) res->start, + res->flags); + break; + } + dev_dbg(&dev->dev, "%s\n", buf); } #endif } diff --git a/drivers/pnp/system.c b/drivers/pnp/system.c index cf4e07b..764f3a3 100644 --- a/drivers/pnp/system.c +++ b/drivers/pnp/system.c @@ -60,7 +60,7 @@ static void reserve_resources_of_dev(struct pnp_dev *dev) int i; for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_IO, i)); i++) { - if (res->flags & IORESOURCE_UNSET) + if (res->flags & IORESOURCE_DISABLED) continue; if (res->start == 0) continue; /* disabled */ @@ -81,7 +81,7 @@ static void reserve_resources_of_dev(struct pnp_dev *dev) } for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_MEM, i)); i++) { - if (res->flags & (IORESOURCE_UNSET | IORESOURCE_DISABLED)) + if (res->flags & IORESOURCE_DISABLED) continue; reserve_range(dev, res->start, res->end, 0); diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 8b607ae..dfaa567 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -15,7 +15,6 @@ struct pnp_protocol; struct pnp_dev; -struct pnp_resource_table; /* * Resource Management @@ -24,7 +23,14 @@ struct resource *pnp_get_resource(struct pnp_dev *, unsigned int, unsigned int); static inline int pnp_resource_valid(struct resource *res) { - if (res && !(res->flags & IORESOURCE_UNSET)) + if (res) + return 1; + return 0; +} + +static inline int pnp_resource_enabled(struct resource *res) +{ + if (res && !(res->flags & IORESOURCE_DISABLED)) return 1; return 0; } @@ -64,7 +70,7 @@ static inline unsigned long pnp_port_flags(struct pnp_dev *dev, if (pnp_resource_valid(res)) return res->flags; - return IORESOURCE_IO | IORESOURCE_AUTO | IORESOURCE_UNSET; + return IORESOURCE_IO | IORESOURCE_AUTO; } static inline int pnp_port_valid(struct pnp_dev *dev, unsigned int bar) @@ -109,7 +115,7 @@ static inline unsigned long pnp_mem_flags(struct pnp_dev *dev, unsigned int bar) if (pnp_resource_valid(res)) return res->flags; - return IORESOURCE_MEM | IORESOURCE_AUTO | IORESOURCE_UNSET; + return IORESOURCE_MEM | IORESOURCE_AUTO; } static inline int pnp_mem_valid(struct pnp_dev *dev, unsigned int bar) @@ -143,7 +149,7 @@ static inline unsigned long pnp_irq_flags(struct pnp_dev *dev, unsigned int bar) if (pnp_resource_valid(res)) return res->flags; - return IORESOURCE_IRQ | IORESOURCE_AUTO | IORESOURCE_UNSET; + return IORESOURCE_IRQ | IORESOURCE_AUTO; } static inline int pnp_irq_valid(struct pnp_dev *dev, unsigned int bar) @@ -167,7 +173,7 @@ static inline unsigned long pnp_dma_flags(struct pnp_dev *dev, unsigned int bar) if (pnp_resource_valid(res)) return res->flags; - return IORESOURCE_DMA | IORESOURCE_AUTO | IORESOURCE_UNSET; + return IORESOURCE_DMA | IORESOURCE_AUTO; } static inline int pnp_dma_valid(struct pnp_dev *dev, unsigned int bar) @@ -296,7 +302,7 @@ struct pnp_dev { int capabilities; struct pnp_option *independent; struct pnp_option *dependent; - struct pnp_resource_table *res; + struct list_head resources; char name[PNP_NAME_LEN]; /* contains a human-readable name */ int flags; /* used by protocols */ -- cgit v0.10.2 From 5acf91415799025410cc0d13101340d352f34c89 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:56:58 -0600 Subject: PNPACPI: keep disabled resources when parsing current config When we parse a device's _CRS data (the current resource settings), we should keep track of everything we find, even if it's currently disabled or invalid. This is what we already do for ISAPNP and PNPBIOS, and it helps keep things matched up when we subsequently re-encode resources. For example, consider a device with (mem, irq0, irq1, io), where irq0 is disabled. If we drop irq0 when parsing the _CRS, we will mistakenly put irq1 in the irq0 slot when we encode resources for an _SRS call. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index 9a45c25..595252b 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -98,8 +98,10 @@ static void pnpacpi_parse_allocated_irqresource(struct pnp_dev *dev, int irq, flags; int p, t; - if (!valid_IRQ(gsi)) + if (!valid_IRQ(gsi)) { + pnp_add_irq_resource(dev, gsi, IORESOURCE_DISABLED); return; + } /* * in IO-APIC mode, use overrided attribute. Two reasons: @@ -248,24 +250,39 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, * _CRS, but some firmware violates this, so parse them all. */ irq = &res->data.irq; - for (i = 0; i < irq->interrupt_count; i++) { - pnpacpi_parse_allocated_irqresource(dev, - irq->interrupts[i], - irq->triggering, - irq->polarity, - irq->sharable); + if (irq->interrupt_count == 0) + pnp_add_irq_resource(dev, 0, IORESOURCE_DISABLED); + else { + for (i = 0; i < irq->interrupt_count; i++) { + pnpacpi_parse_allocated_irqresource(dev, + irq->interrupts[i], + irq->triggering, + irq->polarity, + irq->sharable); + } + + /* + * The IRQ encoder puts a single interrupt in each + * descriptor, so if a _CRS descriptor has more than + * one interrupt, we won't be able to re-encode it. + */ + if (pnp_can_write(dev) && irq->interrupt_count > 1) { + dev_warn(&dev->dev, "multiple interrupts in " + "_CRS descriptor; configuration can't " + "be changed\n"); + dev->capabilities &= ~PNP_WRITE; + } } break; case ACPI_RESOURCE_TYPE_DMA: dma = &res->data.dma; - if (dma->channel_count > 0) { + if (dma->channel_count > 0 && dma->channels[0] != (u8) -1) flags = dma_flags(dma->type, dma->bus_master, dma->transfer); - if (dma->channels[0] == (u8) -1) - flags |= IORESOURCE_DISABLED; - pnp_add_dma_resource(dev, dma->channels[0], flags); - } + else + flags = IORESOURCE_DISABLED; + pnp_add_dma_resource(dev, dma->channels[0], flags); break; case ACPI_RESOURCE_TYPE_IO: @@ -331,12 +348,29 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, if (extended_irq->producer_consumer == ACPI_PRODUCER) return AE_OK; - for (i = 0; i < extended_irq->interrupt_count; i++) { - pnpacpi_parse_allocated_irqresource(dev, - extended_irq->interrupts[i], - extended_irq->triggering, - extended_irq->polarity, - extended_irq->sharable); + if (extended_irq->interrupt_count == 0) + pnp_add_irq_resource(dev, 0, IORESOURCE_DISABLED); + else { + for (i = 0; i < extended_irq->interrupt_count; i++) { + pnpacpi_parse_allocated_irqresource(dev, + extended_irq->interrupts[i], + extended_irq->triggering, + extended_irq->polarity, + extended_irq->sharable); + } + + /* + * The IRQ encoder puts a single interrupt in each + * descriptor, so if a _CRS descriptor has more than + * one interrupt, we won't be able to re-encode it. + */ + if (pnp_can_write(dev) && + extended_irq->interrupt_count > 1) { + dev_warn(&dev->dev, "multiple interrupts in " + "_CRS descriptor; configuration can't " + "be changed\n"); + dev->capabilities &= ~PNP_WRITE; + } } break; -- cgit v0.10.2 From 25d39c39d82d062f4be685146abd054a3bafdf12 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:56:59 -0600 Subject: PNP: remove ratelimit on add resource failures We used to have a fixed-size resource table. If a device had twenty resources when the table only had space for ten, we didn't need ten warnings, so we added the ratelimit. Now that we can dynamically allocate new resources, we should only get failures if the allocation fails. That should be rare enough that we don't need to ratelimit the messages. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index 0797a77..ff79aa6 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -537,15 +537,10 @@ struct pnp_resource *pnp_add_irq_resource(struct pnp_dev *dev, int irq, { struct pnp_resource *pnp_res; struct resource *res; - static unsigned char warned; pnp_res = pnp_new_resource(dev); if (!pnp_res) { - if (!warned) { - dev_err(&dev->dev, "can't add resource for IRQ %d\n", - irq); - warned = 1; - } + dev_err(&dev->dev, "can't add resource for IRQ %d\n", irq); return NULL; } @@ -563,15 +558,10 @@ struct pnp_resource *pnp_add_dma_resource(struct pnp_dev *dev, int dma, { struct pnp_resource *pnp_res; struct resource *res; - static unsigned char warned; pnp_res = pnp_new_resource(dev); if (!pnp_res) { - if (!warned) { - dev_err(&dev->dev, "can't add resource for DMA %d\n", - dma); - warned = 1; - } + dev_err(&dev->dev, "can't add resource for DMA %d\n", dma); return NULL; } @@ -590,16 +580,12 @@ struct pnp_resource *pnp_add_io_resource(struct pnp_dev *dev, { struct pnp_resource *pnp_res; struct resource *res; - static unsigned char warned; pnp_res = pnp_new_resource(dev); if (!pnp_res) { - if (!warned) { - dev_err(&dev->dev, "can't add resource for IO " - "%#llx-%#llx\n",(unsigned long long) start, - (unsigned long long) end); - warned = 1; - } + dev_err(&dev->dev, "can't add resource for IO %#llx-%#llx\n", + (unsigned long long) start, + (unsigned long long) end); return NULL; } @@ -619,16 +605,12 @@ struct pnp_resource *pnp_add_mem_resource(struct pnp_dev *dev, { struct pnp_resource *pnp_res; struct resource *res; - static unsigned char warned; pnp_res = pnp_new_resource(dev); if (!pnp_res) { - if (!warned) { - dev_err(&dev->dev, "can't add resource for MEM " - "%#llx-%#llx\n",(unsigned long long) start, - (unsigned long long) end); - warned = 1; - } + dev_err(&dev->dev, "can't add resource for MEM %#llx-%#llx\n", + (unsigned long long) start, + (unsigned long long) end); return NULL; } -- cgit v0.10.2 From f61ed7e32d2d6a0a8c3c101da513ccedd542e14d Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:00 -0600 Subject: PNP: dont sort by type in /sys/.../resources Rather than stepping through all IO resources, then stepping through all MMIO resources, etc., we can just iterate over the resource list once directly. This can change the order in /sys, e.g., # cat /sys/devices/pnp0/00:07/resources # OLD state = active io 0x3f8-0x3ff irq 4 # cat /sys/devices/pnp0/00:07/resources # NEW state = active irq 4 io 0x3f8-0x3ff The old code artificially sorted resources by type; the new code just lists them in the order we read them from the ISAPNP hardware or the BIOS. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index 7fc86bb..674e8ba 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -248,8 +248,9 @@ static ssize_t pnp_show_current_resources(struct device *dmdev, char *buf) { struct pnp_dev *dev = to_pnp_dev(dmdev); + struct pnp_resource *pnp_res; struct resource *res; - int i, ret; + int ret; pnp_info_buffer_t *buffer; if (!dev) @@ -262,46 +263,33 @@ static ssize_t pnp_show_current_resources(struct device *dmdev, buffer->buffer = buf; buffer->curr = buffer->buffer; - pnp_printf(buffer, "state = "); - if (dev->active) - pnp_printf(buffer, "active\n"); - else - pnp_printf(buffer, "disabled\n"); + pnp_printf(buffer, "state = %s\n", dev->active ? "active" : "disabled"); - for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_IO, i)); i++) { - pnp_printf(buffer, "io"); - if (res->flags & IORESOURCE_DISABLED) - pnp_printf(buffer, " disabled\n"); - else - pnp_printf(buffer, " 0x%llx-0x%llx\n", - (unsigned long long) res->start, - (unsigned long long) res->end); - } - for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_MEM, i)); i++) { - pnp_printf(buffer, "mem"); - if (res->flags & IORESOURCE_DISABLED) + list_for_each_entry(pnp_res, &dev->resources, list) { + res = &pnp_res->res; + + pnp_printf(buffer, pnp_resource_type_name(res)); + + if (res->flags & IORESOURCE_DISABLED) { pnp_printf(buffer, " disabled\n"); - else - pnp_printf(buffer, " 0x%llx-0x%llx\n", + continue; + } + + switch (pnp_resource_type(res)) { + case IORESOURCE_IO: + case IORESOURCE_MEM: + pnp_printf(buffer, " %#llx-%#llx\n", (unsigned long long) res->start, (unsigned long long) res->end); - } - for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_IRQ, i)); i++) { - pnp_printf(buffer, "irq"); - if (res->flags & IORESOURCE_DISABLED) - pnp_printf(buffer, " disabled\n"); - else - pnp_printf(buffer, " %lld\n", - (unsigned long long) res->start); - } - for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_DMA, i)); i++) { - pnp_printf(buffer, "dma"); - if (res->flags & IORESOURCE_DISABLED) - pnp_printf(buffer, " disabled\n"); - else + break; + case IORESOURCE_IRQ: + case IORESOURCE_DMA: pnp_printf(buffer, " %lld\n", (unsigned long long) res->start); + break; + } } + ret = (buffer->curr - buf); kfree(buffer); return ret; -- cgit v0.10.2 From 57fd51a8be26921b56747ddd09d1d9e01c11c9e0 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:01 -0600 Subject: PNP: add pnp_possible_config() -- can a device could be configured this way? As part of a heuristic to identify modem devices, 8250_pnp.c checks to see whether a device can be configured at any of the legacy COM port addresses. This patch moves the code that traverses the PNP "possible resource options" from 8250_pnp.c to the PNP subsystem. This encapsulation is important because a future patch will change the implementation of those resource options. Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index ff79aa6..786fd35 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -624,6 +624,68 @@ struct pnp_resource *pnp_add_mem_resource(struct pnp_dev *dev, return pnp_res; } +static int pnp_possible_option(struct pnp_option *option, int type, + resource_size_t start, resource_size_t size) +{ + struct pnp_option *tmp; + struct pnp_port *port; + struct pnp_mem *mem; + struct pnp_irq *irq; + struct pnp_dma *dma; + + if (!option) + return 0; + + for (tmp = option; tmp; tmp = tmp->next) { + switch (type) { + case IORESOURCE_IO: + for (port = tmp->port; port; port = port->next) { + if (port->min == start && port->size == size) + return 1; + } + break; + case IORESOURCE_MEM: + for (mem = tmp->mem; mem; mem = mem->next) { + if (mem->min == start && mem->size == size) + return 1; + } + break; + case IORESOURCE_IRQ: + for (irq = tmp->irq; irq; irq = irq->next) { + if (start < PNP_IRQ_NR && + test_bit(start, irq->map)) + return 1; + } + break; + case IORESOURCE_DMA: + for (dma = tmp->dma; dma; dma = dma->next) { + if (dma->map & (1 << start)) + return 1; + } + break; + } + } + + return 0; +} + +/* + * Determine whether the specified resource is a possible configuration + * for this device. + */ +int pnp_possible_config(struct pnp_dev *dev, int type, resource_size_t start, + resource_size_t size) +{ + if (pnp_possible_option(dev->independent, type, start, size)) + return 1; + + if (pnp_possible_option(dev->dependent, type, start, size)) + return 1; + + return 0; +} +EXPORT_SYMBOL(pnp_possible_config); + /* format is: pnp_reserve_irq=irq1[,irq2] .... */ static int __init pnp_setup_reserve_irq(char *str) { diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c index 97c68d02..638b686 100644 --- a/drivers/serial/8250_pnp.c +++ b/drivers/serial/8250_pnp.c @@ -383,21 +383,14 @@ static int __devinit check_name(char *name) return 0; } -static int __devinit check_resources(struct pnp_option *option) +static int __devinit check_resources(struct pnp_dev *dev) { - struct pnp_option *tmp; - if (!option) - return 0; + resource_size_t base[] = {0x2f8, 0x3f8, 0x2e8, 0x3e8}; + int i; - for (tmp = option; tmp; tmp = tmp->next) { - struct pnp_port *port; - for (port = tmp->port; port; port = port->next) - if ((port->size == 8) && - ((port->min == 0x2f8) || - (port->min == 0x3f8) || - (port->min == 0x2e8) || - (port->min == 0x3e8))) - return 1; + for (i = 0; i < ARRAY_SIZE(base); i++) { + if (pnp_possible_config(dev, IORESOURCE_IO, base[i], 8)) + return 1; } return 0; @@ -420,10 +413,7 @@ static int __devinit serial_pnp_guess_board(struct pnp_dev *dev, int *flags) (dev->card && check_name(dev->card->name)))) return -ENODEV; - if (check_resources(dev->independent)) - return 0; - - if (check_resources(dev->dependent)) + if (check_resources(dev)) return 0; return -ENODEV; diff --git a/include/linux/pnp.h b/include/linux/pnp.h index dfaa567..e033e1b 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -479,6 +479,8 @@ void pnp_unregister_card_driver(struct pnp_card_driver *drv); extern struct list_head pnp_cards; /* resource management */ +int pnp_possible_config(struct pnp_dev *dev, int type, resource_size_t base, + resource_size_t size); int pnp_auto_config_dev(struct pnp_dev *dev); int pnp_start_dev(struct pnp_dev *dev); int pnp_stop_dev(struct pnp_dev *dev); @@ -506,6 +508,9 @@ static inline int pnp_register_card_driver(struct pnp_card_driver *drv) { return static inline void pnp_unregister_card_driver(struct pnp_card_driver *drv) { } /* resource management */ +static inline int pnp_possible_config(struct pnp_dev *dev, int type, + resource_size_t base, + resource_size_t size) { return 0; } static inline int pnp_auto_config_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_start_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; } -- cgit v0.10.2 From b72ee1f11e373179ec703e0e5afaf585ed3a950a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:02 -0600 Subject: PNP: whitespace/coding style fixes No functional change; just make a couple declarations consistent with the rest of the file. Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index 674e8ba..239923a 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -216,12 +216,12 @@ static ssize_t pnp_show_options(struct device *dmdev, struct device_attribute *attr, char *buf) { struct pnp_dev *dev = to_pnp_dev(dmdev); + pnp_info_buffer_t *buffer; struct pnp_option *independent = dev->independent; struct pnp_option *dependent = dev->dependent; int ret, dep = 1; - pnp_info_buffer_t *buffer = (pnp_info_buffer_t *) - pnp_alloc(sizeof(pnp_info_buffer_t)); + buffer = pnp_alloc(sizeof(pnp_info_buffer_t)); if (!buffer) return -ENOMEM; @@ -248,17 +248,18 @@ static ssize_t pnp_show_current_resources(struct device *dmdev, char *buf) { struct pnp_dev *dev = to_pnp_dev(dmdev); + pnp_info_buffer_t *buffer; struct pnp_resource *pnp_res; struct resource *res; int ret; - pnp_info_buffer_t *buffer; if (!dev) return -EINVAL; - buffer = (pnp_info_buffer_t *) pnp_alloc(sizeof(pnp_info_buffer_t)); + buffer = pnp_alloc(sizeof(pnp_info_buffer_t)); if (!buffer) return -ENOMEM; + buffer->len = PAGE_SIZE; buffer->buffer = buf; buffer->curr = buffer->buffer; @@ -295,9 +296,9 @@ static ssize_t pnp_show_current_resources(struct device *dmdev, return ret; } -static ssize_t -pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr, - const char *ubuf, size_t count) +static ssize_t pnp_set_current_resources(struct device *dmdev, + struct device_attribute *attr, + const char *ubuf, size_t count) { struct pnp_dev *dev = to_pnp_dev(dmdev); char *buf = (void *)ubuf; -- cgit v0.10.2 From 08c9f262f268f7948be13bf3a5bda1d635c649b4 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:03 -0600 Subject: PNP: define PNP-specific IORESOURCE_IO_* flags alongside IRQ, DMA, MEM PNP previously defined PNP_PORT_FLAG_16BITADDR and PNP_PORT_FLAG_FIXED in a private header file, but put those flags in struct resource.flags fields. Better to make them IORESOURCE_IO_* flags like the existing IRQ, DMA, and MEM flags. Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index 239923a..c172b6d 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -57,7 +57,7 @@ static void pnp_print_port(pnp_info_buffer_t * buffer, char *space, "%sport 0x%x-0x%x, align 0x%x, size 0x%x, %i-bit address decoding\n", space, port->min, port->max, port->align ? (port->align - 1) : 0, port->size, - port->flags & PNP_PORT_FLAG_16BITADDR ? 16 : 10); + port->flags & IORESOURCE_IO_16BIT_ADDR ? 16 : 10); } static void pnp_print_irq(pnp_info_buffer_t * buffer, char *space, diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c index ca4457e..c5b9252 100644 --- a/drivers/pnp/isapnp/core.c +++ b/drivers/pnp/isapnp/core.c @@ -486,7 +486,7 @@ static void __init isapnp_parse_port_resource(struct pnp_dev *dev, port->max = (tmp[4] << 8) | tmp[3]; port->align = tmp[5]; port->size = tmp[6]; - port->flags = tmp[0] ? PNP_PORT_FLAG_16BITADDR : 0; + port->flags = tmp[0] ? IORESOURCE_IO_16BIT_ADDR : 0; pnp_register_port_resource(dev, option, port); } @@ -507,7 +507,7 @@ static void __init isapnp_parse_fixed_port_resource(struct pnp_dev *dev, port->min = port->max = (tmp[1] << 8) | tmp[0]; port->size = tmp[2]; port->align = 0; - port->flags = PNP_PORT_FLAG_FIXED; + port->flags = IORESOURCE_IO_FIXED; pnp_register_port_resource(dev, option, port); } diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index 595252b..46069e6 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -180,7 +180,7 @@ static void pnpacpi_parse_allocated_ioresource(struct pnp_dev *dev, u64 start, u64 end = start + len - 1; if (io_decode == ACPI_DECODE_16) - flags |= PNP_PORT_FLAG_16BITADDR; + flags |= IORESOURCE_IO_16BIT_ADDR; if (len == 0 || end >= 0x10003) flags |= IORESOURCE_DISABLED; @@ -485,7 +485,7 @@ static __init void pnpacpi_parse_port_option(struct pnp_dev *dev, port->align = io->alignment; port->size = io->address_length; port->flags = ACPI_DECODE_16 == io->io_decode ? - PNP_PORT_FLAG_16BITADDR : 0; + IORESOURCE_IO_16BIT_ADDR : 0; pnp_register_port_resource(dev, option, port); } @@ -503,7 +503,7 @@ static __init void pnpacpi_parse_fixed_port_option(struct pnp_dev *dev, port->min = port->max = io->address; port->size = io->address_length; port->align = 0; - port->flags = PNP_PORT_FLAG_FIXED; + port->flags = IORESOURCE_IO_FIXED; pnp_register_port_resource(dev, option, port); } @@ -609,7 +609,7 @@ static __init void pnpacpi_parse_address_option(struct pnp_dev *dev, port->min = port->max = p->minimum; port->size = p->address_length; port->align = 0; - port->flags = PNP_PORT_FLAG_FIXED; + port->flags = IORESOURCE_IO_FIXED; pnp_register_port_resource(dev, option, port); } } @@ -946,7 +946,7 @@ static void pnpacpi_encode_io(struct pnp_dev *dev, if (pnp_resource_enabled(p)) { /* Note: pnp_assign_port copies pnp_port->flags into p->flags */ - io->io_decode = (p->flags & PNP_PORT_FLAG_16BITADDR) ? + io->io_decode = (p->flags & IORESOURCE_IO_16BIT_ADDR) ? ACPI_DECODE_16 : ACPI_DECODE_10; io->minimum = p->start; io->maximum = p->end; diff --git a/drivers/pnp/pnpbios/rsparser.c b/drivers/pnp/pnpbios/rsparser.c index 01f0c3d..489fec3 100644 --- a/drivers/pnp/pnpbios/rsparser.c +++ b/drivers/pnp/pnpbios/rsparser.c @@ -310,7 +310,7 @@ static __init void pnpbios_parse_port_option(struct pnp_dev *dev, port->max = (p[5] << 8) | p[4]; port->align = p[6]; port->size = p[7]; - port->flags = p[1] ? PNP_PORT_FLAG_16BITADDR : 0; + port->flags = p[1] ? IORESOURCE_IO_16BIT_ADDR : 0; pnp_register_port_resource(dev, option, port); } @@ -326,7 +326,7 @@ static __init void pnpbios_parse_fixed_port_option(struct pnp_dev *dev, port->min = port->max = (p[2] << 8) | p[1]; port->size = p[3]; port->align = 0; - port->flags = PNP_PORT_FLAG_FIXED; + port->flags = IORESOURCE_IO_FIXED; pnp_register_port_resource(dev, option, port); } diff --git a/include/linux/ioport.h b/include/linux/ioport.h index c6801bf..39db059 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -88,6 +88,10 @@ struct resource_list { #define IORESOURCE_MEM_SHADOWABLE (1<<5) /* dup: IORESOURCE_SHADOWABLE */ #define IORESOURCE_MEM_EXPANSIONROM (1<<6) +/* PnP I/O specific bits (IORESOURCE_BITS) */ +#define IORESOURCE_IO_16BIT_ADDR (1<<0) +#define IORESOURCE_IO_FIXED (1<<1) + /* PCI ROM control bits (IORESOURCE_BITS) */ #define IORESOURCE_ROM_ENABLE (1<<0) /* ROM is enabled, same as PCI_ROM_ADDRESS_ENABLE */ #define IORESOURCE_ROM_SHADOW (1<<1) /* ROM is copy at C000:0 */ diff --git a/include/linux/pnp.h b/include/linux/pnp.h index e033e1b..e1454da 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -182,9 +182,6 @@ static inline int pnp_dma_valid(struct pnp_dev *dev, unsigned int bar) } -#define PNP_PORT_FLAG_16BITADDR (1<<0) -#define PNP_PORT_FLAG_FIXED (1<<1) - struct pnp_port { unsigned short min; /* min base number */ unsigned short max; /* max base number */ -- cgit v0.10.2 From a1802c42950403657d07e64558eff612d550ce16 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:04 -0600 Subject: PNP: make resource option structures private to PNP subsystem Nothing outside the PNP subsystem should need access to a device's resource options, so this patch moves the option structure declarations to a private header file. Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h index 1667ac3..3126e45 100644 --- a/drivers/pnp/base.h +++ b/drivers/pnp/base.h @@ -19,6 +19,54 @@ void pnp_remove_card(struct pnp_card *card); int pnp_add_card_device(struct pnp_card *card, struct pnp_dev *dev); void pnp_remove_card_device(struct pnp_dev *dev); +struct pnp_port { + unsigned short min; /* min base number */ + unsigned short max; /* max base number */ + unsigned char align; /* align boundary */ + unsigned char size; /* size of range */ + unsigned char flags; /* port flags */ + unsigned char pad; /* pad */ + struct pnp_port *next; /* next port */ +}; + +#define PNP_IRQ_NR 256 +struct pnp_irq { + DECLARE_BITMAP(map, PNP_IRQ_NR); /* bitmask for IRQ lines */ + unsigned char flags; /* IRQ flags */ + unsigned char pad; /* pad */ + struct pnp_irq *next; /* next IRQ */ +}; + +struct pnp_dma { + unsigned char map; /* bitmask for DMA channels */ + unsigned char flags; /* DMA flags */ + struct pnp_dma *next; /* next port */ +}; + +struct pnp_mem { + unsigned int min; /* min base number */ + unsigned int max; /* max base number */ + unsigned int align; /* align boundary */ + unsigned int size; /* size of range */ + unsigned char flags; /* memory flags */ + unsigned char pad; /* pad */ + struct pnp_mem *next; /* next memory resource */ +}; + +#define PNP_RES_PRIORITY_PREFERRED 0 +#define PNP_RES_PRIORITY_ACCEPTABLE 1 +#define PNP_RES_PRIORITY_FUNCTIONAL 2 +#define PNP_RES_PRIORITY_INVALID 65535 + +struct pnp_option { + unsigned short priority; /* priority */ + struct pnp_port *port; /* first port */ + struct pnp_irq *irq; /* first IRQ */ + struct pnp_dma *dma; /* first DMA */ + struct pnp_mem *mem; /* first memory resource */ + struct pnp_option *next; /* used to chain dependent resources */ +}; + struct pnp_option *pnp_build_option(int priority); struct pnp_option *pnp_register_independent_option(struct pnp_dev *dev); struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev, diff --git a/include/linux/pnp.h b/include/linux/pnp.h index e1454da..785126f 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -182,54 +182,6 @@ static inline int pnp_dma_valid(struct pnp_dev *dev, unsigned int bar) } -struct pnp_port { - unsigned short min; /* min base number */ - unsigned short max; /* max base number */ - unsigned char align; /* align boundary */ - unsigned char size; /* size of range */ - unsigned char flags; /* port flags */ - unsigned char pad; /* pad */ - struct pnp_port *next; /* next port */ -}; - -#define PNP_IRQ_NR 256 -struct pnp_irq { - DECLARE_BITMAP(map, PNP_IRQ_NR); /* bitmask for IRQ lines */ - unsigned char flags; /* IRQ flags */ - unsigned char pad; /* pad */ - struct pnp_irq *next; /* next IRQ */ -}; - -struct pnp_dma { - unsigned char map; /* bitmask for DMA channels */ - unsigned char flags; /* DMA flags */ - struct pnp_dma *next; /* next port */ -}; - -struct pnp_mem { - unsigned int min; /* min base number */ - unsigned int max; /* max base number */ - unsigned int align; /* align boundary */ - unsigned int size; /* size of range */ - unsigned char flags; /* memory flags */ - unsigned char pad; /* pad */ - struct pnp_mem *next; /* next memory resource */ -}; - -#define PNP_RES_PRIORITY_PREFERRED 0 -#define PNP_RES_PRIORITY_ACCEPTABLE 1 -#define PNP_RES_PRIORITY_FUNCTIONAL 2 -#define PNP_RES_PRIORITY_INVALID 65535 - -struct pnp_option { - unsigned short priority; /* priority */ - struct pnp_port *port; /* first port */ - struct pnp_irq *irq; /* first IRQ */ - struct pnp_dma *dma; /* first DMA */ - struct pnp_mem *mem; /* first memory resource */ - struct pnp_option *next; /* used to chain dependent resources */ -}; - /* * Device Management */ -- cgit v0.10.2 From 7aefff51854ccd33599c40b4e360d94cb2b7622f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:05 -0600 Subject: PNP: introduce pnp_irq_mask_t typedef This adds a typedef for the IRQ bitmap, which should cause no functional change, but will make it easier to pass a pointer to a bitmap to pnp_register_irq_resource(). Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h index 3126e45..a9ee0a9 100644 --- a/drivers/pnp/base.h +++ b/drivers/pnp/base.h @@ -30,8 +30,10 @@ struct pnp_port { }; #define PNP_IRQ_NR 256 +typedef struct { DECLARE_BITMAP(bits, PNP_IRQ_NR); } pnp_irq_mask_t; + struct pnp_irq { - DECLARE_BITMAP(map, PNP_IRQ_NR); /* bitmask for IRQ lines */ + pnp_irq_mask_t map; /* bitmap for IRQ lines */ unsigned char flags; /* IRQ flags */ unsigned char pad; /* pad */ struct pnp_irq *next; /* next IRQ */ diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index c172b6d..249b407 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -67,7 +67,7 @@ static void pnp_print_irq(pnp_info_buffer_t * buffer, char *space, pnp_printf(buffer, "%sirq ", space); for (i = 0; i < PNP_IRQ_NR; i++) - if (test_bit(i, irq->map)) { + if (test_bit(i, irq->map.bits)) { if (!first) { pnp_printf(buffer, ","); } else { @@ -78,7 +78,7 @@ static void pnp_print_irq(pnp_info_buffer_t * buffer, char *space, else pnp_printf(buffer, "%i", i); } - if (bitmap_empty(irq->map, PNP_IRQ_NR)) + if (bitmap_empty(irq->map.bits, PNP_IRQ_NR)) pnp_printf(buffer, ""); if (irq->flags & IORESOURCE_IRQ_HIGHEDGE) pnp_printf(buffer, " High-Edge"); diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c index c5b9252..e0caa71 100644 --- a/drivers/pnp/isapnp/core.c +++ b/drivers/pnp/isapnp/core.c @@ -441,7 +441,7 @@ static void __init isapnp_parse_irq_resource(struct pnp_dev *dev, if (!irq) return; bits = (tmp[1] << 8) | tmp[0]; - bitmap_copy(irq->map, &bits, 16); + bitmap_copy(irq->map.bits, &bits, 16); if (size > 2) irq->flags = tmp[2]; else diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c index 165b624..e758dd2 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -128,20 +128,20 @@ static int pnp_assign_irq(struct pnp_dev *dev, struct pnp_irq *rule, int idx) res->start = -1; res->end = -1; - if (bitmap_empty(rule->map, PNP_IRQ_NR)) { + if (bitmap_empty(rule->map.bits, PNP_IRQ_NR)) { res->flags |= IORESOURCE_DISABLED; dev_dbg(&dev->dev, " irq %d disabled\n", idx); goto __add; } /* TBD: need check for >16 IRQ */ - res->start = find_next_bit(rule->map, PNP_IRQ_NR, 16); + res->start = find_next_bit(rule->map.bits, PNP_IRQ_NR, 16); if (res->start < PNP_IRQ_NR) { res->end = res->start; goto __add; } for (i = 0; i < 16; i++) { - if (test_bit(xtab[i], rule->map)) { + if (test_bit(xtab[i], rule->map.bits)) { res->start = res->end = xtab[i]; if (pnp_check_irq(dev, res)) goto __add; diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index 46069e6..ae65454 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -442,7 +442,7 @@ static __init void pnpacpi_parse_irq_option(struct pnp_dev *dev, for (i = 0; i < p->interrupt_count; i++) if (p->interrupts[i]) - __set_bit(p->interrupts[i], irq->map); + __set_bit(p->interrupts[i], irq->map.bits); irq->flags = irq_flags(p->triggering, p->polarity, p->sharable); pnp_register_irq_resource(dev, option, irq); @@ -463,7 +463,7 @@ static __init void pnpacpi_parse_ext_irq_option(struct pnp_dev *dev, for (i = 0; i < p->interrupt_count; i++) if (p->interrupts[i]) - __set_bit(p->interrupts[i], irq->map); + __set_bit(p->interrupts[i], irq->map.bits); irq->flags = irq_flags(p->triggering, p->polarity, p->sharable); pnp_register_irq_resource(dev, option, irq); diff --git a/drivers/pnp/pnpbios/rsparser.c b/drivers/pnp/pnpbios/rsparser.c index 489fec3..dd2ea7b 100644 --- a/drivers/pnp/pnpbios/rsparser.c +++ b/drivers/pnp/pnpbios/rsparser.c @@ -275,7 +275,7 @@ static __init void pnpbios_parse_irq_option(struct pnp_dev *dev, if (!irq) return; bits = (p[2] << 8) | p[1]; - bitmap_copy(irq->map, &bits, 16); + bitmap_copy(irq->map.bits, &bits, 16); if (size > 2) irq->flags = p[3]; else diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c index 21acb54..48e6017 100644 --- a/drivers/pnp/quirks.c +++ b/drivers/pnp/quirks.c @@ -66,15 +66,18 @@ static void quirk_cmi8330_resources(struct pnp_dev *dev) struct pnp_irq *irq; struct pnp_dma *dma; - for (irq = res->irq; irq; irq = irq->next) { // Valid irqs are 5, 7, 10 + for (irq = res->irq; irq; irq = irq->next) { + /* Valid irqs are 5, 7, 10 */ tmp = 0x04A0; - bitmap_copy(irq->map, &tmp, 16); // 0000 0100 1010 0000 + bitmap_copy(irq->map.bits, &tmp, 16); } - for (dma = res->dma; dma; dma = dma->next) // Valid 8bit dma channels are 1,3 + for (dma = res->dma; dma; dma = dma->next) { + /* Valid 8bit dma channels are 1,3 */ if ((dma->flags & IORESOURCE_DMA_TYPE_MASK) == IORESOURCE_DMA_8BIT) dma->map = 0x000A; + } } dev_info(&dev->dev, "CMI8330 quirk - forced possible IRQs to 5, 7, 10 " "and DMA channels to 1, 3\n"); @@ -187,7 +190,7 @@ static void quirk_ad1815_mpu_resources(struct pnp_dev *dev) if (!copy) break; - memcpy(copy->map, irq->map, sizeof copy->map); + bitmap_copy(copy->map.bits, irq->map.bits, PNP_IRQ_NR); copy->flags = irq->flags; copy->next = res->irq; /* Yes, this is NULL */ diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index 786fd35..55a57cd 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -98,13 +98,13 @@ int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_option *option, int i; for (i = 0; i < 16; i++) - if (test_bit(i, data->map)) + if (test_bit(i, data->map.bits)) pcibios_penalize_isa_irq(i, 0); } #endif #ifdef DEBUG - bitmap_scnprintf(buf, sizeof(buf), data->map, PNP_IRQ_NR); + bitmap_scnprintf(buf, sizeof(buf), data->map.bits, PNP_IRQ_NR); dev_dbg(&dev->dev, " irq bitmask %s flags %#x\n", buf, data->flags); #endif @@ -653,7 +653,7 @@ static int pnp_possible_option(struct pnp_option *option, int type, case IORESOURCE_IRQ: for (irq = tmp->irq; irq; irq = irq->next) { if (start < PNP_IRQ_NR && - test_bit(start, irq->map)) + test_bit(start, irq->map.bits)) return 1; } break; -- cgit v0.10.2 From 169aaffe885c56745188e7913f212a67beaa3b80 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:06 -0600 Subject: PNP: increase I/O port & memory option address sizes ACPI Address Space Descriptors can be up to 64 bits wide. We should keep track of the whole thing when parsing resource options, so this patch changes PNP port and mem option fields from "unsigned short" and "unsigned int" to "resource_size_t". Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h index a9ee0a9..afbeee5 100644 --- a/drivers/pnp/base.h +++ b/drivers/pnp/base.h @@ -20,10 +20,10 @@ int pnp_add_card_device(struct pnp_card *card, struct pnp_dev *dev); void pnp_remove_card_device(struct pnp_dev *dev); struct pnp_port { - unsigned short min; /* min base number */ - unsigned short max; /* max base number */ - unsigned char align; /* align boundary */ - unsigned char size; /* size of range */ + resource_size_t min; /* min base number */ + resource_size_t max; /* max base number */ + resource_size_t align; /* align boundary */ + resource_size_t size; /* size of range */ unsigned char flags; /* port flags */ unsigned char pad; /* pad */ struct pnp_port *next; /* next port */ @@ -46,10 +46,10 @@ struct pnp_dma { }; struct pnp_mem { - unsigned int min; /* min base number */ - unsigned int max; /* max base number */ - unsigned int align; /* align boundary */ - unsigned int size; /* size of range */ + resource_size_t min; /* min base number */ + resource_size_t max; /* max base number */ + resource_size_t align; /* align boundary */ + resource_size_t size; /* size of range */ unsigned char flags; /* memory flags */ unsigned char pad; /* pad */ struct pnp_mem *next; /* next memory resource */ diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index 249b407..b09f67d 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -53,10 +53,12 @@ static int pnp_printf(pnp_info_buffer_t * buffer, char *fmt, ...) static void pnp_print_port(pnp_info_buffer_t * buffer, char *space, struct pnp_port *port) { - pnp_printf(buffer, - "%sport 0x%x-0x%x, align 0x%x, size 0x%x, %i-bit address decoding\n", - space, port->min, port->max, - port->align ? (port->align - 1) : 0, port->size, + pnp_printf(buffer, "%sport %#llx-%#llx, align %#llx, size %#llx, " + "%i-bit address decoding\n", space, + (unsigned long long) port->min, + (unsigned long long) port->max, + port->align ? ((unsigned long long) port->align - 1) : 0, + (unsigned long long) port->size, port->flags & IORESOURCE_IO_16BIT_ADDR ? 16 : 10); } @@ -148,8 +150,11 @@ static void pnp_print_mem(pnp_info_buffer_t * buffer, char *space, { char *s; - pnp_printf(buffer, "%sMemory 0x%x-0x%x, align 0x%x, size 0x%x", - space, mem->min, mem->max, mem->align, mem->size); + pnp_printf(buffer, "%sMemory %#llx-%#llx, align %#llx, size %#llx", + space, (unsigned long long) mem->min, + (unsigned long long) mem->max, + (unsigned long long) mem->align, + (unsigned long long) mem->size); if (mem->flags & IORESOURCE_MEM_WRITEABLE) pnp_printf(buffer, ", writeable"); if (mem->flags & IORESOURCE_MEM_CACHEABLE) diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index 55a57cd..391828c 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -143,8 +143,11 @@ int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option, option->port = data; dev_dbg(&dev->dev, " io " - "min %#x max %#x align %d size %d flags %#x\n", - data->min, data->max, data->align, data->size, data->flags); + "min %#llx max %#llx align %lld size %lld flags %#x\n", + (unsigned long long) data->min, + (unsigned long long) data->max, + (unsigned long long) data->align, + (unsigned long long) data->size, data->flags); return 0; } @@ -162,8 +165,11 @@ int pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option, option->mem = data; dev_dbg(&dev->dev, " mem " - "min %#x max %#x align %d size %d flags %#x\n", - data->min, data->max, data->align, data->size, data->flags); + "min %#llx max %#llx align %lld size %lld flags %#x\n", + (unsigned long long) data->min, + (unsigned long long) data->max, + (unsigned long long) data->align, + (unsigned long long) data->size, data->flags); return 0; } -- cgit v0.10.2 From fcfb7ce3d688d5c15fc9bc0a2a48e1ededdb046f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:07 -0600 Subject: PNP: improve resource assignment debug When we fail to assign an I/O or MEM resource, include the min/max in the debug output to help match it with the options. Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c index e758dd2..c706dd2 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -47,7 +47,10 @@ static int pnp_assign_port(struct pnp_dev *dev, struct pnp_port *rule, int idx) res->start += rule->align; res->end = res->start + rule->size - 1; if (res->start > rule->max || !rule->align) { - dev_dbg(&dev->dev, " couldn't assign io %d\n", idx); + dev_dbg(&dev->dev, " couldn't assign io %d " + "(min %#llx max %#llx)\n", idx, + (unsigned long long) rule->min, + (unsigned long long) rule->max); return 0; } } @@ -96,7 +99,10 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx) res->start += rule->align; res->end = res->start + rule->size - 1; if (res->start > rule->max || !rule->align) { - dev_dbg(&dev->dev, " couldn't assign mem %d\n", idx); + dev_dbg(&dev->dev, " couldn't assign mem %d " + "(min %#llx max %#llx)\n", idx, + (unsigned long long) rule->min, + (unsigned long long) rule->max); return 0; } } -- cgit v0.10.2 From 819beac3806a5e986d81f476b999b7fffce1a233 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:08 -0600 Subject: PNP: in debug resource dump, make empty list obvious If the resource list is empty, say that explicitly. Previously, it was confusing because often the heading was followed by zero resource lines, then some "add resource" lines from auto-assignment, so the "add" lines looked like current resources. Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown diff --git a/drivers/pnp/support.c b/drivers/pnp/support.c index 1566e4a..0ad42db 100644 --- a/drivers/pnp/support.c +++ b/drivers/pnp/support.c @@ -79,7 +79,12 @@ void dbg_pnp_show_resources(struct pnp_dev *dev, char *desc) struct pnp_resource *pnp_res; struct resource *res; - dev_dbg(&dev->dev, "current resources: %s\n", desc); + if (list_empty(&dev->resources)) { + dev_dbg(&dev->dev, "%s: no current resources\n", desc); + return; + } + + dev_dbg(&dev->dev, "%s: current resources:\n", desc); list_for_each_entry(pnp_res, &dev->resources, list) { res = &pnp_res->res; -- cgit v0.10.2 From 6e906f0e1c8633ed357a64e9861f1822789bee3d Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:09 -0600 Subject: PNP: make resource assignment functions return 0 (success) or -EBUSY (failure) This patch doesn't change any behavior; it just makes the return values more conventional. This changes pnp_assign_dma() from a void function to one that returns an int, just like the other assignment functions. For now, at least, pnp_assign_dma() always returns 0 (success), so it appears to never fail, just like before. Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c index c706dd2..1adc83f 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -26,7 +26,7 @@ static int pnp_assign_port(struct pnp_dev *dev, struct pnp_port *rule, int idx) dev_dbg(&dev->dev, " io %d already set to %#llx-%#llx " "flags %#lx\n", idx, (unsigned long long) res->start, (unsigned long long) res->end, res->flags); - return 1; + return 0; } res = &local_res; @@ -51,13 +51,13 @@ static int pnp_assign_port(struct pnp_dev *dev, struct pnp_port *rule, int idx) "(min %#llx max %#llx)\n", idx, (unsigned long long) rule->min, (unsigned long long) rule->max); - return 0; + return -EBUSY; } } __add: pnp_add_io_resource(dev, res->start, res->end, res->flags); - return 1; + return 0; } static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx) @@ -69,7 +69,7 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx) dev_dbg(&dev->dev, " mem %d already set to %#llx-%#llx " "flags %#lx\n", idx, (unsigned long long) res->start, (unsigned long long) res->end, res->flags); - return 1; + return 0; } res = &local_res; @@ -103,13 +103,13 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx) "(min %#llx max %#llx)\n", idx, (unsigned long long) rule->min, (unsigned long long) rule->max); - return 0; + return -EBUSY; } } __add: pnp_add_mem_resource(dev, res->start, res->end, res->flags); - return 1; + return 0; } static int pnp_assign_irq(struct pnp_dev *dev, struct pnp_irq *rule, int idx) @@ -126,7 +126,7 @@ static int pnp_assign_irq(struct pnp_dev *dev, struct pnp_irq *rule, int idx) if (res) { dev_dbg(&dev->dev, " irq %d already set to %d flags %#lx\n", idx, (int) res->start, res->flags); - return 1; + return 0; } res = &local_res; @@ -154,14 +154,14 @@ static int pnp_assign_irq(struct pnp_dev *dev, struct pnp_irq *rule, int idx) } } dev_dbg(&dev->dev, " couldn't assign irq %d\n", idx); - return 0; + return -EBUSY; __add: pnp_add_irq_resource(dev, res->start, res->flags); - return 1; + return 0; } -static void pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx) +static int pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx) { struct resource *res, local_res; int i; @@ -175,7 +175,7 @@ static void pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx) if (res) { dev_dbg(&dev->dev, " dma %d already set to %d flags %#lx\n", idx, (int) res->start, res->flags); - return; + return 0; } res = &local_res; @@ -198,6 +198,7 @@ static void pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx) __add: pnp_add_dma_resource(dev, res->start, res->flags); + return 0; } void pnp_init_resources(struct pnp_dev *dev) @@ -243,25 +244,26 @@ static int pnp_assign_resources(struct pnp_dev *dev, int depnum) irq = dev->independent->irq; dma = dev->independent->dma; while (port) { - if (!pnp_assign_port(dev, port, nport)) + if (pnp_assign_port(dev, port, nport) < 0) goto fail; nport++; port = port->next; } while (mem) { - if (!pnp_assign_mem(dev, mem, nmem)) + if (pnp_assign_mem(dev, mem, nmem) < 0) goto fail; nmem++; mem = mem->next; } while (irq) { - if (!pnp_assign_irq(dev, irq, nirq)) + if (pnp_assign_irq(dev, irq, nirq) < 0) goto fail; nirq++; irq = irq->next; } while (dma) { - pnp_assign_dma(dev, dma, ndma); + if (pnp_assign_dma(dev, dma, ndma) < 0) + goto fail; ndma++; dma = dma->next; } @@ -281,25 +283,26 @@ static int pnp_assign_resources(struct pnp_dev *dev, int depnum) irq = dep->irq; dma = dep->dma; while (port) { - if (!pnp_assign_port(dev, port, nport)) + if (pnp_assign_port(dev, port, nport) < 0) goto fail; nport++; port = port->next; } while (mem) { - if (!pnp_assign_mem(dev, mem, nmem)) + if (pnp_assign_mem(dev, mem, nmem) < 0) goto fail; nmem++; mem = mem->next; } while (irq) { - if (!pnp_assign_irq(dev, irq, nirq)) + if (pnp_assign_irq(dev, irq, nirq) < 0) goto fail; nirq++; irq = irq->next; } while (dma) { - pnp_assign_dma(dev, dma, ndma); + if (pnp_assign_dma(dev, dma, ndma) < 0) + goto fail; ndma++; dma = dma->next; } -- cgit v0.10.2 From b08395e5038e3337bb85c7246a635a3be6d5a29c Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:10 -0600 Subject: PNP: remove redundant pnp_can_configure() check pnp_assign_resources() is static and the only caller checks pnp_can_configure() before calling it, so no need to do it again. Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c index 1adc83f..7ea9e1e 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -231,9 +231,6 @@ static int pnp_assign_resources(struct pnp_dev *dev, int depnum) struct pnp_dma *dma; int nport = 0, nmem = 0, nirq = 0, ndma = 0; - if (!pnp_can_configure(dev)) - return -ENODEV; - dbg_pnp_show_resources(dev, "before pnp_assign_resources"); mutex_lock(&pnp_res_mutex); pnp_clean_resource_table(dev); -- cgit v0.10.2 From c227536b4cc2600fc9d22ba0067f699165f6621f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:11 -0600 Subject: PNP: centralize resource option allocations This patch moves all the option allocations (pnp_mem, pnp_port, etc) into the pnp_register_{mem,port,irq,dma}_resource() functions. This will make it easier to rework the option data structures. The non-trivial part of this patch is the IRQ handling. The backends have to allocate a local pnp_irq_mask_t bitmap, populate it, and pass a pointer to pnp_register_irq_resource(). Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h index afbeee5..360c638 100644 --- a/drivers/pnp/base.h +++ b/drivers/pnp/base.h @@ -74,13 +74,17 @@ struct pnp_option *pnp_register_independent_option(struct pnp_dev *dev); struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev, int priority); int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_irq *data); + pnp_irq_mask_t *map, unsigned char flags); int pnp_register_dma_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_dma *data); + unsigned char map, unsigned char flags); int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_port *data); + resource_size_t min, resource_size_t max, + resource_size_t align, resource_size_t size, + unsigned char flags); int pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_mem *data); + resource_size_t min, resource_size_t max, + resource_size_t align, resource_size_t size, + unsigned char flags); void pnp_init_resources(struct pnp_dev *dev); void pnp_fixup_device(struct pnp_dev *dev); diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c index e0caa71..3f75fdb 100644 --- a/drivers/pnp/isapnp/core.c +++ b/drivers/pnp/isapnp/core.c @@ -433,20 +433,20 @@ static void __init isapnp_parse_irq_resource(struct pnp_dev *dev, int size) { unsigned char tmp[3]; - struct pnp_irq *irq; unsigned long bits; + pnp_irq_mask_t map; + unsigned char flags = IORESOURCE_IRQ_HIGHEDGE; isapnp_peek(tmp, size); - irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL); - if (!irq) - return; bits = (tmp[1] << 8) | tmp[0]; - bitmap_copy(irq->map.bits, &bits, 16); + + bitmap_zero(map.bits, PNP_IRQ_NR); + bitmap_copy(map.bits, &bits, 16); + if (size > 2) - irq->flags = tmp[2]; - else - irq->flags = IORESOURCE_IRQ_HIGHEDGE; - pnp_register_irq_resource(dev, option, irq); + flags = tmp[2]; + + pnp_register_irq_resource(dev, option, &map, flags); } /* @@ -457,15 +457,9 @@ static void __init isapnp_parse_dma_resource(struct pnp_dev *dev, int size) { unsigned char tmp[2]; - struct pnp_dma *dma; isapnp_peek(tmp, size); - dma = kzalloc(sizeof(struct pnp_dma), GFP_KERNEL); - if (!dma) - return; - dma->map = tmp[0]; - dma->flags = tmp[1]; - pnp_register_dma_resource(dev, option, dma); + pnp_register_dma_resource(dev, option, tmp[0], tmp[1]); } /* @@ -476,18 +470,16 @@ static void __init isapnp_parse_port_resource(struct pnp_dev *dev, int size) { unsigned char tmp[7]; - struct pnp_port *port; + resource_size_t min, max, align, len; + unsigned char flags; isapnp_peek(tmp, size); - port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); - if (!port) - return; - port->min = (tmp[2] << 8) | tmp[1]; - port->max = (tmp[4] << 8) | tmp[3]; - port->align = tmp[5]; - port->size = tmp[6]; - port->flags = tmp[0] ? IORESOURCE_IO_16BIT_ADDR : 0; - pnp_register_port_resource(dev, option, port); + min = (tmp[2] << 8) | tmp[1]; + max = (tmp[4] << 8) | tmp[3]; + align = tmp[5]; + len = tmp[6]; + flags = tmp[0] ? IORESOURCE_IO_16BIT_ADDR : 0; + pnp_register_port_resource(dev, option, min, max, align, len, flags); } /* @@ -498,17 +490,13 @@ static void __init isapnp_parse_fixed_port_resource(struct pnp_dev *dev, int size) { unsigned char tmp[3]; - struct pnp_port *port; + resource_size_t base, len; isapnp_peek(tmp, size); - port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); - if (!port) - return; - port->min = port->max = (tmp[1] << 8) | tmp[0]; - port->size = tmp[2]; - port->align = 0; - port->flags = IORESOURCE_IO_FIXED; - pnp_register_port_resource(dev, option, port); + base = (tmp[1] << 8) | tmp[0]; + len = tmp[2]; + pnp_register_port_resource(dev, option, base, base, 0, len, + IORESOURCE_IO_FIXED); } /* @@ -519,18 +507,16 @@ static void __init isapnp_parse_mem_resource(struct pnp_dev *dev, int size) { unsigned char tmp[9]; - struct pnp_mem *mem; + resource_size_t min, max, align, len; + unsigned char flags; isapnp_peek(tmp, size); - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = ((tmp[2] << 8) | tmp[1]) << 8; - mem->max = ((tmp[4] << 8) | tmp[3]) << 8; - mem->align = (tmp[6] << 8) | tmp[5]; - mem->size = ((tmp[8] << 8) | tmp[7]) << 8; - mem->flags = tmp[0]; - pnp_register_mem_resource(dev, option, mem); + min = ((tmp[2] << 8) | tmp[1]) << 8; + max = ((tmp[4] << 8) | tmp[3]) << 8; + align = (tmp[6] << 8) | tmp[5]; + len = ((tmp[8] << 8) | tmp[7]) << 8; + flags = tmp[0]; + pnp_register_mem_resource(dev, option, min, max, align, len, flags); } /* @@ -541,20 +527,16 @@ static void __init isapnp_parse_mem32_resource(struct pnp_dev *dev, int size) { unsigned char tmp[17]; - struct pnp_mem *mem; + resource_size_t min, max, align, len; + unsigned char flags; isapnp_peek(tmp, size); - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1]; - mem->max = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5]; - mem->align = - (tmp[12] << 24) | (tmp[11] << 16) | (tmp[10] << 8) | tmp[9]; - mem->size = - (tmp[16] << 24) | (tmp[15] << 16) | (tmp[14] << 8) | tmp[13]; - mem->flags = tmp[0]; - pnp_register_mem_resource(dev, option, mem); + min = (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1]; + max = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5]; + align = (tmp[12] << 24) | (tmp[11] << 16) | (tmp[10] << 8) | tmp[9]; + len = (tmp[16] << 24) | (tmp[15] << 16) | (tmp[14] << 8) | tmp[13]; + flags = tmp[0]; + pnp_register_mem_resource(dev, option, min, max, align, len, flags); } /* @@ -565,18 +547,14 @@ static void __init isapnp_parse_fixed_mem32_resource(struct pnp_dev *dev, int size) { unsigned char tmp[9]; - struct pnp_mem *mem; + resource_size_t base, len; + unsigned char flags; isapnp_peek(tmp, size); - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = mem->max = - (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1]; - mem->size = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5]; - mem->align = 0; - mem->flags = tmp[0]; - pnp_register_mem_resource(dev, option, mem); + base = (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1]; + len = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5]; + flags = tmp[0]; + pnp_register_mem_resource(dev, option, base, base, 0, len, flags); } /* diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index ae65454..3aaf406 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -411,20 +411,16 @@ static __init void pnpacpi_parse_dma_option(struct pnp_dev *dev, struct acpi_resource_dma *p) { int i; - struct pnp_dma *dma; + unsigned char map = 0, flags; if (p->channel_count == 0) return; - dma = kzalloc(sizeof(struct pnp_dma), GFP_KERNEL); - if (!dma) - return; for (i = 0; i < p->channel_count; i++) - dma->map |= 1 << p->channels[i]; - - dma->flags = dma_flags(p->type, p->bus_master, p->transfer); + map |= 1 << p->channels[i]; - pnp_register_dma_resource(dev, option, dma); + flags = dma_flags(p->type, p->bus_master, p->transfer); + pnp_register_dma_resource(dev, option, map, flags); } static __init void pnpacpi_parse_irq_option(struct pnp_dev *dev, @@ -432,20 +428,19 @@ static __init void pnpacpi_parse_irq_option(struct pnp_dev *dev, struct acpi_resource_irq *p) { int i; - struct pnp_irq *irq; + pnp_irq_mask_t map; + unsigned char flags; if (p->interrupt_count == 0) return; - irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL); - if (!irq) - return; + bitmap_zero(map.bits, PNP_IRQ_NR); for (i = 0; i < p->interrupt_count; i++) if (p->interrupts[i]) - __set_bit(p->interrupts[i], irq->map.bits); - irq->flags = irq_flags(p->triggering, p->polarity, p->sharable); + __set_bit(p->interrupts[i], map.bits); - pnp_register_irq_resource(dev, option, irq); + flags = irq_flags(p->triggering, p->polarity, p->sharable); + pnp_register_irq_resource(dev, option, &map, flags); } static __init void pnpacpi_parse_ext_irq_option(struct pnp_dev *dev, @@ -453,123 +448,90 @@ static __init void pnpacpi_parse_ext_irq_option(struct pnp_dev *dev, struct acpi_resource_extended_irq *p) { int i; - struct pnp_irq *irq; + pnp_irq_mask_t map; + unsigned char flags; if (p->interrupt_count == 0) return; - irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL); - if (!irq) - return; + bitmap_zero(map.bits, PNP_IRQ_NR); for (i = 0; i < p->interrupt_count; i++) if (p->interrupts[i]) - __set_bit(p->interrupts[i], irq->map.bits); - irq->flags = irq_flags(p->triggering, p->polarity, p->sharable); + __set_bit(p->interrupts[i], map.bits); - pnp_register_irq_resource(dev, option, irq); + flags = irq_flags(p->triggering, p->polarity, p->sharable); + pnp_register_irq_resource(dev, option, &map, flags); } static __init void pnpacpi_parse_port_option(struct pnp_dev *dev, struct pnp_option *option, struct acpi_resource_io *io) { - struct pnp_port *port; + unsigned char flags = 0; if (io->address_length == 0) return; - port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); - if (!port) - return; - port->min = io->minimum; - port->max = io->maximum; - port->align = io->alignment; - port->size = io->address_length; - port->flags = ACPI_DECODE_16 == io->io_decode ? - IORESOURCE_IO_16BIT_ADDR : 0; - pnp_register_port_resource(dev, option, port); + + if (io->io_decode == ACPI_DECODE_16) + flags = IORESOURCE_IO_16BIT_ADDR; + pnp_register_port_resource(dev, option, io->minimum, io->maximum, + io->alignment, io->address_length, flags); } static __init void pnpacpi_parse_fixed_port_option(struct pnp_dev *dev, struct pnp_option *option, struct acpi_resource_fixed_io *io) { - struct pnp_port *port; - if (io->address_length == 0) return; - port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); - if (!port) - return; - port->min = port->max = io->address; - port->size = io->address_length; - port->align = 0; - port->flags = IORESOURCE_IO_FIXED; - pnp_register_port_resource(dev, option, port); + + pnp_register_port_resource(dev, option, io->address, io->address, 0, + io->address_length, IORESOURCE_IO_FIXED); } static __init void pnpacpi_parse_mem24_option(struct pnp_dev *dev, struct pnp_option *option, struct acpi_resource_memory24 *p) { - struct pnp_mem *mem; + unsigned char flags = 0; if (p->address_length == 0) return; - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = p->minimum; - mem->max = p->maximum; - mem->align = p->alignment; - mem->size = p->address_length; - mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ? - IORESOURCE_MEM_WRITEABLE : 0; - - pnp_register_mem_resource(dev, option, mem); + if (p->write_protect == ACPI_READ_WRITE_MEMORY) + flags = IORESOURCE_MEM_WRITEABLE; + pnp_register_mem_resource(dev, option, p->minimum, p->maximum, + p->alignment, p->address_length, flags); } static __init void pnpacpi_parse_mem32_option(struct pnp_dev *dev, struct pnp_option *option, struct acpi_resource_memory32 *p) { - struct pnp_mem *mem; + unsigned char flags = 0; if (p->address_length == 0) return; - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = p->minimum; - mem->max = p->maximum; - mem->align = p->alignment; - mem->size = p->address_length; - mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ? - IORESOURCE_MEM_WRITEABLE : 0; - - pnp_register_mem_resource(dev, option, mem); + if (p->write_protect == ACPI_READ_WRITE_MEMORY) + flags = IORESOURCE_MEM_WRITEABLE; + pnp_register_mem_resource(dev, option, p->minimum, p->maximum, + p->alignment, p->address_length, flags); } static __init void pnpacpi_parse_fixed_mem32_option(struct pnp_dev *dev, struct pnp_option *option, struct acpi_resource_fixed_memory32 *p) { - struct pnp_mem *mem; + unsigned char flags = 0; if (p->address_length == 0) return; - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = mem->max = p->address; - mem->size = p->address_length; - mem->align = 0; - mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ? - IORESOURCE_MEM_WRITEABLE : 0; - - pnp_register_mem_resource(dev, option, mem); + if (p->write_protect == ACPI_READ_WRITE_MEMORY) + flags = IORESOURCE_MEM_WRITEABLE; + pnp_register_mem_resource(dev, option, p->address, p->address, + 0, p->address_length, flags); } static __init void pnpacpi_parse_address_option(struct pnp_dev *dev, @@ -578,8 +540,7 @@ static __init void pnpacpi_parse_address_option(struct pnp_dev *dev, { struct acpi_resource_address64 addr, *p = &addr; acpi_status status; - struct pnp_mem *mem; - struct pnp_port *port; + unsigned char flags = 0; status = acpi_resource_to_address64(r, p); if (!ACPI_SUCCESS(status)) { @@ -592,26 +553,14 @@ static __init void pnpacpi_parse_address_option(struct pnp_dev *dev, return; if (p->resource_type == ACPI_MEMORY_RANGE) { - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = mem->max = p->minimum; - mem->size = p->address_length; - mem->align = 0; - mem->flags = (p->info.mem.write_protect == - ACPI_READ_WRITE_MEMORY) ? IORESOURCE_MEM_WRITEABLE - : 0; - pnp_register_mem_resource(dev, option, mem); - } else if (p->resource_type == ACPI_IO_RANGE) { - port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); - if (!port) - return; - port->min = port->max = p->minimum; - port->size = p->address_length; - port->align = 0; - port->flags = IORESOURCE_IO_FIXED; - pnp_register_port_resource(dev, option, port); - } + if (p->info.mem.write_protect == ACPI_READ_WRITE_MEMORY) + flags = IORESOURCE_MEM_WRITEABLE; + pnp_register_mem_resource(dev, option, p->minimum, p->minimum, + 0, p->address_length, flags); + } else if (p->resource_type == ACPI_IO_RANGE) + pnp_register_port_resource(dev, option, p->minimum, p->minimum, + 0, p->address_length, + IORESOURCE_IO_FIXED); } struct acpipnp_parse_option_s { diff --git a/drivers/pnp/pnpbios/rsparser.c b/drivers/pnp/pnpbios/rsparser.c index dd2ea7b..26fb04c 100644 --- a/drivers/pnp/pnpbios/rsparser.c +++ b/drivers/pnp/pnpbios/rsparser.c @@ -218,116 +218,96 @@ static __init void pnpbios_parse_mem_option(struct pnp_dev *dev, unsigned char *p, int size, struct pnp_option *option) { - struct pnp_mem *mem; - - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = ((p[5] << 8) | p[4]) << 8; - mem->max = ((p[7] << 8) | p[6]) << 8; - mem->align = (p[9] << 8) | p[8]; - mem->size = ((p[11] << 8) | p[10]) << 8; - mem->flags = p[3]; - pnp_register_mem_resource(dev, option, mem); + resource_size_t min, max, align, len; + unsigned char flags; + + min = ((p[5] << 8) | p[4]) << 8; + max = ((p[7] << 8) | p[6]) << 8; + align = (p[9] << 8) | p[8]; + len = ((p[11] << 8) | p[10]) << 8; + flags = p[3]; + pnp_register_mem_resource(dev, option, min, max, align, len, flags); } static __init void pnpbios_parse_mem32_option(struct pnp_dev *dev, unsigned char *p, int size, struct pnp_option *option) { - struct pnp_mem *mem; - - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = (p[7] << 24) | (p[6] << 16) | (p[5] << 8) | p[4]; - mem->max = (p[11] << 24) | (p[10] << 16) | (p[9] << 8) | p[8]; - mem->align = (p[15] << 24) | (p[14] << 16) | (p[13] << 8) | p[12]; - mem->size = (p[19] << 24) | (p[18] << 16) | (p[17] << 8) | p[16]; - mem->flags = p[3]; - pnp_register_mem_resource(dev, option, mem); + resource_size_t min, max, align, len; + unsigned char flags; + + min = (p[7] << 24) | (p[6] << 16) | (p[5] << 8) | p[4]; + max = (p[11] << 24) | (p[10] << 16) | (p[9] << 8) | p[8]; + align = (p[15] << 24) | (p[14] << 16) | (p[13] << 8) | p[12]; + len = (p[19] << 24) | (p[18] << 16) | (p[17] << 8) | p[16]; + flags = p[3]; + pnp_register_mem_resource(dev, option, min, max, align, len, flags); } static __init void pnpbios_parse_fixed_mem32_option(struct pnp_dev *dev, unsigned char *p, int size, struct pnp_option *option) { - struct pnp_mem *mem; - - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = mem->max = (p[7] << 24) | (p[6] << 16) | (p[5] << 8) | p[4]; - mem->size = (p[11] << 24) | (p[10] << 16) | (p[9] << 8) | p[8]; - mem->align = 0; - mem->flags = p[3]; - pnp_register_mem_resource(dev, option, mem); + resource_size_t base, len; + unsigned char flags; + + base = (p[7] << 24) | (p[6] << 16) | (p[5] << 8) | p[4]; + len = (p[11] << 24) | (p[10] << 16) | (p[9] << 8) | p[8]; + flags = p[3]; + pnp_register_mem_resource(dev, option, base, base, 0, len, flags); } static __init void pnpbios_parse_irq_option(struct pnp_dev *dev, unsigned char *p, int size, struct pnp_option *option) { - struct pnp_irq *irq; unsigned long bits; + pnp_irq_mask_t map; + unsigned char flags = IORESOURCE_IRQ_HIGHEDGE; - irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL); - if (!irq) - return; bits = (p[2] << 8) | p[1]; - bitmap_copy(irq->map.bits, &bits, 16); + + bitmap_zero(map.bits, PNP_IRQ_NR); + bitmap_copy(map.bits, &bits, 16); + if (size > 2) - irq->flags = p[3]; - else - irq->flags = IORESOURCE_IRQ_HIGHEDGE; - pnp_register_irq_resource(dev, option, irq); + flags = p[3]; + + pnp_register_irq_resource(dev, option, &map, flags); } static __init void pnpbios_parse_dma_option(struct pnp_dev *dev, unsigned char *p, int size, struct pnp_option *option) { - struct pnp_dma *dma; - - dma = kzalloc(sizeof(struct pnp_dma), GFP_KERNEL); - if (!dma) - return; - dma->map = p[1]; - dma->flags = p[2]; - pnp_register_dma_resource(dev, option, dma); + pnp_register_dma_resource(dev, option, p[1], p[2]); } static __init void pnpbios_parse_port_option(struct pnp_dev *dev, unsigned char *p, int size, struct pnp_option *option) { - struct pnp_port *port; - - port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); - if (!port) - return; - port->min = (p[3] << 8) | p[2]; - port->max = (p[5] << 8) | p[4]; - port->align = p[6]; - port->size = p[7]; - port->flags = p[1] ? IORESOURCE_IO_16BIT_ADDR : 0; - pnp_register_port_resource(dev, option, port); + resource_size_t min, max, align, len; + unsigned char flags; + + min = (p[3] << 8) | p[2]; + max = (p[5] << 8) | p[4]; + align = p[6]; + len = p[7]; + flags = p[1] ? IORESOURCE_IO_16BIT_ADDR : 0; + pnp_register_port_resource(dev, option, min, max, align, len, flags); } static __init void pnpbios_parse_fixed_port_option(struct pnp_dev *dev, unsigned char *p, int size, struct pnp_option *option) { - struct pnp_port *port; - - port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); - if (!port) - return; - port->min = port->max = (p[2] << 8) | p[1]; - port->size = p[3]; - port->align = 0; - port->flags = IORESOURCE_IO_FIXED; - pnp_register_port_resource(dev, option, port); + resource_size_t base, len; + + base = (p[2] << 8) | p[1]; + len = p[3]; + pnp_register_port_resource(dev, option, base, base, 0, len, + IORESOURCE_IO_FIXED); } static __init unsigned char * diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index 391828c..6114549 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -78,13 +78,20 @@ struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev, } int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_irq *data) + pnp_irq_mask_t *map, unsigned char flags) { - struct pnp_irq *ptr; + struct pnp_irq *data, *ptr; #ifdef DEBUG char buf[PNP_IRQ_NR]; /* hex-encoded, so this is overkill but safe */ #endif + data = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->map = *map; + data->flags = flags; + ptr = option->irq; while (ptr && ptr->next) ptr = ptr->next; @@ -112,9 +119,16 @@ int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_option *option, } int pnp_register_dma_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_dma *data) + unsigned char map, unsigned char flags) { - struct pnp_dma *ptr; + struct pnp_dma *data, *ptr; + + data = kzalloc(sizeof(struct pnp_dma), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->map = map; + data->flags = flags; ptr = option->dma; while (ptr && ptr->next) @@ -130,9 +144,21 @@ int pnp_register_dma_resource(struct pnp_dev *dev, struct pnp_option *option, } int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_port *data) + resource_size_t min, resource_size_t max, + resource_size_t align, resource_size_t size, + unsigned char flags) { - struct pnp_port *ptr; + struct pnp_port *data, *ptr; + + data = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->min = min; + data->max = max; + data->align = align; + data->size = size; + data->flags = flags; ptr = option->port; while (ptr && ptr->next) @@ -152,9 +178,21 @@ int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option, } int pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_mem *data) + resource_size_t min, resource_size_t max, + resource_size_t align, resource_size_t size, + unsigned char flags) { - struct pnp_mem *ptr; + struct pnp_mem *data, *ptr; + + data = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->min = min; + data->max = max; + data->align = align; + data->size = size; + data->flags = flags; ptr = option->mem; while (ptr && ptr->next) -- cgit v0.10.2 From fe2cf598e6942abd8fb70fee230d74b1a1eae0d1 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:12 -0600 Subject: PNPACPI: ignore _PRS interrupt numbers larger than PNP_IRQ_NR ACPI Extended Interrupt Descriptors can encode 32-bit interrupt numbers, so an interrupt number may exceed the size of the bitmap we use to track possible IRQ settings. To avoid corrupting memory, complain and ignore too-large interrupt numbers. There's similar code in pnpacpi_parse_irq_option(), but I didn't change that because the small IRQ descriptor can only encode IRQs 0-15, which do not exceed bitmap size. In the future, we could handle IRQ numbers greater than PNP_IRQ_NR by replacing the bitmap with a table or list. Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index 3aaf406..851c773 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -455,9 +455,16 @@ static __init void pnpacpi_parse_ext_irq_option(struct pnp_dev *dev, return; bitmap_zero(map.bits, PNP_IRQ_NR); - for (i = 0; i < p->interrupt_count; i++) - if (p->interrupts[i]) - __set_bit(p->interrupts[i], map.bits); + for (i = 0; i < p->interrupt_count; i++) { + if (p->interrupts[i]) { + if (p->interrupts[i] < PNP_IRQ_NR) + __set_bit(p->interrupts[i], map.bits); + else + dev_err(&dev->dev, "ignoring IRQ %d option " + "(too large for %d entry bitmap)\n", + p->interrupts[i], PNP_IRQ_NR); + } + } flags = irq_flags(p->triggering, p->polarity, p->sharable); pnp_register_irq_resource(dev, option, &map, flags); -- cgit v0.10.2 From 2d29a7a794c5bae982955cd5dd0a76e766e57f39 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:13 -0600 Subject: PNP: rename pnp_register_*_resource() local variables No functional change; just rename "data" to something more descriptive. Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index 6114549..a795864 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -80,40 +80,40 @@ struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev, int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_option *option, pnp_irq_mask_t *map, unsigned char flags) { - struct pnp_irq *data, *ptr; + struct pnp_irq *irq, *ptr; #ifdef DEBUG char buf[PNP_IRQ_NR]; /* hex-encoded, so this is overkill but safe */ #endif - data = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL); - if (!data) + irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL); + if (!irq) return -ENOMEM; - data->map = *map; - data->flags = flags; + irq->map = *map; + irq->flags = flags; ptr = option->irq; while (ptr && ptr->next) ptr = ptr->next; if (ptr) - ptr->next = data; + ptr->next = irq; else - option->irq = data; + option->irq = irq; #ifdef CONFIG_PCI { int i; for (i = 0; i < 16; i++) - if (test_bit(i, data->map.bits)) + if (test_bit(i, irq->map.bits)) pcibios_penalize_isa_irq(i, 0); } #endif #ifdef DEBUG - bitmap_scnprintf(buf, sizeof(buf), data->map.bits, PNP_IRQ_NR); + bitmap_scnprintf(buf, sizeof(buf), irq->map.bits, PNP_IRQ_NR); dev_dbg(&dev->dev, " irq bitmask %s flags %#x\n", buf, - data->flags); + irq->flags); #endif return 0; } @@ -121,25 +121,25 @@ int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_option *option, int pnp_register_dma_resource(struct pnp_dev *dev, struct pnp_option *option, unsigned char map, unsigned char flags) { - struct pnp_dma *data, *ptr; + struct pnp_dma *dma, *ptr; - data = kzalloc(sizeof(struct pnp_dma), GFP_KERNEL); - if (!data) + dma = kzalloc(sizeof(struct pnp_dma), GFP_KERNEL); + if (!dma) return -ENOMEM; - data->map = map; - data->flags = flags; + dma->map = map; + dma->flags = flags; ptr = option->dma; while (ptr && ptr->next) ptr = ptr->next; if (ptr) - ptr->next = data; + ptr->next = dma; else - option->dma = data; + option->dma = dma; - dev_dbg(&dev->dev, " dma bitmask %#x flags %#x\n", data->map, - data->flags); + dev_dbg(&dev->dev, " dma bitmask %#x flags %#x\n", dma->map, + dma->flags); return 0; } @@ -148,32 +148,32 @@ int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option, resource_size_t align, resource_size_t size, unsigned char flags) { - struct pnp_port *data, *ptr; + struct pnp_port *port, *ptr; - data = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); - if (!data) + port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); + if (!port) return -ENOMEM; - data->min = min; - data->max = max; - data->align = align; - data->size = size; - data->flags = flags; + port->min = min; + port->max = max; + port->align = align; + port->size = size; + port->flags = flags; ptr = option->port; while (ptr && ptr->next) ptr = ptr->next; if (ptr) - ptr->next = data; + ptr->next = port; else - option->port = data; + option->port = port; dev_dbg(&dev->dev, " io " "min %#llx max %#llx align %lld size %lld flags %#x\n", - (unsigned long long) data->min, - (unsigned long long) data->max, - (unsigned long long) data->align, - (unsigned long long) data->size, data->flags); + (unsigned long long) port->min, + (unsigned long long) port->max, + (unsigned long long) port->align, + (unsigned long long) port->size, port->flags); return 0; } @@ -182,32 +182,32 @@ int pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option, resource_size_t align, resource_size_t size, unsigned char flags) { - struct pnp_mem *data, *ptr; + struct pnp_mem *mem, *ptr; - data = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!data) + mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); + if (!mem) return -ENOMEM; - data->min = min; - data->max = max; - data->align = align; - data->size = size; - data->flags = flags; + mem->min = min; + mem->max = max; + mem->align = align; + mem->size = size; + mem->flags = flags; ptr = option->mem; while (ptr && ptr->next) ptr = ptr->next; if (ptr) - ptr->next = data; + ptr->next = mem; else - option->mem = data; + option->mem = mem; dev_dbg(&dev->dev, " mem " "min %#llx max %#llx align %lld size %lld flags %#x\n", - (unsigned long long) data->min, - (unsigned long long) data->max, - (unsigned long long) data->align, - (unsigned long long) data->size, data->flags); + (unsigned long long) mem->min, + (unsigned long long) mem->max, + (unsigned long long) mem->align, + (unsigned long long) mem->size, mem->flags); return 0; } -- cgit v0.10.2 From d5ebde6ef5c2d51828f975a81d7d0e58bccfd833 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:14 -0600 Subject: PNP: support optional IRQ resources This patch adds an IORESOURCE_IRQ_OPTIONAL flag for use when assigning resources to a device. If the flag is set and we are unable to assign an IRQ to the device, we can leave the IRQ disabled but allow the overall resource allocation to succeed. Some devices request an IRQ, but can run without an IRQ (possibly with degraded performance). This flag lets us run the device without the IRQ instead of just leaving the device disabled. This is a reimplementation of this previous change by Rene Herman : http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=3b73a223661ed137c5d3d2635f954382e94f5a43 I reimplemented this for two reasons: - to prepare for converting all resource options into a single linked list, as opposed to the per-resource-type lists we have now, and - to preserve the order and number of resource options. In PNPBIOS and ACPI, we configure a device by giving firmware a list of resource assignments. It is important that this list has exactly the same number of resources, in the same order, as the "template" list we got from the firmware in the first place. The problem of a sound card MPU401 being left disabled for want of an IRQ was reported by Uwe Bugla . Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index b09f67d..7a9fb55 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -90,6 +90,8 @@ static void pnp_print_irq(pnp_info_buffer_t * buffer, char *space, pnp_printf(buffer, " High-Level"); if (irq->flags & IORESOURCE_IRQ_LOWLEVEL) pnp_printf(buffer, " Low-Level"); + if (irq->flags & IORESOURCE_IRQ_OPTIONAL) + pnp_printf(buffer, " (optional)"); pnp_printf(buffer, "\n"); } diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c index 7ea9e1e..a20accb 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -153,6 +153,15 @@ static int pnp_assign_irq(struct pnp_dev *dev, struct pnp_irq *rule, int idx) goto __add; } } + + if (rule->flags & IORESOURCE_IRQ_OPTIONAL) { + res->start = -1; + res->end = -1; + res->flags |= IORESOURCE_DISABLED; + dev_dbg(&dev->dev, " irq %d disabled (optional)\n", idx); + goto __add; + } + dev_dbg(&dev->dev, " couldn't assign irq %d\n", idx); return -EBUSY; diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c index 48e6017..e8515ce 100644 --- a/drivers/pnp/quirks.c +++ b/drivers/pnp/quirks.c @@ -121,34 +121,46 @@ static struct pnp_option *quirk_isapnp_mpu_options(struct pnp_dev *dev) struct pnp_option *res; /* - * Build a functional IRQ-less variant of each MPU option. + * Build a functional IRQ-optional variant of each MPU option. */ for (res = dev->dependent; res; res = res->next) { struct pnp_option *curr; struct pnp_port *port; - struct pnp_port *copy; + struct pnp_port *copy_port; + struct pnp_irq *irq; + struct pnp_irq *copy_irq; port = res->port; - if (!port || !res->irq) + irq = res->irq; + if (!port || !irq) continue; - copy = pnp_alloc(sizeof *copy); - if (!copy) + copy_port = pnp_alloc(sizeof *copy_port); + if (!copy_port) + break; + + copy_irq = pnp_alloc(sizeof *copy_irq); + if (!copy_irq) { + kfree(copy_port); break; + } + + *copy_port = *port; + copy_port->next = NULL; - copy->min = port->min; - copy->max = port->max; - copy->align = port->align; - copy->size = port->size; - copy->flags = port->flags; + *copy_irq = *irq; + copy_irq->flags |= IORESOURCE_IRQ_OPTIONAL; + copy_irq->next = NULL; curr = pnp_build_option(PNP_RES_PRIORITY_FUNCTIONAL); if (!curr) { - kfree(copy); + kfree(copy_port); + kfree(copy_irq); break; } - curr->port = copy; + curr->port = copy_port; + curr->irq = copy_irq; if (prev) prev->next = curr; @@ -157,7 +169,7 @@ static struct pnp_option *quirk_isapnp_mpu_options(struct pnp_dev *dev) prev = curr; } if (head) - dev_info(&dev->dev, "adding IRQ-less MPU options\n"); + dev_info(&dev->dev, "adding IRQ-optional MPU options\n"); return head; } @@ -167,10 +179,6 @@ static void quirk_ad1815_mpu_resources(struct pnp_dev *dev) struct pnp_option *res; struct pnp_irq *irq; - /* - * Distribute the independent IRQ over the dependent options - */ - res = dev->independent; if (!res) return; @@ -179,33 +187,8 @@ static void quirk_ad1815_mpu_resources(struct pnp_dev *dev) if (!irq || irq->next) return; - res = dev->dependent; - if (!res) - return; - - while (1) { - struct pnp_irq *copy; - - copy = pnp_alloc(sizeof *copy); - if (!copy) - break; - - bitmap_copy(copy->map.bits, irq->map.bits, PNP_IRQ_NR); - copy->flags = irq->flags; - - copy->next = res->irq; /* Yes, this is NULL */ - res->irq = copy; - - if (!res->next) - break; - res = res->next; - } - kfree(irq); - - res->next = quirk_isapnp_mpu_options(dev); - - res = dev->independent; - res->irq = NULL; + irq->flags |= IORESOURCE_IRQ_OPTIONAL; + dev_info(&dev->dev, "made independent IRQ optional\n"); } static void quirk_isapnp_mpu_resources(struct pnp_dev *dev) diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 39db059..2cd07cc 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -59,6 +59,7 @@ struct resource_list { #define IORESOURCE_IRQ_HIGHLEVEL (1<<2) #define IORESOURCE_IRQ_LOWLEVEL (1<<3) #define IORESOURCE_IRQ_SHAREABLE (1<<4) +#define IORESOURCE_IRQ_OPTIONAL (1<<5) /* PnP DMA specific bits (IORESOURCE_BITS) */ #define IORESOURCE_DMA_TYPE_MASK (3<<0) -- cgit v0.10.2 From e2a1a6f1cfaf6ee770a8700e5df8a3708dae503b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:15 -0600 Subject: PNP: remove extra 0x100 bit from option priority When building resource options, ISAPNP and PNPBIOS set the priority to something like "0x100 | PNP_RES_PRIORITY_ACCEPTABLE", but we immediately mask off the 0x100 again in pnp_build_option(), so that bit looks superfluous. Thanks to Rene Herman for pointing this out. Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c index 3f75fdb..90718be 100644 --- a/drivers/pnp/isapnp/core.c +++ b/drivers/pnp/isapnp/core.c @@ -582,7 +582,7 @@ isapnp_parse_name(char *name, unsigned int name_max, unsigned short *size) static int __init isapnp_create_device(struct pnp_card *card, unsigned short size) { - int number = 0, skip = 0, priority = 0, compat = 0; + int number = 0, skip = 0, priority, compat = 0; unsigned char type, tmp[17]; struct pnp_option *option; struct pnp_dev *dev; @@ -621,7 +621,6 @@ static int __init isapnp_create_device(struct pnp_card *card, } else { skip = 1; } - priority = 0; compat = 0; break; case _STAG_COMPATDEVID: @@ -650,10 +649,10 @@ static int __init isapnp_create_device(struct pnp_card *card, case _STAG_STARTDEP: if (size > 1) goto __skip; - priority = 0x100 | PNP_RES_PRIORITY_ACCEPTABLE; + priority = PNP_RES_PRIORITY_ACCEPTABLE; if (size > 0) { isapnp_peek(tmp, size); - priority = 0x100 | tmp[0]; + priority = tmp[0]; size = 0; } option = pnp_register_dependent_option(dev, priority); @@ -663,7 +662,6 @@ static int __init isapnp_create_device(struct pnp_card *card, case _STAG_ENDDEP: if (size != 0) goto __skip; - priority = 0; dev_dbg(&dev->dev, "end dependent options\n"); break; case _STAG_IOPORT: diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index 851c773..e114b3d 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -579,7 +579,7 @@ struct acpipnp_parse_option_s { static __init acpi_status pnpacpi_option_resource(struct acpi_resource *res, void *data) { - int priority = 0; + int priority; struct acpipnp_parse_option_s *parse_data = data; struct pnp_dev *dev = parse_data->dev; struct pnp_option *option = parse_data->option; diff --git a/drivers/pnp/pnpbios/rsparser.c b/drivers/pnp/pnpbios/rsparser.c index 26fb04c..db23ba7 100644 --- a/drivers/pnp/pnpbios/rsparser.c +++ b/drivers/pnp/pnpbios/rsparser.c @@ -315,7 +315,7 @@ pnpbios_parse_resource_option_data(unsigned char *p, unsigned char *end, struct pnp_dev *dev) { unsigned int len, tag; - int priority = 0; + int priority; struct pnp_option *option, *option_independent; if (!p) @@ -389,9 +389,9 @@ pnpbios_parse_resource_option_data(unsigned char *p, unsigned char *end, case SMALL_TAG_STARTDEP: if (len > 1) goto len_err; - priority = 0x100 | PNP_RES_PRIORITY_ACCEPTABLE; + priority = PNP_RES_PRIORITY_ACCEPTABLE; if (len > 0) - priority = 0x100 | p[1]; + priority = p[1]; option = pnp_register_dependent_option(dev, priority); if (!option) return NULL; -- cgit v0.10.2 From bbe413b4fc7f791248c7ee00ce7b3778491a3700 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:16 -0600 Subject: ISAPNP: handle independent options following dependent ones The ISAPNP spec recommends that independent options precede dependent ones, but this is not actually required. The current ISAPNP code incorrectly puts such trailing independent options at the end of the last dependent option list. This patch fixes that bug by resetting the current option list to the independent list when we see an "End Dependent Functions" tag. PNPBIOS and PNPACPI handle this the same way. Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c index 90718be..53cc4d6 100644 --- a/drivers/pnp/isapnp/core.c +++ b/drivers/pnp/isapnp/core.c @@ -584,14 +584,14 @@ static int __init isapnp_create_device(struct pnp_card *card, { int number = 0, skip = 0, priority, compat = 0; unsigned char type, tmp[17]; - struct pnp_option *option; + struct pnp_option *option, *option_independent; struct pnp_dev *dev; u32 eisa_id; char id[8]; if ((dev = isapnp_parse_device(card, size, number++)) == NULL) return 1; - option = pnp_register_independent_option(dev); + option_independent = option = pnp_register_independent_option(dev); if (!option) { kfree(dev); return 1; @@ -613,6 +613,7 @@ static int __init isapnp_create_device(struct pnp_card *card, size = 0; skip = 0; option = pnp_register_independent_option(dev); + option_independent = option; if (!option) { kfree(dev); return 1; @@ -662,6 +663,10 @@ static int __init isapnp_create_device(struct pnp_card *card, case _STAG_ENDDEP: if (size != 0) goto __skip; + if (option_independent == option) + dev_warn(&dev->dev, "missing " + "_STAG_STARTDEP tag\n"); + option = option_independent; dev_dbg(&dev->dev, "end dependent options\n"); break; case _STAG_IOPORT: -- cgit v0.10.2 From 1f32ca31e7409d37c1b25e5f81840fb184380cdf Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:17 -0600 Subject: PNP: convert resource options to single linked list ISAPNP, PNPBIOS, and ACPI describe the "possible resource settings" of a device, i.e., the possibilities an OS bus driver has when it assigns I/O port, MMIO, and other resources to the device. PNP used to maintain this "possible resource setting" information in one independent option structure and a list of dependent option structures for each device. Each of these option structures had lists of I/O, memory, IRQ, and DMA resources, for example: dev independent options ind-io0 -> ind-io1 ... ind-mem0 -> ind-mem1 ... ... dependent option set 0 dep0-io0 -> dep0-io1 ... dep0-mem0 -> dep0-mem1 ... ... dependent option set 1 dep1-io0 -> dep1-io1 ... dep1-mem0 -> dep1-mem1 ... ... ... This data structure was designed for ISAPNP, where the OS configures device resource settings by writing directly to configuration registers. The OS can write the registers in arbitrary order much like it writes PCI BARs. However, for PNPBIOS and ACPI devices, the OS uses firmware interfaces that perform device configuration, and it is important to pass the desired settings to those interfaces in the correct order. The OS learns the correct order by using firmware interfaces that return the "current resource settings" and "possible resource settings," but the option structures above doesn't store the ordering information. This patch replaces the independent and dependent lists with a single list of options. For example, a device might have possible resource settings like this: dev options ind-io0 -> dep0-io0 -> dep1->io0 -> ind-io1 ... All the possible settings are in the same list, in the order they come from the firmware "possible resource settings" list. Each entry is tagged with an independent/dependent flag. Dependent entries also have a "set number" and an optional priority value. All dependent entries must be assigned from the same set. For example, the OS can use all the entries from dependent set 0, or all the entries from dependent set 1, but it cannot mix entries from set 0 with entries from set 1. Prior to this patch PNP didn't keep track of the order of this list, and it assigned all independent options first, then all dependent ones. Using the example above, that resulted in a "desired configuration" list like this: ind->io0 -> ind->io1 -> depN-io0 ... instead of the list the firmware expects, which looks like this: ind->io0 -> depN-io0 -> ind-io1 ... Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h index 360c638..e3fa9a2 100644 --- a/drivers/pnp/base.h +++ b/drivers/pnp/base.h @@ -1,3 +1,8 @@ +/* + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas + */ + extern spinlock_t pnp_lock; void *pnp_alloc(long size); @@ -25,8 +30,6 @@ struct pnp_port { resource_size_t align; /* align boundary */ resource_size_t size; /* size of range */ unsigned char flags; /* port flags */ - unsigned char pad; /* pad */ - struct pnp_port *next; /* next port */ }; #define PNP_IRQ_NR 256 @@ -35,14 +38,11 @@ typedef struct { DECLARE_BITMAP(bits, PNP_IRQ_NR); } pnp_irq_mask_t; struct pnp_irq { pnp_irq_mask_t map; /* bitmap for IRQ lines */ unsigned char flags; /* IRQ flags */ - unsigned char pad; /* pad */ - struct pnp_irq *next; /* next IRQ */ }; struct pnp_dma { unsigned char map; /* bitmask for DMA channels */ unsigned char flags; /* DMA flags */ - struct pnp_dma *next; /* next port */ }; struct pnp_mem { @@ -51,44 +51,91 @@ struct pnp_mem { resource_size_t align; /* align boundary */ resource_size_t size; /* size of range */ unsigned char flags; /* memory flags */ - unsigned char pad; /* pad */ - struct pnp_mem *next; /* next memory resource */ }; +#define PNP_OPTION_DEPENDENT 0x80000000 +#define PNP_OPTION_SET_MASK 0xffff +#define PNP_OPTION_SET_SHIFT 12 +#define PNP_OPTION_PRIORITY_MASK 0xfff +#define PNP_OPTION_PRIORITY_SHIFT 0 + #define PNP_RES_PRIORITY_PREFERRED 0 #define PNP_RES_PRIORITY_ACCEPTABLE 1 #define PNP_RES_PRIORITY_FUNCTIONAL 2 -#define PNP_RES_PRIORITY_INVALID 65535 +#define PNP_RES_PRIORITY_INVALID PNP_OPTION_PRIORITY_MASK struct pnp_option { - unsigned short priority; /* priority */ - struct pnp_port *port; /* first port */ - struct pnp_irq *irq; /* first IRQ */ - struct pnp_dma *dma; /* first DMA */ - struct pnp_mem *mem; /* first memory resource */ - struct pnp_option *next; /* used to chain dependent resources */ + struct list_head list; + unsigned int flags; /* independent/dependent, set, priority */ + + unsigned long type; /* IORESOURCE_{IO,MEM,IRQ,DMA} */ + union { + struct pnp_port port; + struct pnp_irq irq; + struct pnp_dma dma; + struct pnp_mem mem; + } u; }; -struct pnp_option *pnp_build_option(int priority); -struct pnp_option *pnp_register_independent_option(struct pnp_dev *dev); -struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev, - int priority); -int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_option *option, +int pnp_register_irq_resource(struct pnp_dev *dev, unsigned int option_flags, pnp_irq_mask_t *map, unsigned char flags); -int pnp_register_dma_resource(struct pnp_dev *dev, struct pnp_option *option, +int pnp_register_dma_resource(struct pnp_dev *dev, unsigned int option_flags, unsigned char map, unsigned char flags); -int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option, +int pnp_register_port_resource(struct pnp_dev *dev, unsigned int option_flags, resource_size_t min, resource_size_t max, resource_size_t align, resource_size_t size, unsigned char flags); -int pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option, +int pnp_register_mem_resource(struct pnp_dev *dev, unsigned int option_flags, resource_size_t min, resource_size_t max, resource_size_t align, resource_size_t size, unsigned char flags); + +static inline int pnp_option_is_dependent(struct pnp_option *option) +{ + return option->flags & PNP_OPTION_DEPENDENT ? 1 : 0; +} + +static inline unsigned int pnp_option_set(struct pnp_option *option) +{ + return (option->flags >> PNP_OPTION_SET_SHIFT) & PNP_OPTION_SET_MASK; +} + +static inline unsigned int pnp_option_priority(struct pnp_option *option) +{ + return (option->flags >> PNP_OPTION_PRIORITY_SHIFT) & + PNP_OPTION_PRIORITY_MASK; +} + +static inline unsigned int pnp_new_dependent_set(struct pnp_dev *dev, + int priority) +{ + unsigned int flags; + + if (priority > PNP_RES_PRIORITY_FUNCTIONAL) { + dev_warn(&dev->dev, "invalid dependent option priority %d " + "clipped to %d", priority, + PNP_RES_PRIORITY_INVALID); + priority = PNP_RES_PRIORITY_INVALID; + } + + flags = PNP_OPTION_DEPENDENT | + ((dev->num_dependent_sets & PNP_OPTION_SET_MASK) << + PNP_OPTION_SET_SHIFT) | + ((priority & PNP_OPTION_PRIORITY_MASK) << + PNP_OPTION_PRIORITY_SHIFT); + + dev->num_dependent_sets++; + + return flags; +} + +char *pnp_option_priority_name(struct pnp_option *option); +void dbg_pnp_show_option(struct pnp_dev *dev, struct pnp_option *option); + void pnp_init_resources(struct pnp_dev *dev); void pnp_fixup_device(struct pnp_dev *dev); -void pnp_free_option(struct pnp_option *option); +void pnp_free_options(struct pnp_dev *dev); int __pnp_add_device(struct pnp_dev *dev); void __pnp_remove_device(struct pnp_dev *dev); diff --git a/drivers/pnp/core.c b/drivers/pnp/core.c index 7182da9..a411582 100644 --- a/drivers/pnp/core.c +++ b/drivers/pnp/core.c @@ -118,10 +118,9 @@ static void pnp_release_device(struct device *dmdev) { struct pnp_dev *dev = to_pnp_dev(dmdev); - pnp_free_option(dev->independent); - pnp_free_option(dev->dependent); pnp_free_ids(dev); pnp_free_resources(dev); + pnp_free_options(dev); kfree(dev); } @@ -135,6 +134,7 @@ struct pnp_dev *pnp_alloc_dev(struct pnp_protocol *protocol, int id, char *pnpid return NULL; INIT_LIST_HEAD(&dev->resources); + INIT_LIST_HEAD(&dev->options); dev->protocol = protocol; dev->number = id; dev->dma_mask = DMA_24BIT_MASK; diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index 7a9fb55..a876ecf 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -3,6 +3,8 @@ * * Some code, especially possible resource dumping is based on isapnp_proc.c (c) Jaroslav Kysela * Copyright 2002 Adam Belay + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas */ #include @@ -184,39 +186,22 @@ static void pnp_print_mem(pnp_info_buffer_t * buffer, char *space, } static void pnp_print_option(pnp_info_buffer_t * buffer, char *space, - struct pnp_option *option, int dep) + struct pnp_option *option) { - char *s; - struct pnp_port *port; - struct pnp_irq *irq; - struct pnp_dma *dma; - struct pnp_mem *mem; - - if (dep) { - switch (option->priority) { - case PNP_RES_PRIORITY_PREFERRED: - s = "preferred"; - break; - case PNP_RES_PRIORITY_ACCEPTABLE: - s = "acceptable"; - break; - case PNP_RES_PRIORITY_FUNCTIONAL: - s = "functional"; - break; - default: - s = "invalid"; - } - pnp_printf(buffer, "Dependent: %02i - Priority %s\n", dep, s); + switch (option->type) { + case IORESOURCE_IO: + pnp_print_port(buffer, space, &option->u.port); + break; + case IORESOURCE_MEM: + pnp_print_mem(buffer, space, &option->u.mem); + break; + case IORESOURCE_IRQ: + pnp_print_irq(buffer, space, &option->u.irq); + break; + case IORESOURCE_DMA: + pnp_print_dma(buffer, space, &option->u.dma); + break; } - - for (port = option->port; port; port = port->next) - pnp_print_port(buffer, space, port); - for (irq = option->irq; irq; irq = irq->next) - pnp_print_irq(buffer, space, irq); - for (dma = option->dma; dma; dma = dma->next) - pnp_print_dma(buffer, space, dma); - for (mem = option->mem; mem; mem = mem->next) - pnp_print_mem(buffer, space, mem); } static ssize_t pnp_show_options(struct device *dmdev, @@ -224,9 +209,9 @@ static ssize_t pnp_show_options(struct device *dmdev, { struct pnp_dev *dev = to_pnp_dev(dmdev); pnp_info_buffer_t *buffer; - struct pnp_option *independent = dev->independent; - struct pnp_option *dependent = dev->dependent; - int ret, dep = 1; + struct pnp_option *option; + int ret, dep = 0, set = 0; + char *indent; buffer = pnp_alloc(sizeof(pnp_info_buffer_t)); if (!buffer) @@ -235,14 +220,24 @@ static ssize_t pnp_show_options(struct device *dmdev, buffer->len = PAGE_SIZE; buffer->buffer = buf; buffer->curr = buffer->buffer; - if (independent) - pnp_print_option(buffer, "", independent, 0); - while (dependent) { - pnp_print_option(buffer, " ", dependent, dep); - dependent = dependent->next; - dep++; + list_for_each_entry(option, &dev->options, list) { + if (pnp_option_is_dependent(option)) { + indent = " "; + if (!dep || pnp_option_set(option) != set) { + set = pnp_option_set(option); + dep = 1; + pnp_printf(buffer, "Dependent: %02i - " + "Priority %s\n", set, + pnp_option_priority_name(option)); + } + } else { + dep = 0; + indent = ""; + } + pnp_print_option(buffer, indent, option); } + ret = (buffer->curr - buf); kfree(buffer); return ret; diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c index 53cc4d6..101a835 100644 --- a/drivers/pnp/isapnp/core.c +++ b/drivers/pnp/isapnp/core.c @@ -429,7 +429,7 @@ static struct pnp_dev *__init isapnp_parse_device(struct pnp_card *card, * Add IRQ resource to resources list. */ static void __init isapnp_parse_irq_resource(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, int size) { unsigned char tmp[3]; @@ -446,27 +446,27 @@ static void __init isapnp_parse_irq_resource(struct pnp_dev *dev, if (size > 2) flags = tmp[2]; - pnp_register_irq_resource(dev, option, &map, flags); + pnp_register_irq_resource(dev, option_flags, &map, flags); } /* * Add DMA resource to resources list. */ static void __init isapnp_parse_dma_resource(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, int size) { unsigned char tmp[2]; isapnp_peek(tmp, size); - pnp_register_dma_resource(dev, option, tmp[0], tmp[1]); + pnp_register_dma_resource(dev, option_flags, tmp[0], tmp[1]); } /* * Add port resource to resources list. */ static void __init isapnp_parse_port_resource(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, int size) { unsigned char tmp[7]; @@ -479,14 +479,15 @@ static void __init isapnp_parse_port_resource(struct pnp_dev *dev, align = tmp[5]; len = tmp[6]; flags = tmp[0] ? IORESOURCE_IO_16BIT_ADDR : 0; - pnp_register_port_resource(dev, option, min, max, align, len, flags); + pnp_register_port_resource(dev, option_flags, + min, max, align, len, flags); } /* * Add fixed port resource to resources list. */ static void __init isapnp_parse_fixed_port_resource(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, int size) { unsigned char tmp[3]; @@ -495,7 +496,7 @@ static void __init isapnp_parse_fixed_port_resource(struct pnp_dev *dev, isapnp_peek(tmp, size); base = (tmp[1] << 8) | tmp[0]; len = tmp[2]; - pnp_register_port_resource(dev, option, base, base, 0, len, + pnp_register_port_resource(dev, option_flags, base, base, 0, len, IORESOURCE_IO_FIXED); } @@ -503,7 +504,7 @@ static void __init isapnp_parse_fixed_port_resource(struct pnp_dev *dev, * Add memory resource to resources list. */ static void __init isapnp_parse_mem_resource(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, int size) { unsigned char tmp[9]; @@ -516,14 +517,15 @@ static void __init isapnp_parse_mem_resource(struct pnp_dev *dev, align = (tmp[6] << 8) | tmp[5]; len = ((tmp[8] << 8) | tmp[7]) << 8; flags = tmp[0]; - pnp_register_mem_resource(dev, option, min, max, align, len, flags); + pnp_register_mem_resource(dev, option_flags, + min, max, align, len, flags); } /* * Add 32-bit memory resource to resources list. */ static void __init isapnp_parse_mem32_resource(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, int size) { unsigned char tmp[17]; @@ -536,14 +538,15 @@ static void __init isapnp_parse_mem32_resource(struct pnp_dev *dev, align = (tmp[12] << 24) | (tmp[11] << 16) | (tmp[10] << 8) | tmp[9]; len = (tmp[16] << 24) | (tmp[15] << 16) | (tmp[14] << 8) | tmp[13]; flags = tmp[0]; - pnp_register_mem_resource(dev, option, min, max, align, len, flags); + pnp_register_mem_resource(dev, option_flags, + min, max, align, len, flags); } /* * Add 32-bit fixed memory resource to resources list. */ static void __init isapnp_parse_fixed_mem32_resource(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, int size) { unsigned char tmp[9]; @@ -554,7 +557,7 @@ static void __init isapnp_parse_fixed_mem32_resource(struct pnp_dev *dev, base = (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1]; len = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5]; flags = tmp[0]; - pnp_register_mem_resource(dev, option, base, base, 0, len, flags); + pnp_register_mem_resource(dev, option_flags, base, base, 0, len, flags); } /* @@ -584,18 +587,14 @@ static int __init isapnp_create_device(struct pnp_card *card, { int number = 0, skip = 0, priority, compat = 0; unsigned char type, tmp[17]; - struct pnp_option *option, *option_independent; + unsigned int option_flags; struct pnp_dev *dev; u32 eisa_id; char id[8]; if ((dev = isapnp_parse_device(card, size, number++)) == NULL) return 1; - option_independent = option = pnp_register_independent_option(dev); - if (!option) { - kfree(dev); - return 1; - } + option_flags = 0; pnp_add_card_device(card, dev); while (1) { @@ -612,12 +611,7 @@ static int __init isapnp_create_device(struct pnp_card *card, return 1; size = 0; skip = 0; - option = pnp_register_independent_option(dev); - option_independent = option; - if (!option) { - kfree(dev); - return 1; - } + option_flags = 0; pnp_add_card_device(card, dev); } else { skip = 1; @@ -638,13 +632,13 @@ static int __init isapnp_create_device(struct pnp_card *card, case _STAG_IRQ: if (size < 2 || size > 3) goto __skip; - isapnp_parse_irq_resource(dev, option, size); + isapnp_parse_irq_resource(dev, option_flags, size); size = 0; break; case _STAG_DMA: if (size != 2) goto __skip; - isapnp_parse_dma_resource(dev, option, size); + isapnp_parse_dma_resource(dev, option_flags, size); size = 0; break; case _STAG_STARTDEP: @@ -656,29 +650,24 @@ static int __init isapnp_create_device(struct pnp_card *card, priority = tmp[0]; size = 0; } - option = pnp_register_dependent_option(dev, priority); - if (!option) - return 1; + option_flags = pnp_new_dependent_set(dev, priority); break; case _STAG_ENDDEP: if (size != 0) goto __skip; - if (option_independent == option) - dev_warn(&dev->dev, "missing " - "_STAG_STARTDEP tag\n"); - option = option_independent; - dev_dbg(&dev->dev, "end dependent options\n"); + option_flags = 0; break; case _STAG_IOPORT: if (size != 7) goto __skip; - isapnp_parse_port_resource(dev, option, size); + isapnp_parse_port_resource(dev, option_flags, size); size = 0; break; case _STAG_FIXEDIO: if (size != 3) goto __skip; - isapnp_parse_fixed_port_resource(dev, option, size); + isapnp_parse_fixed_port_resource(dev, option_flags, + size); size = 0; break; case _STAG_VENDOR: @@ -686,7 +675,7 @@ static int __init isapnp_create_device(struct pnp_card *card, case _LTAG_MEMRANGE: if (size != 9) goto __skip; - isapnp_parse_mem_resource(dev, option, size); + isapnp_parse_mem_resource(dev, option_flags, size); size = 0; break; case _LTAG_ANSISTR: @@ -701,13 +690,14 @@ static int __init isapnp_create_device(struct pnp_card *card, case _LTAG_MEM32RANGE: if (size != 17) goto __skip; - isapnp_parse_mem32_resource(dev, option, size); + isapnp_parse_mem32_resource(dev, option_flags, size); size = 0; break; case _LTAG_FIXEDMEM32RANGE: if (size != 9) goto __skip; - isapnp_parse_fixed_mem32_resource(dev, option, size); + isapnp_parse_fixed_mem32_resource(dev, option_flags, + size); size = 0; break; case _STAG_END: diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c index a20accb..b526eaa 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -3,6 +3,8 @@ * * based on isapnp.c resource management (c) Jaroslav Kysela * Copyright 2003 Adam Belay + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas */ #include @@ -228,102 +230,51 @@ static void pnp_clean_resource_table(struct pnp_dev *dev) /** * pnp_assign_resources - assigns resources to the device based on the specified dependent number * @dev: pointer to the desired device - * @depnum: the dependent function number - * - * Only set depnum to 0 if the device does not have dependent options. + * @set: the dependent function number */ -static int pnp_assign_resources(struct pnp_dev *dev, int depnum) +static int pnp_assign_resources(struct pnp_dev *dev, int set) { - struct pnp_port *port; - struct pnp_mem *mem; - struct pnp_irq *irq; - struct pnp_dma *dma; + struct pnp_option *option; int nport = 0, nmem = 0, nirq = 0, ndma = 0; + int ret = 0; - dbg_pnp_show_resources(dev, "before pnp_assign_resources"); + dev_dbg(&dev->dev, "pnp_assign_resources, try dependent set %d\n", set); mutex_lock(&pnp_res_mutex); pnp_clean_resource_table(dev); - if (dev->independent) { - dev_dbg(&dev->dev, "assigning independent options\n"); - port = dev->independent->port; - mem = dev->independent->mem; - irq = dev->independent->irq; - dma = dev->independent->dma; - while (port) { - if (pnp_assign_port(dev, port, nport) < 0) - goto fail; - nport++; - port = port->next; - } - while (mem) { - if (pnp_assign_mem(dev, mem, nmem) < 0) - goto fail; - nmem++; - mem = mem->next; - } - while (irq) { - if (pnp_assign_irq(dev, irq, nirq) < 0) - goto fail; - nirq++; - irq = irq->next; - } - while (dma) { - if (pnp_assign_dma(dev, dma, ndma) < 0) - goto fail; - ndma++; - dma = dma->next; - } - } - if (depnum) { - struct pnp_option *dep; - int i; - - dev_dbg(&dev->dev, "assigning dependent option %d\n", depnum); - for (i = 1, dep = dev->dependent; i < depnum; - i++, dep = dep->next) - if (!dep) - goto fail; - port = dep->port; - mem = dep->mem; - irq = dep->irq; - dma = dep->dma; - while (port) { - if (pnp_assign_port(dev, port, nport) < 0) - goto fail; - nport++; - port = port->next; - } - while (mem) { - if (pnp_assign_mem(dev, mem, nmem) < 0) - goto fail; - nmem++; - mem = mem->next; - } - while (irq) { - if (pnp_assign_irq(dev, irq, nirq) < 0) - goto fail; - nirq++; - irq = irq->next; - } - while (dma) { - if (pnp_assign_dma(dev, dma, ndma) < 0) - goto fail; - ndma++; - dma = dma->next; + list_for_each_entry(option, &dev->options, list) { + if (pnp_option_is_dependent(option) && + pnp_option_set(option) != set) + continue; + + switch (option->type) { + case IORESOURCE_IO: + ret = pnp_assign_port(dev, &option->u.port, nport++); + break; + case IORESOURCE_MEM: + ret = pnp_assign_mem(dev, &option->u.mem, nmem++); + break; + case IORESOURCE_IRQ: + ret = pnp_assign_irq(dev, &option->u.irq, nirq++); + break; + case IORESOURCE_DMA: + ret = pnp_assign_dma(dev, &option->u.dma, ndma++); + break; + default: + ret = -EINVAL; + break; } - } else if (dev->dependent) - goto fail; - - mutex_unlock(&pnp_res_mutex); - dbg_pnp_show_resources(dev, "after pnp_assign_resources"); - return 1; + if (ret < 0) + break; + } -fail: - pnp_clean_resource_table(dev); mutex_unlock(&pnp_res_mutex); - dbg_pnp_show_resources(dev, "after pnp_assign_resources (failed)"); - return 0; + if (ret < 0) { + dev_dbg(&dev->dev, "pnp_assign_resources failed (%d)\n", ret); + pnp_clean_resource_table(dev); + } else + dbg_pnp_show_resources(dev, "pnp_assign_resources succeeded"); + return ret; } /** @@ -332,29 +283,25 @@ fail: */ int pnp_auto_config_dev(struct pnp_dev *dev) { - struct pnp_option *dep; - int i = 1; + int i, ret; if (!pnp_can_configure(dev)) { dev_dbg(&dev->dev, "configuration not supported\n"); return -ENODEV; } - if (!dev->dependent) { - if (pnp_assign_resources(dev, 0)) + ret = pnp_assign_resources(dev, 0); + if (ret == 0) + return 0; + + for (i = 1; i < dev->num_dependent_sets; i++) { + ret = pnp_assign_resources(dev, i); + if (ret == 0) return 0; - } else { - dep = dev->dependent; - do { - if (pnp_assign_resources(dev, i)) - return 0; - dep = dep->next; - i++; - } while (dep); } dev_err(&dev->dev, "unable to assign resources\n"); - return -EBUSY; + return ret; } /** diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index e114b3d..c2f59f4 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -407,7 +407,7 @@ int pnpacpi_parse_allocated_resource(struct pnp_dev *dev) } static __init void pnpacpi_parse_dma_option(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, struct acpi_resource_dma *p) { int i; @@ -420,11 +420,11 @@ static __init void pnpacpi_parse_dma_option(struct pnp_dev *dev, map |= 1 << p->channels[i]; flags = dma_flags(p->type, p->bus_master, p->transfer); - pnp_register_dma_resource(dev, option, map, flags); + pnp_register_dma_resource(dev, option_flags, map, flags); } static __init void pnpacpi_parse_irq_option(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, struct acpi_resource_irq *p) { int i; @@ -440,11 +440,11 @@ static __init void pnpacpi_parse_irq_option(struct pnp_dev *dev, __set_bit(p->interrupts[i], map.bits); flags = irq_flags(p->triggering, p->polarity, p->sharable); - pnp_register_irq_resource(dev, option, &map, flags); + pnp_register_irq_resource(dev, option_flags, &map, flags); } static __init void pnpacpi_parse_ext_irq_option(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, struct acpi_resource_extended_irq *p) { int i; @@ -467,11 +467,11 @@ static __init void pnpacpi_parse_ext_irq_option(struct pnp_dev *dev, } flags = irq_flags(p->triggering, p->polarity, p->sharable); - pnp_register_irq_resource(dev, option, &map, flags); + pnp_register_irq_resource(dev, option_flags, &map, flags); } static __init void pnpacpi_parse_port_option(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, struct acpi_resource_io *io) { unsigned char flags = 0; @@ -481,23 +481,23 @@ static __init void pnpacpi_parse_port_option(struct pnp_dev *dev, if (io->io_decode == ACPI_DECODE_16) flags = IORESOURCE_IO_16BIT_ADDR; - pnp_register_port_resource(dev, option, io->minimum, io->maximum, + pnp_register_port_resource(dev, option_flags, io->minimum, io->maximum, io->alignment, io->address_length, flags); } static __init void pnpacpi_parse_fixed_port_option(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, struct acpi_resource_fixed_io *io) { if (io->address_length == 0) return; - pnp_register_port_resource(dev, option, io->address, io->address, 0, - io->address_length, IORESOURCE_IO_FIXED); + pnp_register_port_resource(dev, option_flags, io->address, io->address, + 0, io->address_length, IORESOURCE_IO_FIXED); } static __init void pnpacpi_parse_mem24_option(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, struct acpi_resource_memory24 *p) { unsigned char flags = 0; @@ -507,12 +507,12 @@ static __init void pnpacpi_parse_mem24_option(struct pnp_dev *dev, if (p->write_protect == ACPI_READ_WRITE_MEMORY) flags = IORESOURCE_MEM_WRITEABLE; - pnp_register_mem_resource(dev, option, p->minimum, p->maximum, + pnp_register_mem_resource(dev, option_flags, p->minimum, p->maximum, p->alignment, p->address_length, flags); } static __init void pnpacpi_parse_mem32_option(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, struct acpi_resource_memory32 *p) { unsigned char flags = 0; @@ -522,12 +522,12 @@ static __init void pnpacpi_parse_mem32_option(struct pnp_dev *dev, if (p->write_protect == ACPI_READ_WRITE_MEMORY) flags = IORESOURCE_MEM_WRITEABLE; - pnp_register_mem_resource(dev, option, p->minimum, p->maximum, + pnp_register_mem_resource(dev, option_flags, p->minimum, p->maximum, p->alignment, p->address_length, flags); } static __init void pnpacpi_parse_fixed_mem32_option(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, struct acpi_resource_fixed_memory32 *p) { unsigned char flags = 0; @@ -537,12 +537,12 @@ static __init void pnpacpi_parse_fixed_mem32_option(struct pnp_dev *dev, if (p->write_protect == ACPI_READ_WRITE_MEMORY) flags = IORESOURCE_MEM_WRITEABLE; - pnp_register_mem_resource(dev, option, p->address, p->address, + pnp_register_mem_resource(dev, option_flags, p->address, p->address, 0, p->address_length, flags); } static __init void pnpacpi_parse_address_option(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, struct acpi_resource *r) { struct acpi_resource_address64 addr, *p = &addr; @@ -562,18 +562,18 @@ static __init void pnpacpi_parse_address_option(struct pnp_dev *dev, if (p->resource_type == ACPI_MEMORY_RANGE) { if (p->info.mem.write_protect == ACPI_READ_WRITE_MEMORY) flags = IORESOURCE_MEM_WRITEABLE; - pnp_register_mem_resource(dev, option, p->minimum, p->minimum, - 0, p->address_length, flags); + pnp_register_mem_resource(dev, option_flags, p->minimum, + p->minimum, 0, p->address_length, + flags); } else if (p->resource_type == ACPI_IO_RANGE) - pnp_register_port_resource(dev, option, p->minimum, p->minimum, - 0, p->address_length, + pnp_register_port_resource(dev, option_flags, p->minimum, + p->minimum, 0, p->address_length, IORESOURCE_IO_FIXED); } struct acpipnp_parse_option_s { - struct pnp_option *option; - struct pnp_option *option_independent; struct pnp_dev *dev; + unsigned int option_flags; }; static __init acpi_status pnpacpi_option_resource(struct acpi_resource *res, @@ -582,15 +582,15 @@ static __init acpi_status pnpacpi_option_resource(struct acpi_resource *res, int priority; struct acpipnp_parse_option_s *parse_data = data; struct pnp_dev *dev = parse_data->dev; - struct pnp_option *option = parse_data->option; + unsigned int option_flags = parse_data->option_flags; switch (res->type) { case ACPI_RESOURCE_TYPE_IRQ: - pnpacpi_parse_irq_option(dev, option, &res->data.irq); + pnpacpi_parse_irq_option(dev, option_flags, &res->data.irq); break; case ACPI_RESOURCE_TYPE_DMA: - pnpacpi_parse_dma_option(dev, option, &res->data.dma); + pnpacpi_parse_dma_option(dev, option_flags, &res->data.dma); break; case ACPI_RESOURCE_TYPE_START_DEPENDENT: @@ -610,31 +610,19 @@ static __init acpi_status pnpacpi_option_resource(struct acpi_resource *res, priority = PNP_RES_PRIORITY_INVALID; break; } - /* TBD: Consider performance/robustness bits */ - option = pnp_register_dependent_option(dev, priority); - if (!option) - return AE_ERROR; - parse_data->option = option; + parse_data->option_flags = pnp_new_dependent_set(dev, priority); break; case ACPI_RESOURCE_TYPE_END_DEPENDENT: - /*only one EndDependentFn is allowed */ - if (!parse_data->option_independent) { - dev_warn(&dev->dev, "more than one EndDependentFn " - "in _PRS\n"); - return AE_ERROR; - } - parse_data->option = parse_data->option_independent; - parse_data->option_independent = NULL; - dev_dbg(&dev->dev, "end dependent options\n"); + parse_data->option_flags = 0; break; case ACPI_RESOURCE_TYPE_IO: - pnpacpi_parse_port_option(dev, option, &res->data.io); + pnpacpi_parse_port_option(dev, option_flags, &res->data.io); break; case ACPI_RESOURCE_TYPE_FIXED_IO: - pnpacpi_parse_fixed_port_option(dev, option, + pnpacpi_parse_fixed_port_option(dev, option_flags, &res->data.fixed_io); break; @@ -643,29 +631,31 @@ static __init acpi_status pnpacpi_option_resource(struct acpi_resource *res, break; case ACPI_RESOURCE_TYPE_MEMORY24: - pnpacpi_parse_mem24_option(dev, option, &res->data.memory24); + pnpacpi_parse_mem24_option(dev, option_flags, + &res->data.memory24); break; case ACPI_RESOURCE_TYPE_MEMORY32: - pnpacpi_parse_mem32_option(dev, option, &res->data.memory32); + pnpacpi_parse_mem32_option(dev, option_flags, + &res->data.memory32); break; case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: - pnpacpi_parse_fixed_mem32_option(dev, option, + pnpacpi_parse_fixed_mem32_option(dev, option_flags, &res->data.fixed_memory32); break; case ACPI_RESOURCE_TYPE_ADDRESS16: case ACPI_RESOURCE_TYPE_ADDRESS32: case ACPI_RESOURCE_TYPE_ADDRESS64: - pnpacpi_parse_address_option(dev, option, res); + pnpacpi_parse_address_option(dev, option_flags, res); break; case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64: break; case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: - pnpacpi_parse_ext_irq_option(dev, option, + pnpacpi_parse_ext_irq_option(dev, option_flags, &res->data.extended_irq); break; @@ -689,12 +679,9 @@ int __init pnpacpi_parse_resource_option_data(struct pnp_dev *dev) dev_dbg(&dev->dev, "parse resource options\n"); - parse_data.option = pnp_register_independent_option(dev); - if (!parse_data.option) - return -ENOMEM; - - parse_data.option_independent = parse_data.option; parse_data.dev = dev; + parse_data.option_flags = 0; + status = acpi_walk_resources(handle, METHOD_NAME__PRS, pnpacpi_option_resource, &parse_data); diff --git a/drivers/pnp/pnpbios/rsparser.c b/drivers/pnp/pnpbios/rsparser.c index db23ba7..ca56767 100644 --- a/drivers/pnp/pnpbios/rsparser.c +++ b/drivers/pnp/pnpbios/rsparser.c @@ -216,7 +216,7 @@ len_err: static __init void pnpbios_parse_mem_option(struct pnp_dev *dev, unsigned char *p, int size, - struct pnp_option *option) + unsigned int option_flags) { resource_size_t min, max, align, len; unsigned char flags; @@ -226,12 +226,13 @@ static __init void pnpbios_parse_mem_option(struct pnp_dev *dev, align = (p[9] << 8) | p[8]; len = ((p[11] << 8) | p[10]) << 8; flags = p[3]; - pnp_register_mem_resource(dev, option, min, max, align, len, flags); + pnp_register_mem_resource(dev, option_flags, min, max, align, len, + flags); } static __init void pnpbios_parse_mem32_option(struct pnp_dev *dev, unsigned char *p, int size, - struct pnp_option *option) + unsigned int option_flags) { resource_size_t min, max, align, len; unsigned char flags; @@ -241,12 +242,13 @@ static __init void pnpbios_parse_mem32_option(struct pnp_dev *dev, align = (p[15] << 24) | (p[14] << 16) | (p[13] << 8) | p[12]; len = (p[19] << 24) | (p[18] << 16) | (p[17] << 8) | p[16]; flags = p[3]; - pnp_register_mem_resource(dev, option, min, max, align, len, flags); + pnp_register_mem_resource(dev, option_flags, min, max, align, len, + flags); } static __init void pnpbios_parse_fixed_mem32_option(struct pnp_dev *dev, unsigned char *p, int size, - struct pnp_option *option) + unsigned int option_flags) { resource_size_t base, len; unsigned char flags; @@ -254,12 +256,12 @@ static __init void pnpbios_parse_fixed_mem32_option(struct pnp_dev *dev, base = (p[7] << 24) | (p[6] << 16) | (p[5] << 8) | p[4]; len = (p[11] << 24) | (p[10] << 16) | (p[9] << 8) | p[8]; flags = p[3]; - pnp_register_mem_resource(dev, option, base, base, 0, len, flags); + pnp_register_mem_resource(dev, option_flags, base, base, 0, len, flags); } static __init void pnpbios_parse_irq_option(struct pnp_dev *dev, unsigned char *p, int size, - struct pnp_option *option) + unsigned int option_flags) { unsigned long bits; pnp_irq_mask_t map; @@ -273,19 +275,19 @@ static __init void pnpbios_parse_irq_option(struct pnp_dev *dev, if (size > 2) flags = p[3]; - pnp_register_irq_resource(dev, option, &map, flags); + pnp_register_irq_resource(dev, option_flags, &map, flags); } static __init void pnpbios_parse_dma_option(struct pnp_dev *dev, unsigned char *p, int size, - struct pnp_option *option) + unsigned int option_flags) { - pnp_register_dma_resource(dev, option, p[1], p[2]); + pnp_register_dma_resource(dev, option_flags, p[1], p[2]); } static __init void pnpbios_parse_port_option(struct pnp_dev *dev, unsigned char *p, int size, - struct pnp_option *option) + unsigned int option_flags) { resource_size_t min, max, align, len; unsigned char flags; @@ -295,38 +297,35 @@ static __init void pnpbios_parse_port_option(struct pnp_dev *dev, align = p[6]; len = p[7]; flags = p[1] ? IORESOURCE_IO_16BIT_ADDR : 0; - pnp_register_port_resource(dev, option, min, max, align, len, flags); + pnp_register_port_resource(dev, option_flags, min, max, align, len, + flags); } static __init void pnpbios_parse_fixed_port_option(struct pnp_dev *dev, unsigned char *p, int size, - struct pnp_option *option) + unsigned int option_flags) { resource_size_t base, len; base = (p[2] << 8) | p[1]; len = p[3]; - pnp_register_port_resource(dev, option, base, base, 0, len, + pnp_register_port_resource(dev, option_flags, base, base, 0, len, IORESOURCE_IO_FIXED); } static __init unsigned char * pnpbios_parse_resource_option_data(unsigned char *p, unsigned char *end, - struct pnp_dev *dev) + struct pnp_dev *dev) { unsigned int len, tag; int priority; - struct pnp_option *option, *option_independent; + unsigned int option_flags; if (!p) return NULL; dev_dbg(&dev->dev, "parse resource options\n"); - - option_independent = option = pnp_register_independent_option(dev); - if (!option) - return NULL; - + option_flags = 0; while ((char *)p < (char *)end) { /* determine the type of tag */ @@ -343,37 +342,38 @@ pnpbios_parse_resource_option_data(unsigned char *p, unsigned char *end, case LARGE_TAG_MEM: if (len != 9) goto len_err; - pnpbios_parse_mem_option(dev, p, len, option); + pnpbios_parse_mem_option(dev, p, len, option_flags); break; case LARGE_TAG_MEM32: if (len != 17) goto len_err; - pnpbios_parse_mem32_option(dev, p, len, option); + pnpbios_parse_mem32_option(dev, p, len, option_flags); break; case LARGE_TAG_FIXEDMEM32: if (len != 9) goto len_err; - pnpbios_parse_fixed_mem32_option(dev, p, len, option); + pnpbios_parse_fixed_mem32_option(dev, p, len, + option_flags); break; case SMALL_TAG_IRQ: if (len < 2 || len > 3) goto len_err; - pnpbios_parse_irq_option(dev, p, len, option); + pnpbios_parse_irq_option(dev, p, len, option_flags); break; case SMALL_TAG_DMA: if (len != 2) goto len_err; - pnpbios_parse_dma_option(dev, p, len, option); + pnpbios_parse_dma_option(dev, p, len, option_flags); break; case SMALL_TAG_PORT: if (len != 7) goto len_err; - pnpbios_parse_port_option(dev, p, len, option); + pnpbios_parse_port_option(dev, p, len, option_flags); break; case SMALL_TAG_VENDOR: @@ -383,7 +383,8 @@ pnpbios_parse_resource_option_data(unsigned char *p, unsigned char *end, case SMALL_TAG_FIXEDPORT: if (len != 3) goto len_err; - pnpbios_parse_fixed_port_option(dev, p, len, option); + pnpbios_parse_fixed_port_option(dev, p, len, + option_flags); break; case SMALL_TAG_STARTDEP: @@ -392,19 +393,13 @@ pnpbios_parse_resource_option_data(unsigned char *p, unsigned char *end, priority = PNP_RES_PRIORITY_ACCEPTABLE; if (len > 0) priority = p[1]; - option = pnp_register_dependent_option(dev, priority); - if (!option) - return NULL; + option_flags = pnp_new_dependent_set(dev, priority); break; case SMALL_TAG_ENDDEP: if (len != 0) goto len_err; - if (option_independent == option) - dev_warn(&dev->dev, "missing " - "SMALL_TAG_STARTDEP tag\n"); - option = option_independent; - dev_dbg(&dev->dev, "end dependent options\n"); + option_flags = 0; break; case SMALL_TAG_END: diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c index e8515ce..55f55ed 100644 --- a/drivers/pnp/quirks.c +++ b/drivers/pnp/quirks.c @@ -5,6 +5,8 @@ * when building up the resource structure for the first time. * * Copyright (c) 2000 Peter Denison + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas * * Heavily based on PCI quirks handling which is * @@ -20,189 +22,207 @@ #include #include "base.h" +static void quirk_awe32_add_ports(struct pnp_dev *dev, + struct pnp_option *option, + unsigned int offset) +{ + struct pnp_option *new_option; + + new_option = kmalloc(sizeof(struct pnp_option), GFP_KERNEL); + if (!new_option) { + dev_err(&dev->dev, "couldn't add ioport region to option set " + "%d\n", pnp_option_set(option)); + return; + } + + *new_option = *option; + new_option->u.port.min += offset; + new_option->u.port.max += offset; + list_add(&new_option->list, &option->list); + + dev_info(&dev->dev, "added ioport region %#llx-%#llx to set %d\n", + (unsigned long long) new_option->u.port.min, + (unsigned long long) new_option->u.port.max, + pnp_option_set(option)); +} + static void quirk_awe32_resources(struct pnp_dev *dev) { - struct pnp_port *port, *port2, *port3; - struct pnp_option *res = dev->dependent; + struct pnp_option *option; + unsigned int set = ~0; /* - * Unfortunately the isapnp_add_port_resource is too tightly bound - * into the PnP discovery sequence, and cannot be used. Link in the - * two extra ports (at offset 0x400 and 0x800 from the one given) by - * hand. + * Add two extra ioport regions (at offset 0x400 and 0x800 from the + * one given) to every dependent option set. */ - for (; res; res = res->next) { - port2 = pnp_alloc(sizeof(struct pnp_port)); - if (!port2) - return; - port3 = pnp_alloc(sizeof(struct pnp_port)); - if (!port3) { - kfree(port2); - return; + list_for_each_entry(option, &dev->options, list) { + if (pnp_option_is_dependent(option) && + pnp_option_set(option) != set) { + set = pnp_option_set(option); + quirk_awe32_add_ports(dev, option, 0x800); + quirk_awe32_add_ports(dev, option, 0x400); } - port = res->port; - memcpy(port2, port, sizeof(struct pnp_port)); - memcpy(port3, port, sizeof(struct pnp_port)); - port->next = port2; - port2->next = port3; - port2->min += 0x400; - port2->max += 0x400; - port3->min += 0x800; - port3->max += 0x800; - dev_info(&dev->dev, - "AWE32 quirk - added ioports 0x%lx and 0x%lx\n", - (unsigned long)port2->min, - (unsigned long)port3->min); } } static void quirk_cmi8330_resources(struct pnp_dev *dev) { - struct pnp_option *res = dev->dependent; - unsigned long tmp; - - for (; res; res = res->next) { - - struct pnp_irq *irq; - struct pnp_dma *dma; + struct pnp_option *option; + struct pnp_irq *irq; + struct pnp_dma *dma; - for (irq = res->irq; irq; irq = irq->next) { - /* Valid irqs are 5, 7, 10 */ - tmp = 0x04A0; - bitmap_copy(irq->map.bits, &tmp, 16); - } + list_for_each_entry(option, &dev->options, list) { + if (!pnp_option_is_dependent(option)) + continue; - for (dma = res->dma; dma; dma = dma->next) { - /* Valid 8bit dma channels are 1,3 */ + if (option->type == IORESOURCE_IRQ) { + irq = &option->u.irq; + bitmap_zero(irq->map.bits, PNP_IRQ_NR); + __set_bit(5, irq->map.bits); + __set_bit(7, irq->map.bits); + __set_bit(10, irq->map.bits); + dev_info(&dev->dev, "set possible IRQs in " + "option set %d to 5, 7, 10\n", + pnp_option_set(option)); + } else if (option->type == IORESOURCE_DMA) { + dma = &option->u.dma; if ((dma->flags & IORESOURCE_DMA_TYPE_MASK) == - IORESOURCE_DMA_8BIT) - dma->map = 0x000A; + IORESOURCE_DMA_8BIT && + dma->map != 0x0A) { + dev_info(&dev->dev, "changing possible " + "DMA channel mask in option set %d " + "from %#02x to 0x0A (1, 3)\n", + pnp_option_set(option), dma->map); + dma->map = 0x0A; + } } } - dev_info(&dev->dev, "CMI8330 quirk - forced possible IRQs to 5, 7, 10 " - "and DMA channels to 1, 3\n"); } static void quirk_sb16audio_resources(struct pnp_dev *dev) { + struct pnp_option *option; + unsigned int prev_option_flags = ~0, n = 0; struct pnp_port *port; - struct pnp_option *res = dev->dependent; - int changed = 0; /* - * The default range on the mpu port for these devices is 0x388-0x388. + * The default range on the OPL port for these devices is 0x388-0x388. * Here we increase that range so that two such cards can be * auto-configured. */ + list_for_each_entry(option, &dev->options, list) { + if (prev_option_flags != option->flags) { + prev_option_flags = option->flags; + n = 0; + } - for (; res; res = res->next) { - port = res->port; - if (!port) - continue; - port = port->next; - if (!port) - continue; - port = port->next; - if (!port) - continue; - if (port->min != port->max) - continue; - port->max += 0x70; - changed = 1; + if (pnp_option_is_dependent(option) && + option->type == IORESOURCE_IO) { + n++; + port = &option->u.port; + if (n == 3 && port->min == port->max) { + port->max += 0x70; + dev_info(&dev->dev, "increased option port " + "range from %#llx-%#llx to " + "%#llx-%#llx\n", + (unsigned long long) port->min, + (unsigned long long) port->min, + (unsigned long long) port->min, + (unsigned long long) port->max); + } + } } - if (changed) - dev_info(&dev->dev, "SB audio device quirk - increased port range\n"); } -static struct pnp_option *quirk_isapnp_mpu_options(struct pnp_dev *dev) +static struct pnp_option *pnp_clone_dependent_set(struct pnp_dev *dev, + unsigned int set) { - struct pnp_option *head = NULL; - struct pnp_option *prev = NULL; - struct pnp_option *res; - - /* - * Build a functional IRQ-optional variant of each MPU option. - */ - - for (res = dev->dependent; res; res = res->next) { - struct pnp_option *curr; - struct pnp_port *port; - struct pnp_port *copy_port; - struct pnp_irq *irq; - struct pnp_irq *copy_irq; - - port = res->port; - irq = res->irq; - if (!port || !irq) - continue; + struct pnp_option *tail = NULL, *first_new_option = NULL; + struct pnp_option *option, *new_option; + unsigned int flags; - copy_port = pnp_alloc(sizeof *copy_port); - if (!copy_port) - break; - - copy_irq = pnp_alloc(sizeof *copy_irq); - if (!copy_irq) { - kfree(copy_port); - break; - } + list_for_each_entry(option, &dev->options, list) { + if (pnp_option_is_dependent(option)) + tail = option; + } + if (!tail) { + dev_err(&dev->dev, "no dependent option sets\n"); + return NULL; + } - *copy_port = *port; - copy_port->next = NULL; + flags = pnp_new_dependent_set(dev, PNP_RES_PRIORITY_FUNCTIONAL); + list_for_each_entry(option, &dev->options, list) { + if (pnp_option_is_dependent(option) && + pnp_option_set(option) == set) { + new_option = kmalloc(sizeof(struct pnp_option), + GFP_KERNEL); + if (!new_option) { + dev_err(&dev->dev, "couldn't clone dependent " + "set %d\n", set); + return NULL; + } - *copy_irq = *irq; - copy_irq->flags |= IORESOURCE_IRQ_OPTIONAL; - copy_irq->next = NULL; + *new_option = *option; + new_option->flags = flags; + if (!first_new_option) + first_new_option = new_option; - curr = pnp_build_option(PNP_RES_PRIORITY_FUNCTIONAL); - if (!curr) { - kfree(copy_port); - kfree(copy_irq); - break; + list_add(&new_option->list, &tail->list); + tail = new_option; } - curr->port = copy_port; - curr->irq = copy_irq; - - if (prev) - prev->next = curr; - else - head = curr; - prev = curr; } - if (head) - dev_info(&dev->dev, "adding IRQ-optional MPU options\n"); - return head; + return first_new_option; } -static void quirk_ad1815_mpu_resources(struct pnp_dev *dev) + +static void quirk_add_irq_optional_dependent_sets(struct pnp_dev *dev) { - struct pnp_option *res; + struct pnp_option *new_option; + unsigned int num_sets, i, set; struct pnp_irq *irq; - res = dev->independent; - if (!res) - return; + num_sets = dev->num_dependent_sets; + for (i = 0; i < num_sets; i++) { + new_option = pnp_clone_dependent_set(dev, i); + if (!new_option) + return; - irq = res->irq; - if (!irq || irq->next) - return; + set = pnp_option_set(new_option); + while (new_option && pnp_option_set(new_option) == set) { + if (new_option->type == IORESOURCE_IRQ) { + irq = &new_option->u.irq; + irq->flags |= IORESOURCE_IRQ_OPTIONAL; + } + dbg_pnp_show_option(dev, new_option); + new_option = list_entry(new_option->list.next, + struct pnp_option, list); + } - irq->flags |= IORESOURCE_IRQ_OPTIONAL; - dev_info(&dev->dev, "made independent IRQ optional\n"); + dev_info(&dev->dev, "added dependent option set %d (same as " + "set %d except IRQ optional)\n", set, i); + } } -static void quirk_isapnp_mpu_resources(struct pnp_dev *dev) +static void quirk_ad1815_mpu_resources(struct pnp_dev *dev) { - struct pnp_option *res; + struct pnp_option *option; + struct pnp_irq *irq = NULL; + unsigned int independent_irqs = 0; + + list_for_each_entry(option, &dev->options, list) { + if (option->type == IORESOURCE_IRQ && + !pnp_option_is_dependent(option)) { + independent_irqs++; + irq = &option->u.irq; + } + } - res = dev->dependent; - if (!res) + if (independent_irqs != 1) return; - while (res->next) - res = res->next; - - res->next = quirk_isapnp_mpu_options(dev); + irq->flags |= IORESOURCE_IRQ_OPTIONAL; + dev_info(&dev->dev, "made independent IRQ optional\n"); } #include @@ -297,10 +317,10 @@ static struct pnp_fixup pnp_fixups[] = { {"CTL0043", quirk_sb16audio_resources}, {"CTL0044", quirk_sb16audio_resources}, {"CTL0045", quirk_sb16audio_resources}, - /* Add IRQ-less MPU options */ + /* Add IRQ-optional MPU options */ {"ADS7151", quirk_ad1815_mpu_resources}, - {"ADS7181", quirk_isapnp_mpu_resources}, - {"AZT0002", quirk_isapnp_mpu_resources}, + {"ADS7181", quirk_add_irq_optional_dependent_sets}, + {"AZT0002", quirk_add_irq_optional_dependent_sets}, /* PnP resources that might overlap PCI BARs */ {"PNP0c01", quirk_system_pci_resources}, {"PNP0c02", quirk_system_pci_resources}, diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index a795864..d638897 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -3,6 +3,8 @@ * * based on isapnp.c resource management (c) Jaroslav Kysela * Copyright 2003 Adam Belay + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas */ #include @@ -28,78 +30,36 @@ static int pnp_reserve_mem[16] = {[0 ... 15] = -1 }; /* reserve (don't use) some * option registration */ -struct pnp_option *pnp_build_option(int priority) +struct pnp_option *pnp_build_option(struct pnp_dev *dev, unsigned long type, + unsigned int option_flags) { - struct pnp_option *option = pnp_alloc(sizeof(struct pnp_option)); + struct pnp_option *option; + option = kzalloc(sizeof(struct pnp_option), GFP_KERNEL); if (!option) return NULL; - option->priority = priority & 0xff; - /* make sure the priority is valid */ - if (option->priority > PNP_RES_PRIORITY_FUNCTIONAL) - option->priority = PNP_RES_PRIORITY_INVALID; - - return option; -} - -struct pnp_option *pnp_register_independent_option(struct pnp_dev *dev) -{ - struct pnp_option *option; - - option = pnp_build_option(PNP_RES_PRIORITY_PREFERRED); - - /* this should never happen but if it does we'll try to continue */ - if (dev->independent) - dev_err(&dev->dev, "independent resource already registered\n"); - dev->independent = option; - - dev_dbg(&dev->dev, "new independent option\n"); - return option; -} - -struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev, - int priority) -{ - struct pnp_option *option; + option->flags = option_flags; + option->type = type; - option = pnp_build_option(priority); - - if (dev->dependent) { - struct pnp_option *parent = dev->dependent; - while (parent->next) - parent = parent->next; - parent->next = option; - } else - dev->dependent = option; - - dev_dbg(&dev->dev, "new dependent option (priority %#x)\n", priority); + list_add_tail(&option->list, &dev->options); return option; } -int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_option *option, +int pnp_register_irq_resource(struct pnp_dev *dev, unsigned int option_flags, pnp_irq_mask_t *map, unsigned char flags) { - struct pnp_irq *irq, *ptr; -#ifdef DEBUG - char buf[PNP_IRQ_NR]; /* hex-encoded, so this is overkill but safe */ -#endif + struct pnp_option *option; + struct pnp_irq *irq; - irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL); - if (!irq) + option = pnp_build_option(dev, IORESOURCE_IRQ, option_flags); + if (!option) return -ENOMEM; + irq = &option->u.irq; irq->map = *map; irq->flags = flags; - ptr = option->irq; - while (ptr && ptr->next) - ptr = ptr->next; - if (ptr) - ptr->next = irq; - else - option->irq = irq; - #ifdef CONFIG_PCI { int i; @@ -110,163 +70,81 @@ int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_option *option, } #endif -#ifdef DEBUG - bitmap_scnprintf(buf, sizeof(buf), irq->map.bits, PNP_IRQ_NR); - dev_dbg(&dev->dev, " irq bitmask %s flags %#x\n", buf, - irq->flags); -#endif + dbg_pnp_show_option(dev, option); return 0; } -int pnp_register_dma_resource(struct pnp_dev *dev, struct pnp_option *option, +int pnp_register_dma_resource(struct pnp_dev *dev, unsigned int option_flags, unsigned char map, unsigned char flags) { - struct pnp_dma *dma, *ptr; + struct pnp_option *option; + struct pnp_dma *dma; - dma = kzalloc(sizeof(struct pnp_dma), GFP_KERNEL); - if (!dma) + option = pnp_build_option(dev, IORESOURCE_DMA, option_flags); + if (!option) return -ENOMEM; + dma = &option->u.dma; dma->map = map; dma->flags = flags; - ptr = option->dma; - while (ptr && ptr->next) - ptr = ptr->next; - if (ptr) - ptr->next = dma; - else - option->dma = dma; - - dev_dbg(&dev->dev, " dma bitmask %#x flags %#x\n", dma->map, - dma->flags); + dbg_pnp_show_option(dev, option); return 0; } -int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option, +int pnp_register_port_resource(struct pnp_dev *dev, unsigned int option_flags, resource_size_t min, resource_size_t max, resource_size_t align, resource_size_t size, unsigned char flags) { - struct pnp_port *port, *ptr; + struct pnp_option *option; + struct pnp_port *port; - port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); - if (!port) + option = pnp_build_option(dev, IORESOURCE_IO, option_flags); + if (!option) return -ENOMEM; + port = &option->u.port; port->min = min; port->max = max; port->align = align; port->size = size; port->flags = flags; - ptr = option->port; - while (ptr && ptr->next) - ptr = ptr->next; - if (ptr) - ptr->next = port; - else - option->port = port; - - dev_dbg(&dev->dev, " io " - "min %#llx max %#llx align %lld size %lld flags %#x\n", - (unsigned long long) port->min, - (unsigned long long) port->max, - (unsigned long long) port->align, - (unsigned long long) port->size, port->flags); + dbg_pnp_show_option(dev, option); return 0; } -int pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option, +int pnp_register_mem_resource(struct pnp_dev *dev, unsigned int option_flags, resource_size_t min, resource_size_t max, resource_size_t align, resource_size_t size, unsigned char flags) { - struct pnp_mem *mem, *ptr; + struct pnp_option *option; + struct pnp_mem *mem; - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) + option = pnp_build_option(dev, IORESOURCE_MEM, option_flags); + if (!option) return -ENOMEM; + mem = &option->u.mem; mem->min = min; mem->max = max; mem->align = align; mem->size = size; mem->flags = flags; - ptr = option->mem; - while (ptr && ptr->next) - ptr = ptr->next; - if (ptr) - ptr->next = mem; - else - option->mem = mem; - - dev_dbg(&dev->dev, " mem " - "min %#llx max %#llx align %lld size %lld flags %#x\n", - (unsigned long long) mem->min, - (unsigned long long) mem->max, - (unsigned long long) mem->align, - (unsigned long long) mem->size, mem->flags); + dbg_pnp_show_option(dev, option); return 0; } -static void pnp_free_port(struct pnp_port *port) -{ - struct pnp_port *next; - - while (port) { - next = port->next; - kfree(port); - port = next; - } -} - -static void pnp_free_irq(struct pnp_irq *irq) -{ - struct pnp_irq *next; - - while (irq) { - next = irq->next; - kfree(irq); - irq = next; - } -} - -static void pnp_free_dma(struct pnp_dma *dma) -{ - struct pnp_dma *next; - - while (dma) { - next = dma->next; - kfree(dma); - dma = next; - } -} - -static void pnp_free_mem(struct pnp_mem *mem) +void pnp_free_options(struct pnp_dev *dev) { - struct pnp_mem *next; - - while (mem) { - next = mem->next; - kfree(mem); - mem = next; - } -} + struct pnp_option *option, *tmp; -void pnp_free_option(struct pnp_option *option) -{ - struct pnp_option *next; - - while (option) { - next = option->next; - pnp_free_port(option->port); - pnp_free_irq(option->irq); - pnp_free_dma(option->dma); - pnp_free_mem(option->mem); + list_for_each_entry_safe(option, tmp, &dev->options, list) { + list_del(&option->list); kfree(option); - option = next; } } @@ -668,66 +546,50 @@ struct pnp_resource *pnp_add_mem_resource(struct pnp_dev *dev, return pnp_res; } -static int pnp_possible_option(struct pnp_option *option, int type, - resource_size_t start, resource_size_t size) +/* + * Determine whether the specified resource is a possible configuration + * for this device. + */ +int pnp_possible_config(struct pnp_dev *dev, int type, resource_size_t start, + resource_size_t size) { - struct pnp_option *tmp; + struct pnp_option *option; struct pnp_port *port; struct pnp_mem *mem; struct pnp_irq *irq; struct pnp_dma *dma; - if (!option) - return 0; + list_for_each_entry(option, &dev->options, list) { + if (option->type != type) + continue; - for (tmp = option; tmp; tmp = tmp->next) { - switch (type) { + switch (option->type) { case IORESOURCE_IO: - for (port = tmp->port; port; port = port->next) { - if (port->min == start && port->size == size) - return 1; - } + port = &option->u.port; + if (port->min == start && port->size == size) + return 1; break; case IORESOURCE_MEM: - for (mem = tmp->mem; mem; mem = mem->next) { - if (mem->min == start && mem->size == size) - return 1; - } + mem = &option->u.mem; + if (mem->min == start && mem->size == size) + return 1; break; case IORESOURCE_IRQ: - for (irq = tmp->irq; irq; irq = irq->next) { - if (start < PNP_IRQ_NR && - test_bit(start, irq->map.bits)) - return 1; - } + irq = &option->u.irq; + if (start < PNP_IRQ_NR && + test_bit(start, irq->map.bits)) + return 1; break; case IORESOURCE_DMA: - for (dma = tmp->dma; dma; dma = dma->next) { - if (dma->map & (1 << start)) - return 1; - } + dma = &option->u.dma; + if (dma->map & (1 << start)) + return 1; break; } } return 0; } - -/* - * Determine whether the specified resource is a possible configuration - * for this device. - */ -int pnp_possible_config(struct pnp_dev *dev, int type, resource_size_t start, - resource_size_t size) -{ - if (pnp_possible_option(dev->independent, type, start, size)) - return 1; - - if (pnp_possible_option(dev->dependent, type, start, size)) - return 1; - - return 0; -} EXPORT_SYMBOL(pnp_possible_config); /* format is: pnp_reserve_irq=irq1[,irq2] .... */ diff --git a/drivers/pnp/support.c b/drivers/pnp/support.c index 0ad42db..bbf78ef 100644 --- a/drivers/pnp/support.c +++ b/drivers/pnp/support.c @@ -2,6 +2,8 @@ * support.c - standard functions for the use of pnp protocol drivers * * Copyright 2003 Adam Belay + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas */ #include @@ -117,3 +119,93 @@ void dbg_pnp_show_resources(struct pnp_dev *dev, char *desc) } #endif } + +char *pnp_option_priority_name(struct pnp_option *option) +{ + switch (pnp_option_priority(option)) { + case PNP_RES_PRIORITY_PREFERRED: + return "preferred"; + case PNP_RES_PRIORITY_ACCEPTABLE: + return "acceptable"; + case PNP_RES_PRIORITY_FUNCTIONAL: + return "functional"; + } + return "invalid"; +} + +void dbg_pnp_show_option(struct pnp_dev *dev, struct pnp_option *option) +{ +#ifdef DEBUG + char buf[128]; + int len = 0, i; + struct pnp_port *port; + struct pnp_mem *mem; + struct pnp_irq *irq; + struct pnp_dma *dma; + + if (pnp_option_is_dependent(option)) + len += snprintf(buf + len, sizeof(buf) - len, + " dependent set %d (%s) ", + pnp_option_set(option), + pnp_option_priority_name(option)); + else + len += snprintf(buf + len, sizeof(buf) - len, " independent "); + + switch (option->type) { + case IORESOURCE_IO: + port = &option->u.port; + len += snprintf(buf + len, sizeof(buf) - len, "io min %#llx " + "max %#llx align %lld size %lld flags %#x", + (unsigned long long) port->min, + (unsigned long long) port->max, + (unsigned long long) port->align, + (unsigned long long) port->size, port->flags); + break; + case IORESOURCE_MEM: + mem = &option->u.mem; + len += snprintf(buf + len, sizeof(buf) - len, "mem min %#llx " + "max %#llx align %lld size %lld flags %#x", + (unsigned long long) mem->min, + (unsigned long long) mem->max, + (unsigned long long) mem->align, + (unsigned long long) mem->size, mem->flags); + break; + case IORESOURCE_IRQ: + irq = &option->u.irq; + len += snprintf(buf + len, sizeof(buf) - len, "irq"); + if (bitmap_empty(irq->map.bits, PNP_IRQ_NR)) + len += snprintf(buf + len, sizeof(buf) - len, + " "); + else { + for (i = 0; i < PNP_IRQ_NR; i++) + if (test_bit(i, irq->map.bits)) + len += snprintf(buf + len, + sizeof(buf) - len, + " %d", i); + } + len += snprintf(buf + len, sizeof(buf) - len, " flags %#x", + irq->flags); + if (irq->flags & IORESOURCE_IRQ_OPTIONAL) + len += snprintf(buf + len, sizeof(buf) - len, + " (optional)"); + break; + case IORESOURCE_DMA: + dma = &option->u.dma; + len += snprintf(buf + len, sizeof(buf) - len, "dma"); + if (!dma->map) + len += snprintf(buf + len, sizeof(buf) - len, + " "); + else { + for (i = 0; i < 8; i++) + if (dma->map & (1 << i)) + len += snprintf(buf + len, + sizeof(buf) - len, + " %d", i); + } + len += snprintf(buf + len, sizeof(buf) - len, " (bitmask %#x) " + "flags %#x", dma->map, dma->flags); + break; + } + dev_dbg(&dev->dev, "%s\n", buf); +#endif +} diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 785126f..1ce54b6 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -1,6 +1,8 @@ /* * Linux Plug and Play Support * Copyright by Adam Belay + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas */ #ifndef _LINUX_PNP_H @@ -249,9 +251,9 @@ struct pnp_dev { int active; int capabilities; - struct pnp_option *independent; - struct pnp_option *dependent; + unsigned int num_dependent_sets; struct list_head resources; + struct list_head options; char name[PNP_NAME_LEN]; /* contains a human-readable name */ int flags; /* used by protocols */ -- cgit v0.10.2 From 84684c7469a2e6fcbf8c808ac5030ba2de14ff77 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:18 -0600 Subject: PNP: avoid legacy IDE IRQs If an IDE controller is in compatibility mode, it expects to use IRQs 14 and 15, so PNP should avoid them. This patch should resolve this problem report: parallel driver grabs IRQ14 preventing legacy SFF ATA controller from working https://bugzilla.novell.com/show_bug.cgi?id=375836 Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index d638897..4cfe3a1 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -286,6 +286,61 @@ static irqreturn_t pnp_test_handler(int irq, void *dev_id) return IRQ_HANDLED; } +#ifdef CONFIG_PCI +static int pci_dev_uses_irq(struct pnp_dev *pnp, struct pci_dev *pci, + unsigned int irq) +{ + u32 class; + u8 progif; + + if (pci->irq == irq) { + dev_dbg(&pnp->dev, "device %s using irq %d\n", + pci_name(pci), irq); + return 1; + } + + /* + * See pci_setup_device() and ata_pci_sff_activate_host() for + * similar IDE legacy detection. + */ + pci_read_config_dword(pci, PCI_CLASS_REVISION, &class); + class >>= 8; /* discard revision ID */ + progif = class & 0xff; + class >>= 8; + + if (class == PCI_CLASS_STORAGE_IDE) { + /* + * Unless both channels are native-PCI mode only, + * treat the compatibility IRQs as busy. + */ + if ((progif & 0x5) != 0x5) + if (pci_get_legacy_ide_irq(pci, 0) == irq || + pci_get_legacy_ide_irq(pci, 1) == irq) { + dev_dbg(&pnp->dev, "legacy IDE device %s " + "using irq %d\n", pci_name(pci), irq); + return 1; + } + } + + return 0; +} +#endif + +static int pci_uses_irq(struct pnp_dev *pnp, unsigned int irq) +{ +#ifdef CONFIG_PCI + struct pci_dev *pci = NULL; + + for_each_pci_dev(pci) { + if (pci_dev_uses_irq(pnp, pci, irq)) { + pci_dev_put(pci); + return 1; + } + } +#endif + return 0; +} + int pnp_check_irq(struct pnp_dev *dev, struct resource *res) { int i; @@ -317,18 +372,9 @@ int pnp_check_irq(struct pnp_dev *dev, struct resource *res) } } -#ifdef CONFIG_PCI /* check if the resource is being used by a pci device */ - { - struct pci_dev *pci = NULL; - for_each_pci_dev(pci) { - if (pci->irq == *irq) { - pci_dev_put(pci); - return 0; - } - } - } -#endif + if (pci_uses_irq(dev, *irq)) + return 0; /* check if the resource is already in use, skip if the * device is active because it itself may be in use */ -- cgit v0.10.2 From 40ab4f4c1d843362eb26d83425317e91fbd98b17 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:19 -0600 Subject: PNPACPI: add support for HP vendor-specific CCSR descriptors The HP CCSR descriptor describes MMIO address space that should appear as a MEM resource. This patch adds support for parsing these descriptors in the _CRS data. The visible effect of this is that these MEM resources will appear in /sys/devices/pnp0/.../resources, which means that "lspnp -v" will report it, user applications can use this to locate device CSR space, and kernel drivers can use the normal PNP resource accessors to locate them. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index c2f59f4..d7e9f21 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -3,6 +3,8 @@ * * Copyright (c) 2004 Matthieu Castet * Copyright (c) 2004 Li Shaohua + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -187,6 +189,61 @@ static void pnpacpi_parse_allocated_ioresource(struct pnp_dev *dev, u64 start, pnp_add_io_resource(dev, start, end, flags); } +/* + * Device CSRs that do not appear in PCI config space should be described + * via ACPI. This would normally be done with Address Space Descriptors + * marked as "consumer-only," but old versions of Windows and Linux ignore + * the producer/consumer flag, so HP invented a vendor-defined resource to + * describe the location and size of CSR space. + */ +static struct acpi_vendor_uuid hp_ccsr_uuid = { + .subtype = 2, + .data = { 0xf9, 0xad, 0xe9, 0x69, 0x4f, 0x92, 0x5f, 0xab, 0xf6, 0x4a, + 0x24, 0xd2, 0x01, 0x37, 0x0e, 0xad }, +}; + +static int vendor_resource_matches(struct pnp_dev *dev, + struct acpi_resource_vendor_typed *vendor, + struct acpi_vendor_uuid *match, + int expected_len) +{ + int uuid_len = sizeof(vendor->uuid); + u8 uuid_subtype = vendor->uuid_subtype; + u8 *uuid = vendor->uuid; + int actual_len; + + /* byte_length includes uuid_subtype and uuid */ + actual_len = vendor->byte_length - uuid_len - 1; + + if (uuid_subtype == match->subtype && + uuid_len == sizeof(match->data) && + memcmp(uuid, match->data, uuid_len) == 0) { + if (expected_len && expected_len != actual_len) { + dev_err(&dev->dev, "wrong vendor descriptor size; " + "expected %d, found %d bytes\n", + expected_len, actual_len); + return 0; + } + + return 1; + } + + return 0; +} + +static void pnpacpi_parse_allocated_vendor(struct pnp_dev *dev, + struct acpi_resource_vendor_typed *vendor) +{ + if (vendor_resource_matches(dev, vendor, &hp_ccsr_uuid, 16)) { + u64 start, length; + + memcpy(&start, vendor->byte_data, sizeof(start)); + memcpy(&length, vendor->byte_data + 8, sizeof(length)); + + pnp_add_mem_resource(dev, start, start + length - 1, 0); + } +} + static void pnpacpi_parse_allocated_memresource(struct pnp_dev *dev, u64 start, u64 len, int write_protect) @@ -237,6 +294,7 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, struct acpi_resource_dma *dma; struct acpi_resource_io *io; struct acpi_resource_fixed_io *fixed_io; + struct acpi_resource_vendor_typed *vendor_typed; struct acpi_resource_memory24 *memory24; struct acpi_resource_memory32 *memory32; struct acpi_resource_fixed_memory32 *fixed_memory32; @@ -306,6 +364,8 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, break; case ACPI_RESOURCE_TYPE_VENDOR: + vendor_typed = &res->data.vendor_typed; + pnpacpi_parse_allocated_vendor(dev, vendor_typed); break; case ACPI_RESOURCE_TYPE_END_TAG: -- cgit v0.10.2 From c83642d5123225a22cccd75adea6e97c245714cb Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 08:45:39 -0600 Subject: ACPI: use dev_printk when possible Convert printks to use dev_printk(). The most obvious change will be messages like this: -ACPI: PCI Interrupt 0000:00:04.0[A] -> GSI 31 (level, low) -> IRQ 31 +cciss 0000:00:04.0: PCI INT A -> GSI 31 (level, low) -> IRQ 31 Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown Signed-off-by: Andi Kleen diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 89022a7..11acaee 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -162,7 +162,7 @@ do_prt_fixups(struct acpi_prt_entry *entry, struct acpi_pci_routing_table *prt) !strcmp(prt->source, quirk->source) && strlen(prt->source) >= strlen(quirk->actual_source)) { printk(KERN_WARNING PREFIX "firmware reports " - "%04x:%02x:%02x[%c] connected to %s; " + "%04x:%02x:%02x PCI INT %c connected to %s; " "changing to %s\n", entry->id.segment, entry->id.bus, entry->id.device, 'A' + entry->pin, @@ -429,7 +429,7 @@ acpi_pci_irq_derive(struct pci_dev *dev, { struct pci_dev *bridge = dev; int irq = -1; - u8 bridge_pin = 0; + u8 bridge_pin = 0, orig_pin = pin; if (!dev) @@ -463,8 +463,8 @@ acpi_pci_irq_derive(struct pci_dev *dev, } if (irq < 0) { - printk(KERN_WARNING PREFIX "Unable to derive IRQ for device %s\n", - pci_name(dev)); + dev_warn(&dev->dev, "can't derive routing for PCI INT %c\n", + 'A' + orig_pin); return -1; } @@ -487,6 +487,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev) int triggering = ACPI_LEVEL_SENSITIVE; int polarity = ACPI_ACTIVE_LOW; char *link = NULL; + char link_desc[16]; int rc; @@ -503,7 +504,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev) pin--; if (!dev->bus) { - printk(KERN_ERR PREFIX "Invalid (NULL) 'bus' field\n"); + dev_err(&dev->dev, "invalid (NULL) 'bus' field\n"); return -ENODEV; } @@ -538,8 +539,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev) * driver reported one, then use it. Exit in any case. */ if (irq < 0) { - printk(KERN_WARNING PREFIX "PCI Interrupt %s[%c]: no GSI", - pci_name(dev), ('A' + pin)); + dev_warn(&dev->dev, "PCI INT %c: no GSI", 'A' + pin); /* Interrupt Line values above 0xF are forbidden */ if (dev->irq > 0 && (dev->irq <= 0xF)) { printk(" - using IRQ %d\n", dev->irq); @@ -554,21 +554,21 @@ int acpi_pci_irq_enable(struct pci_dev *dev) rc = acpi_register_gsi(irq, triggering, polarity); if (rc < 0) { - printk(KERN_WARNING PREFIX "PCI Interrupt %s[%c]: failed " - "to register GSI\n", pci_name(dev), ('A' + pin)); + dev_warn(&dev->dev, "PCI INT %c: failed to register GSI\n", + 'A' + pin); return rc; } dev->irq = rc; - printk(KERN_INFO PREFIX "PCI Interrupt %s[%c] -> ", - pci_name(dev), 'A' + pin); - if (link) - printk("Link [%s] -> ", link); + snprintf(link_desc, sizeof(link_desc), " -> Link[%s]", link); + else + link_desc[0] = '\0'; - printk("GSI %u (%s, %s) -> IRQ %d\n", irq, - (triggering == ACPI_LEVEL_SENSITIVE) ? "level" : "edge", - (polarity == ACPI_ACTIVE_LOW) ? "low" : "high", dev->irq); + dev_info(&dev->dev, "PCI INT %c%s -> GSI %u (%s, %s) -> IRQ %d\n", + 'A' + pin, link_desc, irq, + (triggering == ACPI_LEVEL_SENSITIVE) ? "level" : "edge", + (polarity == ACPI_ACTIVE_LOW) ? "low" : "high", dev->irq); return 0; } @@ -616,10 +616,6 @@ void acpi_pci_irq_disable(struct pci_dev *dev) * (e.g. PCI_UNDEFINED_IRQ). */ - printk(KERN_INFO PREFIX "PCI interrupt for device %s disabled\n", - pci_name(dev)); - + dev_info(&dev->dev, "PCI INT %c disabled\n", 'A' + pin); acpi_unregister_gsi(gsi); - - return; } -- cgit v0.10.2 From 4d3870431d17346c4fdd80e087b7d76f1b5941d5 Mon Sep 17 00:00:00 2001 From: Aaron Durbin Date: Wed, 16 Jul 2008 23:27:08 +0200 Subject: Add the ability to reset the machine using the RESET_REG in ACPI's FADT table. Signed-off-by: Aaron Durbin Signed-off-by: Andi Kleen Cc: Len Brown Cc: Andi Kleen Signed-off-by: Andrew Morton diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 40b0fca..4efbe598 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -21,7 +21,7 @@ obj-$(CONFIG_X86) += blacklist.o # # ACPI Core Subsystem (Interpreter) # -obj-y += osl.o utils.o \ +obj-y += osl.o utils.o reboot.o\ dispatcher/ events/ executer/ hardware/ \ namespace/ parser/ resources/ tables/ \ utilities/ diff --git a/drivers/acpi/reboot.c b/drivers/acpi/reboot.c new file mode 100644 index 0000000..a6b662c --- /dev/null +++ b/drivers/acpi/reboot.c @@ -0,0 +1,50 @@ + +#include +#include +#include + +void acpi_reboot(void) +{ + struct acpi_generic_address *rr; + struct pci_bus *bus0; + u8 reset_value; + unsigned int devfn; + + if (acpi_disabled) + return; + + rr = &acpi_gbl_FADT.reset_register; + + /* Is the reset register supported? */ + if (!(acpi_gbl_FADT.flags & ACPI_FADT_RESET_REGISTER) || + rr->bit_width != 8 || rr->bit_offset != 0) + return; + + reset_value = acpi_gbl_FADT.reset_value; + + /* The reset register can only exist in I/O, Memory or PCI config space + * on a device on bus 0. */ + switch (rr->space_id) { + case ACPI_ADR_SPACE_PCI_CONFIG: + /* The reset register can only live on bus 0. */ + bus0 = pci_find_bus(0, 0); + if (!bus0) + return; + /* Form PCI device/function pair. */ + devfn = PCI_DEVFN((rr->address >> 32) & 0xffff, + (rr->address >> 16) & 0xffff); + printk(KERN_DEBUG "Resetting with ACPI PCI RESET_REG."); + /* Write the value that resets us. */ + pci_bus_write_config_byte(bus0, devfn, + (rr->address & 0xffff), reset_value); + break; + + case ACPI_ADR_SPACE_SYSTEM_MEMORY: + case ACPI_ADR_SPACE_SYSTEM_IO: + printk(KERN_DEBUG "ACPI MEMORY or I/O RESET_REG.\n"); + acpi_hw_low_level_write(8, reset_value, rr); + break; + } + /* Wait ten seconds */ + acpi_os_stall(10000000); +} diff --git a/include/acpi/reboot.h b/include/acpi/reboot.h index 8857f57..0419184 100644 --- a/include/acpi/reboot.h +++ b/include/acpi/reboot.h @@ -1,9 +1,11 @@ +#ifndef __ACPI_REBOOT_H +#define __ACPI_REBOOT_H + +#ifdef CONFIG_ACPI +extern void acpi_reboot(void); +#else +static inline void acpi_reboot(void) { } +#endif -/* - * Dummy placeholder to make the EFI patches apply to the x86 tree. - * Andrew/Len, please just kill this file if you encounter it. - */ -#ifndef acpi_reboot -# define acpi_reboot() do { } while (0) #endif -- cgit v0.10.2 From 01a5bba576b9364b33f61f0cd9fa70c2cf5535e2 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 16 Jul 2008 23:27:08 +0200 Subject: Fix FADT parsing The (1.0 inherited) separate length fields in the FADT are byte granular. Further, PM1a/b may have distinct lengths and live in distinct address spaces. acpi_tb_convert_fadt() should account for all of these conditions. Apart from these changes I'm puzzled by the fact that, not just for acpi_gbl_xpm1{a,b}_enable, acpi_hw_low_level_{read,write}() get an explicit size passed rather than using the size found in the passed GAS. What happens on a platform that defines PM1{a,b} wider than 16 bits? Of course, acpi_hw_low_level_{read,write}() at present are entirely un-prepared to deal with sizes other than 8, 16, or 32, not to speak of a non-zero bit_offset or access_width... Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Cc: Len Brown Signed-off-by: Andrew Morton diff --git a/drivers/acpi/tables/tbfadt.c b/drivers/acpi/tables/tbfadt.c index a4a41ba..ccb5b64 100644 --- a/drivers/acpi/tables/tbfadt.c +++ b/drivers/acpi/tables/tbfadt.c @@ -124,7 +124,7 @@ static struct acpi_fadt_info fadt_info_table[] = { static void inline acpi_tb_init_generic_address(struct acpi_generic_address *generic_address, - u8 bit_width, u64 address) + u8 byte_width, u64 address) { /* @@ -136,7 +136,7 @@ acpi_tb_init_generic_address(struct acpi_generic_address *generic_address, /* All other fields are byte-wide */ generic_address->space_id = ACPI_ADR_SPACE_SYSTEM_IO; - generic_address->bit_width = bit_width; + generic_address->bit_width = byte_width << 3; generic_address->bit_offset = 0; generic_address->access_width = 0; } @@ -343,9 +343,11 @@ static void acpi_tb_convert_fadt(void) * * The PM event blocks are split into two register blocks, first is the * PM Status Register block, followed immediately by the PM Enable Register - * block. Each is of length (pm1_event_length/2) + * block. Each is of length (xpm1x_event_block.bit_width/2) */ - pm1_register_length = (u8) ACPI_DIV_2(acpi_gbl_FADT.pm1_event_length); + WARN_ON(ACPI_MOD_16(acpi_gbl_FADT.xpm1a_event_block.bit_width)); + pm1_register_length = (u8) ACPI_DIV_16(acpi_gbl_FADT + .xpm1a_event_block.bit_width); /* The PM1A register block is required */ @@ -360,14 +362,17 @@ static void acpi_tb_convert_fadt(void) /* The PM1B register block is optional, ignore if not present */ if (acpi_gbl_FADT.xpm1b_event_block.address) { + WARN_ON(ACPI_MOD_16(acpi_gbl_FADT.xpm1b_event_block.bit_width)); + pm1_register_length = (u8) ACPI_DIV_16(acpi_gbl_FADT + .xpm1b_event_block + .bit_width); acpi_tb_init_generic_address(&acpi_gbl_xpm1b_enable, pm1_register_length, (acpi_gbl_FADT.xpm1b_event_block. address + pm1_register_length)); /* Don't forget to copy space_id of the GAS */ acpi_gbl_xpm1b_enable.space_id = - acpi_gbl_FADT.xpm1a_event_block.space_id; - + acpi_gbl_FADT.xpm1b_event_block.space_id; } } -- cgit v0.10.2 From d442cc44c0db56e84ef6aa244a88427d2efe06cd Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 16 Jul 2008 16:09:06 -0400 Subject: block: Trivial fix for blk_integrity_rq() Fail integrity check gracefully when request does not have a bio attached (BLOCK_PC). Signed-off-by: Martin K. Petersen Signed-off-by: Linus Torvalds diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 32a441b..88d6808 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -985,6 +985,9 @@ static inline int bdev_integrity_enabled(struct block_device *bdev, int rw) static inline int blk_integrity_rq(struct request *rq) { + if (rq->bio == NULL) + return 0; + return bio_integrity(rq->bio); } -- cgit v0.10.2 From c0420ad2ca514551ca086510b0e7d17a05c70492 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 30 Jun 2008 18:45:45 +0800 Subject: [PATCH] ocfs2: fix oops in mmap_truncate testing This patch fixes a mmap_truncate bug which was found by ocfs2 test suite. In an ocfs2 cluster more than 1 node, run program mmap_truncate, which races mmap writes and truncates from multiple processes. While the test is running, a stat from another node forces writeout, causing an oops in ocfs2_get_block() because it sees a buffer to write which isn't allocated. This patch fixed the bug by clear dirty and uptodate bits in buffer, leave the buffer unmapped and return. Fix is suggested by Mark Fasheh, and I code up the patch. Signed-off-by: Coly Li Signed-off-by: Mark Fasheh diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 17964c0..1db0801 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -174,10 +174,17 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, * need to use BH_New is when we're extending i_size on a file * system which doesn't support holes, in which case BH_New * allows block_prepare_write() to zero. + * + * If we see this on a sparse file system, then a truncate has + * raced us and removed the cluster. In this case, we clear + * the buffers dirty and uptodate bits and let the buffer code + * ignore it as a hole. */ - mlog_bug_on_msg(create && p_blkno == 0 && ocfs2_sparse_alloc(osb), - "ino %lu, iblock %llu\n", inode->i_ino, - (unsigned long long)iblock); + if (create && p_blkno == 0 && ocfs2_sparse_alloc(osb)) { + clear_buffer_dirty(bh_result); + clear_buffer_uptodate(bh_result); + goto bail; + } /* Treat the unwritten extent as a hole for zeroing purposes. */ if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) -- cgit v0.10.2 From 58b6e5538460be358fdf1286d9a2fbcfcc2cfaba Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 16 Jul 2008 16:21:47 -0700 Subject: Revert "x86/PCI: ACPI based PCI gap calculation" This reverts commit 809d9a8f93bd8504dcc34b16bbfdfd1a8c9bb1ed. This one isn't quite ready for prime time. It needs more testing and additional feedback from the ACPI guys. diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index d1ffb57..d95de2f 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -4,7 +4,6 @@ #include #include #include -#include #include "pci.h" struct pci_root_info { @@ -15,11 +14,6 @@ struct pci_root_info { int busnum; }; -struct gap_info { - unsigned long gapstart; - unsigned long gapsize; -}; - static acpi_status resource_to_addr(struct acpi_resource *resource, struct acpi_resource_address64 *addr) @@ -116,78 +110,6 @@ adjust_transparent_bridge_resources(struct pci_bus *bus) } } -static acpi_status search_gap(struct acpi_resource *resource, void *data) -{ - struct acpi_resource_address64 addr; - acpi_status status; - struct gap_info *gap = data; - unsigned long long start_addr, end_addr; - - status = resource_to_addr(resource, &addr); - if (ACPI_SUCCESS(status) && - addr.resource_type == ACPI_MEMORY_RANGE && - addr.address_length > gap->gapsize) { - start_addr = addr.minimum + addr.translation_offset; - /* - * We want space only in the 32bit address range - */ - if (start_addr < UINT_MAX) { - end_addr = start_addr + addr.address_length; - e820_search_gap(&gap->gapstart, &gap->gapsize, - start_addr, end_addr); - } - } - - return AE_OK; -} - -/* - * Search for a hole in the 32 bit address space for PCI to assign MMIO - * resources, for hotplug or unconfigured resources. - * We query the CRS object of the PCI root device to look for possible producer - * resources in the tree and consider these while calulating the start address - * for this hole. - */ -static void pci_setup_gap(acpi_handle *handle) -{ - struct gap_info gap; - acpi_status status; - - gap.gapstart = 0; - gap.gapsize = 0x400000; - - status = acpi_walk_resources(handle, METHOD_NAME__CRS, - search_gap, &gap); - - if (ACPI_SUCCESS(status)) { - unsigned long round; - - if (!gap.gapstart) { - printk(KERN_ERR "ACPI: Warning: Cannot find a gap " - "in the 32bit address range for PCI\n" - "ACPI: PCI devices may collide with " - "hotpluggable memory address range\n"); - } - /* - * Round the gapstart, uses the same logic as in - * e820_gap_setup - */ - round = 0x100000; - while ((gap.gapsize >> 4) > round) - round += round; - /* Fun with two's complement */ - pci_mem_start = (gap.gapstart + round) & -round; - - printk(KERN_INFO "ACPI: PCI resources should " - "start at %lx (gap: %lx:%lx)\n", - pci_mem_start, gap.gapstart, gap.gapsize); - } else { - printk(KERN_ERR "ACPI: Error while searching for gap in " - "the 32bit address range for PCI\n"); - } -} - - static void get_current_resources(struct acpi_device *device, int busnum, int domain, struct pci_bus *bus) @@ -293,8 +215,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do if (bus && (pci_probe & PCI_USE__CRS)) get_current_resources(device, busnum, domain, bus); - - pci_setup_gap(device->handle); return bus; } -- cgit v0.10.2 From 33af79d12e0fa25545d49e86afc67ea8ad5f2f40 Mon Sep 17 00:00:00 2001 From: Chandra Seetharaman Date: Wed, 16 Jul 2008 17:35:08 -0700 Subject: scsi_dh: Verify "dev" is a sdev before accessing it. Before accessing the device data structure in hardware handlers, make sure it is a indeed a sdev device. Yinghai Lu found the bug on Jul 16, 2008, and later tested/verified the following fix. Signed-off-by: Chandra Seetharaman Signed-off-by: Linus Torvalds diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c index ed53f14..f2467e9 100644 --- a/drivers/scsi/device_handler/scsi_dh_emc.c +++ b/drivers/scsi/device_handler/scsi_dh_emc.c @@ -416,12 +416,17 @@ static int clariion_bus_notify(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct scsi_device *sdev = to_scsi_device(dev); + struct scsi_device *sdev; struct scsi_dh_data *scsi_dh_data; struct clariion_dh_data *h; int i, found = 0; unsigned long flags; + if (!scsi_is_sdev_device(dev)) + return 0; + + sdev = to_scsi_device(dev); + if (action == BUS_NOTIFY_ADD_DEVICE) { for (i = 0; clariion_dev_list[i].vendor; i++) { if (!strncmp(sdev->vendor, clariion_dev_list[i].vendor, diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c index 12ceab7..ae6be87 100644 --- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c +++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c @@ -131,11 +131,16 @@ static int hp_sw_bus_notify(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct scsi_device *sdev = to_scsi_device(dev); + struct scsi_device *sdev; struct scsi_dh_data *scsi_dh_data; int i, found = 0; unsigned long flags; + if (!scsi_is_sdev_device(dev)) + return 0; + + sdev = to_scsi_device(dev); + if (action == BUS_NOTIFY_ADD_DEVICE) { for (i = 0; hp_sw_dh_data_list[i].vendor; i++) { if (!strncmp(sdev->vendor, hp_sw_dh_data_list[i].vendor, diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 6fff077..fdf34b0 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -608,12 +608,17 @@ static int rdac_bus_notify(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct scsi_device *sdev = to_scsi_device(dev); + struct scsi_device *sdev; struct scsi_dh_data *scsi_dh_data; struct rdac_dh_data *h; int i, found = 0; unsigned long flags; + if (!scsi_is_sdev_device(dev)) + return 0; + + sdev = to_scsi_device(dev); + if (action == BUS_NOTIFY_ADD_DEVICE) { for (i = 0; rdac_dev_list[i].vendor; i++) { if (!strncmp(sdev->vendor, rdac_dev_list[i].vendor, -- cgit v0.10.2 From 98abed02007b19bbfd68b6d06a5485afc3eeb01b Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 19 Mar 2008 19:24:59 -0700 Subject: do_wait reorganization This breaks out the guts of do_wait into three subfunctions. The control flow is less nonobvious without so much goto. do_wait_thread and ptrace_do_wait contain the main work of the outer loop. wait_consider_task contains the main work of the inner loop. Signed-off-by: Roland McGrath diff --git a/kernel/exit.c b/kernel/exit.c index ceb2587..7453356 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1238,7 +1238,7 @@ static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. */ -static int wait_task_zombie(struct task_struct *p, int noreap, +static int wait_task_zombie(struct task_struct *p, int options, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { @@ -1246,7 +1246,10 @@ static int wait_task_zombie(struct task_struct *p, int noreap, int retval, status, traced; pid_t pid = task_pid_vnr(p); - if (unlikely(noreap)) { + if (!likely(options & WEXITED)) + return 0; + + if (unlikely(options & WNOWAIT)) { uid_t uid = p->uid; int exit_code = p->exit_code; int why, status; @@ -1397,13 +1400,16 @@ static int wait_task_zombie(struct task_struct *p, int noreap, * released the lock and the system call should return. */ static int wait_task_stopped(struct task_struct *p, - int noreap, struct siginfo __user *infop, + int options, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { int retval, exit_code, why; uid_t uid = 0; /* unneeded, required by compiler */ pid_t pid; + if (!(p->ptrace & PT_PTRACED) && !(options & WUNTRACED)) + return 0; + exit_code = 0; spin_lock_irq(&p->sighand->siglock); @@ -1421,7 +1427,7 @@ static int wait_task_stopped(struct task_struct *p, if (!exit_code) goto unlock_sig; - if (!noreap) + if (!unlikely(options & WNOWAIT)) p->exit_code = 0; uid = p->uid; @@ -1442,7 +1448,7 @@ unlock_sig: why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED; read_unlock(&tasklist_lock); - if (unlikely(noreap)) + if (unlikely(options & WNOWAIT)) return wait_noreap_copyout(p, pid, uid, why, exit_code, infop, ru); @@ -1476,7 +1482,7 @@ unlock_sig: * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. */ -static int wait_task_continued(struct task_struct *p, int noreap, +static int wait_task_continued(struct task_struct *p, int options, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { @@ -1484,6 +1490,9 @@ static int wait_task_continued(struct task_struct *p, int noreap, pid_t pid; uid_t uid; + if (!unlikely(options & WCONTINUED)) + return 0; + if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) return 0; @@ -1493,7 +1502,7 @@ static int wait_task_continued(struct task_struct *p, int noreap, spin_unlock_irq(&p->sighand->siglock); return 0; } - if (!noreap) + if (!unlikely(options & WNOWAIT)) p->signal->flags &= ~SIGNAL_STOP_CONTINUED; spin_unlock_irq(&p->sighand->siglock); @@ -1519,89 +1528,137 @@ static int wait_task_continued(struct task_struct *p, int noreap, return retval; } +/* + * Consider @p for a wait by @parent. + * + * -ECHILD should be in *@notask_error before the first call. + * Returns nonzero for a final return, when we have unlocked tasklist_lock. + * Returns zero if the search for a child should continue; + * then *@notask_error is 0 if @p is an eligible child, or still -ECHILD. + */ +static int wait_consider_task(struct task_struct *parent, + struct task_struct *p, int *notask_error, + enum pid_type type, struct pid *pid, int options, + struct siginfo __user *infop, + int __user *stat_addr, struct rusage __user *ru) +{ + int ret = eligible_child(type, pid, options, p); + if (ret <= 0) + return ret; + + if (p->exit_state == EXIT_DEAD) + return 0; + + /* + * We don't reap group leaders with subthreads. + */ + if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p)) + return wait_task_zombie(p, options, infop, stat_addr, ru); + + /* + * It's stopped or running now, so it might + * later continue, exit, or stop again. + */ + *notask_error = 0; + + if (task_is_stopped_or_traced(p)) + return wait_task_stopped(p, options, infop, stat_addr, ru); + + return wait_task_continued(p, options, infop, stat_addr, ru); +} + +/* + * Do the work of do_wait() for one thread in the group, @tsk. + * + * -ECHILD should be in *@notask_error before the first call. + * Returns nonzero for a final return, when we have unlocked tasklist_lock. + * Returns zero if the search for a child should continue; then + * *@notask_error is 0 if there were any eligible children, or still -ECHILD. + */ +static int do_wait_thread(struct task_struct *tsk, int *notask_error, + enum pid_type type, struct pid *pid, int options, + struct siginfo __user *infop, int __user *stat_addr, + struct rusage __user *ru) +{ + struct task_struct *p; + + list_for_each_entry(p, &tsk->children, sibling) { + int ret = wait_consider_task(tsk, p, notask_error, + type, pid, options, + infop, stat_addr, ru); + if (ret) + return ret; + } + + return 0; +} + +static int ptrace_do_wait(struct task_struct *tsk, int *notask_error, + enum pid_type type, struct pid *pid, int options, + struct siginfo __user *infop, int __user *stat_addr, + struct rusage __user *ru) +{ + struct task_struct *p; + + /* + * If we never saw an eligile child, check for children stolen by + * ptrace. We don't leave -ECHILD in *@notask_error if there are any, + * because we will eventually be allowed to wait for them again. + */ + if (!*notask_error) + return 0; + + list_for_each_entry(p, &tsk->ptrace_children, ptrace_list) { + int ret = eligible_child(type, pid, options, p); + if (unlikely(ret < 0)) + return ret; + if (ret) { + *notask_error = 0; + return 0; + } + } + + return 0; +} + static long do_wait(enum pid_type type, struct pid *pid, int options, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { DECLARE_WAITQUEUE(wait, current); struct task_struct *tsk; - int flag, retval; + int retval; add_wait_queue(¤t->signal->wait_chldexit,&wait); repeat: - /* If there is nothing that can match our critier just get out */ + /* + * If there is nothing that can match our critiera just get out. + * We will clear @retval to zero if we see any child that might later + * match our criteria, even if we are not able to reap it yet. + */ retval = -ECHILD; if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type]))) goto end; - /* - * We will set this flag if we see any child that might later - * match our criteria, even if we are not able to reap it yet. - */ - flag = retval = 0; current->state = TASK_INTERRUPTIBLE; read_lock(&tasklist_lock); tsk = current; do { - struct task_struct *p; - - list_for_each_entry(p, &tsk->children, sibling) { - int ret = eligible_child(type, pid, options, p); - if (!ret) - continue; - - if (unlikely(ret < 0)) { - retval = ret; - } else if (task_is_stopped_or_traced(p)) { - /* - * It's stopped now, so it might later - * continue, exit, or stop again. - */ - flag = 1; - if (!(p->ptrace & PT_PTRACED) && - !(options & WUNTRACED)) - continue; - - retval = wait_task_stopped(p, - (options & WNOWAIT), infop, - stat_addr, ru); - } else if (p->exit_state == EXIT_ZOMBIE && - !delay_group_leader(p)) { - /* - * We don't reap group leaders with subthreads. - */ - if (!likely(options & WEXITED)) - continue; - retval = wait_task_zombie(p, - (options & WNOWAIT), infop, - stat_addr, ru); - } else if (p->exit_state != EXIT_DEAD) { - /* - * It's running now, so it might later - * exit, stop, or stop and then continue. - */ - flag = 1; - if (!unlikely(options & WCONTINUED)) - continue; - retval = wait_task_continued(p, - (options & WNOWAIT), infop, - stat_addr, ru); - } - if (retval != 0) /* tasklist_lock released */ - goto end; - } - if (!flag) { - list_for_each_entry(p, &tsk->ptrace_children, - ptrace_list) { - flag = eligible_child(type, pid, options, p); - if (!flag) - continue; - if (likely(flag > 0)) - break; - retval = flag; - goto end; - } + int tsk_result = do_wait_thread(tsk, &retval, + type, pid, options, + infop, stat_addr, ru); + if (!tsk_result) + tsk_result = ptrace_do_wait(tsk, &retval, + type, pid, options, + infop, stat_addr, ru); + if (tsk_result) { + /* + * tasklist_lock is unlocked and we have a final result. + */ + retval = tsk_result; + goto end; } + if (options & __WNOTHREAD) break; tsk = next_thread(tsk); @@ -1609,16 +1666,14 @@ repeat: } while (tsk != current); read_unlock(&tasklist_lock); - if (flag) { - if (options & WNOHANG) - goto end; + if (!retval && !(options & WNOHANG)) { retval = -ERESTARTSYS; - if (signal_pending(current)) - goto end; - schedule(); - goto repeat; + if (!signal_pending(current)) { + schedule(); + goto repeat; + } } - retval = -ECHILD; + end: current->state = TASK_RUNNING; remove_wait_queue(¤t->signal->wait_chldexit,&wait); -- cgit v0.10.2 From f470021adb9190819c03d6d8c5c860a17480aa6d Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Mon, 24 Mar 2008 18:36:23 -0700 Subject: ptrace children revamp ptrace no longer fiddles with the children/sibling links, and the old ptrace_children list is gone. Now ptrace, whether of one's own children or another's via PTRACE_ATTACH, just uses the new ptraced list instead. There should be no user-visible difference that matters. The only change is the order in which do_wait() sees multiple stopped children and stopped ptrace attachees. Since wait_task_stopped() was changed earlier so it no longer reorders the children list, we already know this won't cause any new problems. Signed-off-by: Roland McGrath diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9927a88..93c45ac 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -140,8 +140,8 @@ extern struct group_info init_groups; .nr_cpus_allowed = NR_CPUS, \ }, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ - .ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \ - .ptrace_list = LIST_HEAD_INIT(tsk.ptrace_list), \ + .ptraced = LIST_HEAD_INIT(tsk.ptraced), \ + .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \ .real_parent = &tsk, \ .parent = &tsk, \ .children = LIST_HEAD_INIT(tsk.children), \ diff --git a/include/linux/sched.h b/include/linux/sched.h index ba2f859..1941d8b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1062,12 +1062,6 @@ struct task_struct { #endif struct list_head tasks; - /* - * ptrace_list/ptrace_children forms the list of my children - * that were stolen by a ptracer. - */ - struct list_head ptrace_children; - struct list_head ptrace_list; struct mm_struct *mm, *active_mm; @@ -1089,18 +1083,25 @@ struct task_struct { /* * pointers to (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with - * p->parent->pid) + * p->real_parent->pid) */ - struct task_struct *real_parent; /* real parent process (when being debugged) */ - struct task_struct *parent; /* parent process */ + struct task_struct *real_parent; /* real parent process */ + struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */ /* - * children/sibling forms the list of my children plus the - * tasks I'm ptracing. + * children/sibling forms the list of my natural children */ struct list_head children; /* list of my children */ struct list_head sibling; /* linkage in my parent's children list */ struct task_struct *group_leader; /* threadgroup leader */ + /* + * ptraced is the list of tasks this task is using ptrace on. + * This includes both natural children and PTRACE_ATTACH targets. + * p->ptrace_entry is p's link on the p->parent->ptraced list. + */ + struct list_head ptraced; + struct list_head ptrace_entry; + /* PID/PID hash table linkage. */ struct pid_link pids[PIDTYPE_MAX]; struct list_head thread_group; @@ -1876,9 +1877,6 @@ extern void wait_task_inactive(struct task_struct * p); #define wait_task_inactive(p) do { } while (0) #endif -#define remove_parent(p) list_del_init(&(p)->sibling) -#define add_parent(p) list_add_tail(&(p)->sibling,&(p)->parent->children) - #define next_task(p) list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks) #define for_each_process(p) \ diff --git a/kernel/exit.c b/kernel/exit.c index 7453356..1e90982 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -71,7 +71,7 @@ static void __unhash_process(struct task_struct *p) __get_cpu_var(process_counts)--; } list_del_rcu(&p->thread_group); - remove_parent(p); + list_del_init(&p->sibling); } /* @@ -152,6 +152,18 @@ static void delayed_put_task_struct(struct rcu_head *rhp) put_task_struct(container_of(rhp, struct task_struct, rcu)); } +/* + * Do final ptrace-related cleanup of a zombie being reaped. + * + * Called with write_lock(&tasklist_lock) held. + */ +static void ptrace_release_task(struct task_struct *p) +{ + BUG_ON(!list_empty(&p->ptraced)); + ptrace_unlink(p); + BUG_ON(!list_empty(&p->ptrace_entry)); +} + void release_task(struct task_struct * p) { struct task_struct *leader; @@ -160,8 +172,7 @@ repeat: atomic_dec(&p->user->processes); proc_flush_task(p); write_lock_irq(&tasklist_lock); - ptrace_unlink(p); - BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); + ptrace_release_task(p); __exit_signal(p); /* @@ -315,9 +326,8 @@ static void reparent_to_kthreadd(void) ptrace_unlink(current); /* Reparent to init */ - remove_parent(current); current->real_parent = current->parent = kthreadd_task; - add_parent(current); + list_move_tail(¤t->sibling, ¤t->real_parent->children); /* Set the exit signal to SIGCHLD so we signal init on exit */ current->exit_signal = SIGCHLD; @@ -692,37 +702,71 @@ static void exit_mm(struct task_struct * tsk) mmput(mm); } -static void -reparent_thread(struct task_struct *p, struct task_struct *father, int traced) +/* + * Detach all tasks we were using ptrace on. + * Any that need to be release_task'd are put on the @dead list. + * + * Called with write_lock(&tasklist_lock) held. + */ +static void ptrace_exit(struct task_struct *parent, struct list_head *dead) { - if (p->pdeath_signal) - /* We already hold the tasklist_lock here. */ - group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p); + struct task_struct *p, *n; - /* Move the child from its dying parent to the new one. */ - if (unlikely(traced)) { - /* Preserve ptrace links if someone else is tracing this child. */ - list_del_init(&p->ptrace_list); - if (ptrace_reparented(p)) - list_add(&p->ptrace_list, &p->real_parent->ptrace_children); - } else { - /* If this child is being traced, then we're the one tracing it - * anyway, so let go of it. + list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) { + __ptrace_unlink(p); + + if (p->exit_state != EXIT_ZOMBIE) + continue; + + /* + * If it's a zombie, our attachedness prevented normal + * parent notification or self-reaping. Do notification + * now if it would have happened earlier. If it should + * reap itself, add it to the @dead list. We can't call + * release_task() here because we already hold tasklist_lock. + * + * If it's our own child, there is no notification to do. */ - p->ptrace = 0; - remove_parent(p); - p->parent = p->real_parent; - add_parent(p); + if (!task_detached(p) && thread_group_empty(p)) { + if (!same_thread_group(p->real_parent, parent)) + do_notify_parent(p, p->exit_signal); + } - if (task_is_traced(p)) { + if (task_detached(p)) { /* - * If it was at a trace stop, turn it into - * a normal stop since it's no longer being - * traced. + * Mark it as in the process of being reaped. */ - ptrace_untrace(p); + p->exit_state = EXIT_DEAD; + list_add(&p->ptrace_entry, dead); } } +} + +/* + * Finish up exit-time ptrace cleanup. + * + * Called without locks. + */ +static void ptrace_exit_finish(struct task_struct *parent, + struct list_head *dead) +{ + struct task_struct *p, *n; + + BUG_ON(!list_empty(&parent->ptraced)); + + list_for_each_entry_safe(p, n, dead, ptrace_entry) { + list_del_init(&p->ptrace_entry); + release_task(p); + } +} + +static void reparent_thread(struct task_struct *p, struct task_struct *father) +{ + if (p->pdeath_signal) + /* We already hold the tasklist_lock here. */ + group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p); + + list_move_tail(&p->sibling, &p->real_parent->children); /* If this is a threaded reparent there is no need to * notify anyone anything has happened. @@ -737,7 +781,8 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) /* If we'd notified the old parent about this child's death, * also notify the new parent. */ - if (!traced && p->exit_state == EXIT_ZOMBIE && + if (!ptrace_reparented(p) && + p->exit_state == EXIT_ZOMBIE && !task_detached(p) && thread_group_empty(p)) do_notify_parent(p, p->exit_signal); @@ -754,12 +799,15 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) static void forget_original_parent(struct task_struct *father) { struct task_struct *p, *n, *reaper = father; - struct list_head ptrace_dead; - - INIT_LIST_HEAD(&ptrace_dead); + LIST_HEAD(ptrace_dead); write_lock_irq(&tasklist_lock); + /* + * First clean up ptrace if we were using it. + */ + ptrace_exit(father, &ptrace_dead); + do { reaper = next_thread(reaper); if (reaper == father) { @@ -768,58 +816,19 @@ static void forget_original_parent(struct task_struct *father) } } while (reaper->flags & PF_EXITING); - /* - * There are only two places where our children can be: - * - * - in our child list - * - in our ptraced child list - * - * Search them and reparent children. - */ list_for_each_entry_safe(p, n, &father->children, sibling) { - int ptrace; - - ptrace = p->ptrace; - - /* if father isn't the real parent, then ptrace must be enabled */ - BUG_ON(father != p->real_parent && !ptrace); - - if (father == p->real_parent) { - /* reparent with a reaper, real father it's us */ - p->real_parent = reaper; - reparent_thread(p, father, 0); - } else { - /* reparent ptraced task to its real parent */ - __ptrace_unlink (p); - if (p->exit_state == EXIT_ZOMBIE && !task_detached(p) && - thread_group_empty(p)) - do_notify_parent(p, p->exit_signal); - } - - /* - * if the ptraced child is a detached zombie we must collect - * it before we exit, or it will remain zombie forever since - * we prevented it from self-reap itself while it was being - * traced by us, to be able to see it in wait4. - */ - if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && task_detached(p))) - list_add(&p->ptrace_list, &ptrace_dead); - } - - list_for_each_entry_safe(p, n, &father->ptrace_children, ptrace_list) { p->real_parent = reaper; - reparent_thread(p, father, 1); + if (p->parent == father) { + BUG_ON(p->ptrace); + p->parent = p->real_parent; + } + reparent_thread(p, father); } write_unlock_irq(&tasklist_lock); BUG_ON(!list_empty(&father->children)); - BUG_ON(!list_empty(&father->ptrace_children)); - - list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_list) { - list_del_init(&p->ptrace_list); - release_task(p); - } + ptrace_exit_finish(father, &ptrace_dead); } /* @@ -1180,13 +1189,6 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options, return 0; } - /* - * Do not consider detached threads that are - * not ptraced: - */ - if (task_detached(p) && !p->ptrace) - return 0; - /* Wait for all children (clone and not) if __WALL is set; * otherwise, wait for clone children *only* if __WCLONE is * set; otherwise, wait for non-clone children *only*. (Note: @@ -1399,7 +1401,7 @@ static int wait_task_zombie(struct task_struct *p, int options, * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. */ -static int wait_task_stopped(struct task_struct *p, +static int wait_task_stopped(int ptrace, struct task_struct *p, int options, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { @@ -1407,7 +1409,7 @@ static int wait_task_stopped(struct task_struct *p, uid_t uid = 0; /* unneeded, required by compiler */ pid_t pid; - if (!(p->ptrace & PT_PTRACED) && !(options & WUNTRACED)) + if (!(options & WUNTRACED)) return 0; exit_code = 0; @@ -1416,7 +1418,7 @@ static int wait_task_stopped(struct task_struct *p, if (unlikely(!task_is_stopped_or_traced(p))) goto unlock_sig; - if (!(p->ptrace & PT_PTRACED) && p->signal->group_stop_count > 0) + if (!ptrace && p->signal->group_stop_count > 0) /* * A group stop is in progress and this is the group leader. * We won't report until all threads have stopped. @@ -1445,7 +1447,7 @@ unlock_sig: */ get_task_struct(p); pid = task_pid_vnr(p); - why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED; + why = ptrace ? CLD_TRAPPED : CLD_STOPPED; read_unlock(&tasklist_lock); if (unlikely(options & WNOWAIT)) @@ -1536,7 +1538,7 @@ static int wait_task_continued(struct task_struct *p, int options, * Returns zero if the search for a child should continue; * then *@notask_error is 0 if @p is an eligible child, or still -ECHILD. */ -static int wait_consider_task(struct task_struct *parent, +static int wait_consider_task(struct task_struct *parent, int ptrace, struct task_struct *p, int *notask_error, enum pid_type type, struct pid *pid, int options, struct siginfo __user *infop, @@ -1546,6 +1548,15 @@ static int wait_consider_task(struct task_struct *parent, if (ret <= 0) return ret; + if (likely(!ptrace) && unlikely(p->ptrace)) { + /* + * This child is hidden by ptrace. + * We aren't allowed to see it now, but eventually we will. + */ + *notask_error = 0; + return 0; + } + if (p->exit_state == EXIT_DEAD) return 0; @@ -1562,7 +1573,8 @@ static int wait_consider_task(struct task_struct *parent, *notask_error = 0; if (task_is_stopped_or_traced(p)) - return wait_task_stopped(p, options, infop, stat_addr, ru); + return wait_task_stopped(ptrace, p, options, + infop, stat_addr, ru); return wait_task_continued(p, options, infop, stat_addr, ru); } @@ -1583,11 +1595,16 @@ static int do_wait_thread(struct task_struct *tsk, int *notask_error, struct task_struct *p; list_for_each_entry(p, &tsk->children, sibling) { - int ret = wait_consider_task(tsk, p, notask_error, - type, pid, options, - infop, stat_addr, ru); - if (ret) - return ret; + /* + * Do not consider detached threads. + */ + if (!task_detached(p)) { + int ret = wait_consider_task(tsk, 0, p, notask_error, + type, pid, options, + infop, stat_addr, ru); + if (ret) + return ret; + } } return 0; @@ -1601,21 +1618,16 @@ static int ptrace_do_wait(struct task_struct *tsk, int *notask_error, struct task_struct *p; /* - * If we never saw an eligile child, check for children stolen by - * ptrace. We don't leave -ECHILD in *@notask_error if there are any, - * because we will eventually be allowed to wait for them again. + * Traditionally we see ptrace'd stopped tasks regardless of options. */ - if (!*notask_error) - return 0; + options |= WUNTRACED; - list_for_each_entry(p, &tsk->ptrace_children, ptrace_list) { - int ret = eligible_child(type, pid, options, p); - if (unlikely(ret < 0)) + list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { + int ret = wait_consider_task(tsk, 1, p, notask_error, + type, pid, options, + infop, stat_addr, ru); + if (ret) return ret; - if (ret) { - *notask_error = 0; - return 0; - } } return 0; diff --git a/kernel/fork.c b/kernel/fork.c index 4bd2f51..adefc11 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1125,8 +1125,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); - INIT_LIST_HEAD(&p->ptrace_children); - INIT_LIST_HEAD(&p->ptrace_list); + INIT_LIST_HEAD(&p->ptrace_entry); + INIT_LIST_HEAD(&p->ptraced); /* Now that the task is set up, run cgroup callbacks if * necessary. We need to run them before the task is visible @@ -1198,7 +1198,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, } if (likely(p->pid)) { - add_parent(p); + list_add_tail(&p->sibling, &p->real_parent->children); if (unlikely(p->ptrace & PT_PTRACED)) __ptrace_link(p, current->parent); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index e337390..8392a9d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -33,13 +33,9 @@ */ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) { - BUG_ON(!list_empty(&child->ptrace_list)); - if (child->parent == new_parent) - return; - list_add(&child->ptrace_list, &child->parent->ptrace_children); - remove_parent(child); + BUG_ON(!list_empty(&child->ptrace_entry)); + list_add(&child->ptrace_entry, &new_parent->ptraced); child->parent = new_parent; - add_parent(child); } /* @@ -73,12 +69,8 @@ void __ptrace_unlink(struct task_struct *child) BUG_ON(!child->ptrace); child->ptrace = 0; - if (ptrace_reparented(child)) { - list_del_init(&child->ptrace_list); - remove_parent(child); - child->parent = child->real_parent; - add_parent(child); - } + child->parent = child->real_parent; + list_del_init(&child->ptrace_entry); if (task_is_traced(child)) ptrace_untrace(child); @@ -492,15 +484,34 @@ int ptrace_traceme(void) /* * Are we already being traced? */ +repeat: task_lock(current); if (!(current->ptrace & PT_PTRACED)) { + /* + * See ptrace_attach() comments about the locking here. + */ + unsigned long flags; + if (!write_trylock_irqsave(&tasklist_lock, flags)) { + task_unlock(current); + do { + cpu_relax(); + } while (!write_can_lock(&tasklist_lock)); + goto repeat; + } + ret = security_ptrace(current->parent, current, PTRACE_MODE_ATTACH); + /* * Set the ptrace bit in the process ptrace flags. + * Then link us on our parent's ptraced list. */ - if (!ret) + if (!ret) { current->ptrace |= PT_PTRACED; + __ptrace_link(current, current->real_parent); + } + + write_unlock_irqrestore(&tasklist_lock, flags); } task_unlock(current); return ret; -- cgit v0.10.2 From 14dd0b81414a58caf0296dbeace016bb0a5d11ab Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sun, 30 Mar 2008 18:41:25 -0700 Subject: do_wait: return security_task_wait() error code in place of -ECHILD This reverts the effect of commit f2cc3eb133baa2e9dc8efd40f417106b2ee520f3 "do_wait: fix security checks". That change reverted the effect of commit 73243284463a761e04d69d22c7516b2be7de096c. The rationale for the original commit still stands. The inconsistent treatment of children hidden by ptrace was an unintended omission in the original change and in no way invalidates its purpose. This makes do_wait return the error returned by security_task_wait() (usually -EACCES) in place of -ECHILD when there are some children the caller would be able to wait for if not for the permission failure. A permission error will give the user a clue to look for security policy problems, rather than for mysterious wait bugs. Signed-off-by: Roland McGrath diff --git a/kernel/exit.c b/kernel/exit.c index 1e90982..a2af6ca 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1199,14 +1199,10 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options, return 0; err = security_task_wait(p); - if (likely(!err)) - return 1; + if (err) + return err; - if (type != PIDTYPE_PID) - return 0; - /* This child was explicitly requested, abort */ - read_unlock(&tasklist_lock); - return err; + return 1; } static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, @@ -1536,7 +1532,8 @@ static int wait_task_continued(struct task_struct *p, int options, * -ECHILD should be in *@notask_error before the first call. * Returns nonzero for a final return, when we have unlocked tasklist_lock. * Returns zero if the search for a child should continue; - * then *@notask_error is 0 if @p is an eligible child, or still -ECHILD. + * then *@notask_error is 0 if @p is an eligible child, + * or another error from security_task_wait(), or still -ECHILD. */ static int wait_consider_task(struct task_struct *parent, int ptrace, struct task_struct *p, int *notask_error, @@ -1545,9 +1542,21 @@ static int wait_consider_task(struct task_struct *parent, int ptrace, int __user *stat_addr, struct rusage __user *ru) { int ret = eligible_child(type, pid, options, p); - if (ret <= 0) + if (!ret) return ret; + if (unlikely(ret < 0)) { + /* + * If we have not yet seen any eligible child, + * then let this error code replace -ECHILD. + * A permission error will give the user a clue + * to look for security policy problems, rather + * than for mysterious wait bugs. + */ + if (*notask_error) + *notask_error = ret; + } + if (likely(!ptrace) && unlikely(p->ptrace)) { /* * This child is hidden by ptrace. @@ -1585,7 +1594,8 @@ static int wait_consider_task(struct task_struct *parent, int ptrace, * -ECHILD should be in *@notask_error before the first call. * Returns nonzero for a final return, when we have unlocked tasklist_lock. * Returns zero if the search for a child should continue; then - * *@notask_error is 0 if there were any eligible children, or still -ECHILD. + * *@notask_error is 0 if there were any eligible children, + * or another error from security_task_wait(), or still -ECHILD. */ static int do_wait_thread(struct task_struct *tsk, int *notask_error, enum pid_type type, struct pid *pid, int options, -- cgit v0.10.2 From 666f164f4fbfa78bd00fb4b74788b42a39842c64 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Tue, 8 Apr 2008 23:12:30 -0700 Subject: fix dangling zombie when new parent ignores children This fixes an arcane bug that we think was a regression introduced by commit b2b2cbc4b2a2f389442549399a993a8306420baf. When a parent ignores SIGCHLD (or uses SA_NOCLDWAIT), its children would self-reap but they don't if it's using ptrace on them. When the parent thread later exits and ceases to ptrace a child but leaves other live threads in the parent's thread group, any zombie children are left dangling. The fix makes them self-reap then, as they would have done earlier if ptrace had not been in use. Signed-off-by: Roland McGrath diff --git a/kernel/exit.c b/kernel/exit.c index a2af6ca..93d2711 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -703,6 +703,23 @@ static void exit_mm(struct task_struct * tsk) } /* + * Return nonzero if @parent's children should reap themselves. + * + * Called with write_lock_irq(&tasklist_lock) held. + */ +static int ignoring_children(struct task_struct *parent) +{ + int ret; + struct sighand_struct *psig = parent->sighand; + unsigned long flags; + spin_lock_irqsave(&psig->siglock, flags); + ret = (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN || + (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT)); + spin_unlock_irqrestore(&psig->siglock, flags); + return ret; +} + +/* * Detach all tasks we were using ptrace on. * Any that need to be release_task'd are put on the @dead list. * @@ -711,6 +728,7 @@ static void exit_mm(struct task_struct * tsk) static void ptrace_exit(struct task_struct *parent, struct list_head *dead) { struct task_struct *p, *n; + int ign = -1; list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) { __ptrace_unlink(p); @@ -726,10 +744,18 @@ static void ptrace_exit(struct task_struct *parent, struct list_head *dead) * release_task() here because we already hold tasklist_lock. * * If it's our own child, there is no notification to do. + * But if our normal children self-reap, then this child + * was prevented by ptrace and we must reap it now. */ if (!task_detached(p) && thread_group_empty(p)) { if (!same_thread_group(p->real_parent, parent)) do_notify_parent(p, p->exit_signal); + else { + if (ign < 0) + ign = ignoring_children(parent); + if (ign) + p->exit_signal = -1; + } } if (task_detached(p)) { -- cgit v0.10.2 From 8e9509c827a28e2f365c203c04224f9e9dd1b63a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 17 Jul 2008 13:26:50 +0200 Subject: ftrace: fix merge buglet -tip testing found a bootup hang here: initcall anon_inode_init+0x0/0x130 returned 0 after 0 msecs calling acpi_event_init+0x0/0x57 the bootup should have continued with: initcall acpi_event_init+0x0/0x57 returned 0 after 45 msecs but it hung hard there instead. bisection led to this commit: | commit 5806b81ac1c0c52665b91723fd4146a4f86e386b | Merge: d14c8a6... 6712e29... | Author: Ingo Molnar | Date: Mon Jul 14 16:11:52 2008 +0200 | Merge branch 'auto-ftrace-next' into tracing/for-linus turns out that i made this mistake in the merge: ifdef CONFIG_FTRACE # Do not profile debug utilities CFLAGS_REMOVE_tsc_64.o = -pg CFLAGS_REMOVE_tsc_32.o = -pg those two files got unified meanwhile - so the dont-profile annotation got lost. The proper rule is: CFLAGS_REMOVE_tsc.o = -pg i guess this could have been caught sooner if the CFLAGS_REMOVE* kbuild rule aborted the build if it met a target that does not exist anymore? Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5112c84..da14061 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -8,8 +8,7 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) ifdef CONFIG_FTRACE # Do not profile debug utilities -CFLAGS_REMOVE_tsc_64.o = -pg -CFLAGS_REMOVE_tsc_32.o = -pg +CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg endif -- cgit v0.10.2 From 12e0c95e0ca99f633c9d9f90773037eb178685ad Mon Sep 17 00:00:00 2001 From: Frank Munzert Date: Thu, 17 Jul 2008 17:16:40 +0200 Subject: [S390] zfcpdump: Make SCSI disk dump tool recognize storage holes The kernel part of zfcpdump establishes a new debugfs file zcore/memmap which exports information on memory layout (start address and length of each memory chunk) to its userspace counterpart. Signed-off-by: Frank Munzert Signed-off-by: Heiko Carstens Cc: Martin Schwidefsky diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 047dd92..7fd84be 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -29,6 +29,7 @@ #define TO_USER 0 #define TO_KERNEL 1 +#define CHUNK_INFO_SIZE 34 /* 2 16-byte char, each followed by blank */ enum arch_id { ARCH_S390 = 0, @@ -51,6 +52,7 @@ static struct debug_info *zcore_dbf; static int hsa_available; static struct dentry *zcore_dir; static struct dentry *zcore_file; +static struct dentry *zcore_memmap_file; /* * Copy memory from HSA to kernel or user memory (not reentrant): @@ -476,6 +478,54 @@ static const struct file_operations zcore_fops = { .release = zcore_release, }; +static ssize_t zcore_memmap_read(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + return simple_read_from_buffer(buf, count, ppos, filp->private_data, + MEMORY_CHUNKS * CHUNK_INFO_SIZE); +} + +static int zcore_memmap_open(struct inode *inode, struct file *filp) +{ + int i; + char *buf; + struct mem_chunk *chunk_array; + + chunk_array = kzalloc(MEMORY_CHUNKS * sizeof(struct mem_chunk), + GFP_KERNEL); + if (!chunk_array) + return -ENOMEM; + detect_memory_layout(chunk_array); + buf = kzalloc(MEMORY_CHUNKS * CHUNK_INFO_SIZE, GFP_KERNEL); + if (!buf) { + kfree(chunk_array); + return -ENOMEM; + } + for (i = 0; i < MEMORY_CHUNKS; i++) { + sprintf(buf + (i * CHUNK_INFO_SIZE), "%016llx %016llx ", + (unsigned long long) chunk_array[i].addr, + (unsigned long long) chunk_array[i].size); + if (chunk_array[i].size == 0) + break; + } + kfree(chunk_array); + filp->private_data = buf; + return 0; +} + +static int zcore_memmap_release(struct inode *inode, struct file *filp) +{ + kfree(filp->private_data); + return 0; +} + +static const struct file_operations zcore_memmap_fops = { + .owner = THIS_MODULE, + .read = zcore_memmap_read, + .open = zcore_memmap_open, + .release = zcore_memmap_release, +}; + static void __init set_s390_lc_mask(union save_area *map) { @@ -554,18 +604,44 @@ static int __init check_sdias(void) return 0; } -static void __init zcore_header_init(int arch, struct zcore_header *hdr) +static int __init get_mem_size(unsigned long *mem) +{ + int i; + struct mem_chunk *chunk_array; + + chunk_array = kzalloc(MEMORY_CHUNKS * sizeof(struct mem_chunk), + GFP_KERNEL); + if (!chunk_array) + return -ENOMEM; + detect_memory_layout(chunk_array); + for (i = 0; i < MEMORY_CHUNKS; i++) { + if (chunk_array[i].size == 0) + break; + *mem += chunk_array[i].size; + } + kfree(chunk_array); + return 0; +} + +static int __init zcore_header_init(int arch, struct zcore_header *hdr) { + int rc; + unsigned long memory = 0; + if (arch == ARCH_S390X) hdr->arch_id = DUMP_ARCH_S390X; else hdr->arch_id = DUMP_ARCH_S390; - hdr->mem_size = sys_info.mem_size; - hdr->rmem_size = sys_info.mem_size; + rc = get_mem_size(&memory); + if (rc) + return rc; + hdr->mem_size = memory; + hdr->rmem_size = memory; hdr->mem_end = sys_info.mem_size; - hdr->num_pages = sys_info.mem_size / PAGE_SIZE; + hdr->num_pages = memory / PAGE_SIZE; hdr->tod = get_clock(); get_cpu_id(&hdr->cpu_id); + return 0; } static int __init zcore_init(void) @@ -608,7 +684,9 @@ static int __init zcore_init(void) if (rc) goto fail; - zcore_header_init(arch, &zcore_header); + rc = zcore_header_init(arch, &zcore_header); + if (rc) + goto fail; zcore_dir = debugfs_create_dir("zcore" , NULL); if (!zcore_dir) { @@ -618,13 +696,22 @@ static int __init zcore_init(void) zcore_file = debugfs_create_file("mem", S_IRUSR, zcore_dir, NULL, &zcore_fops); if (!zcore_file) { - debugfs_remove(zcore_dir); rc = -ENOMEM; - goto fail; + goto fail_dir; + } + zcore_memmap_file = debugfs_create_file("memmap", S_IRUSR, zcore_dir, + NULL, &zcore_memmap_fops); + if (!zcore_memmap_file) { + rc = -ENOMEM; + goto fail_file; } hsa_available = 1; return 0; +fail_file: + debugfs_remove(zcore_file); +fail_dir: + debugfs_remove(zcore_dir); fail: diag308(DIAG308_REL_HSA, NULL); return rc; -- cgit v0.10.2 From 9d853caf44e6f969a9ad056a9937e8d97bc2c761 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 17 Jul 2008 17:16:41 +0200 Subject: [S390] dasd: fix unsolicited SIM handling. Add missing schedule_bh and check that there is 32 bit sense data. Signed-off-by: Stefan Haberland Signed-off-by: Heiko Carstens Cc: Martin Schwidefsky diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index e0b7721..3590fdb 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1418,8 +1418,10 @@ static void dasd_eckd_handle_unsolicited_interrupt(struct dasd_device *device, /* service information message SIM */ - if ((irb->ecw[6] & DASD_SIM_SENSE) == DASD_SIM_SENSE) { + if (irb->esw.esw0.erw.cons && (irb->ecw[27] & DASD_SENSE_BIT_0) && + ((irb->ecw[6] & DASD_SIM_SENSE) == DASD_SIM_SENSE)) { dasd_3990_erp_handle_sim(device, irb->ecw); + dasd_schedule_device_bh(device); return; } -- cgit v0.10.2 From 626f311737770f0fb5c09a6da2ea795a559aa42a Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 17 Jul 2008 17:16:42 +0200 Subject: [S390] chsc headers userspace cleanup Kernel headers shouldn't expose functions to userspace. Cc: Cornelia Huck Signed-off-by: Adrian Bunk Signed-off-by: Heiko Carstens Cc: Martin Schwidefsky diff --git a/include/asm-s390/Kbuild b/include/asm-s390/Kbuild index 09f3125..bb5e9ed 100644 --- a/include/asm-s390/Kbuild +++ b/include/asm-s390/Kbuild @@ -8,9 +8,9 @@ header-y += ucontext.h header-y += vtoc.h header-y += zcrypt.h header-y += kvm.h -header-y += schid.h header-y += chsc.h unifdef-y += cmb.h unifdef-y += debug.h unifdef-y += chpid.h +unifdef-y += schid.h diff --git a/include/asm-s390/chpid.h b/include/asm-s390/chpid.h index 606844d..dfe3c7f 100644 --- a/include/asm-s390/chpid.h +++ b/include/asm-s390/chpid.h @@ -20,6 +20,9 @@ struct chp_id { u8 id; } __attribute__((packed)); +#ifdef __KERNEL__ +#include + static inline void chp_id_init(struct chp_id *chpid) { memset(chpid, 0, sizeof(struct chp_id)); @@ -40,9 +43,6 @@ static inline void chp_id_next(struct chp_id *chpid) } } -#ifdef __KERNEL__ -#include - static inline int chp_id_is_valid(struct chp_id *chpid) { return (chpid->cssid <= __MAX_CSSID); diff --git a/include/asm-s390/schid.h b/include/asm-s390/schid.h index 5017ffa..7bdc0fe 100644 --- a/include/asm-s390/schid.h +++ b/include/asm-s390/schid.h @@ -10,6 +10,7 @@ struct subchannel_id { __u32 sch_no : 16; } __attribute__ ((packed, aligned(4))); +#ifdef __KERNEL__ /* Helper function for sane state of pre-allocated subchannel_id. */ static inline void @@ -25,4 +26,6 @@ schid_equal(struct subchannel_id *schid1, struct subchannel_id *schid2) return !memcmp(schid1, schid2, sizeof(struct subchannel_id)); } +#endif /* __KERNEL__ */ + #endif /* ASM_SCHID_H */ -- cgit v0.10.2 From 7337194f708bac977511c7890d7038ded187041a Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 17 Jul 2008 17:16:43 +0200 Subject: [S390] dasd: Fix cleanup in dasd_{fba,diag}_check_characteristics(). Signed-off-by: Cornelia Huck Signed-off-by: Heiko Carstens Cc: Martin Schwidefsky diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index d91df38..ab5c5b4 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -333,7 +333,8 @@ dasd_diag_check_device(struct dasd_device *device) if (IS_ERR(block)) { DEV_MESSAGE(KERN_WARNING, device, "%s", "could not allocate dasd block structure"); - kfree(device->private); + device->private = NULL; + kfree(private); return PTR_ERR(block); } device->block = block; @@ -348,7 +349,8 @@ dasd_diag_check_device(struct dasd_device *device) if (rc) { DEV_MESSAGE(KERN_WARNING, device, "failed to retrieve device " "information (rc=%d)", rc); - return -ENOTSUPP; + rc = -ENOTSUPP; + goto out; } /* Figure out position of label block */ @@ -362,7 +364,8 @@ dasd_diag_check_device(struct dasd_device *device) default: DEV_MESSAGE(KERN_WARNING, device, "unsupported device class " "(class=%d)", private->rdc_data.vdev_class); - return -ENOTSUPP; + rc = -ENOTSUPP; + goto out; } DBF_DEV_EVENT(DBF_INFO, device, @@ -379,7 +382,8 @@ dasd_diag_check_device(struct dasd_device *device) if (label == NULL) { DEV_MESSAGE(KERN_WARNING, device, "%s", "No memory to allocate initialization request"); - return -ENOMEM; + rc = -ENOMEM; + goto out; } rc = 0; end_block = 0; @@ -403,7 +407,7 @@ dasd_diag_check_device(struct dasd_device *device) DEV_MESSAGE(KERN_WARNING, device, "%s", "DIAG call failed"); rc = -EOPNOTSUPP; - goto out; + goto out_label; } mdsk_term_io(device); if (rc == 0) @@ -413,7 +417,7 @@ dasd_diag_check_device(struct dasd_device *device) DEV_MESSAGE(KERN_WARNING, device, "device access failed " "(rc=%d)", rc); rc = -EIO; - goto out; + goto out_label; } /* check for label block */ if (memcmp(label->label_id, DASD_DIAG_CMS1, @@ -439,8 +443,15 @@ dasd_diag_check_device(struct dasd_device *device) (unsigned long) (block->blocks << block->s2b_shift) >> 1); } -out: +out_label: free_page((long) label); +out: + if (rc) { + device->block = NULL; + dasd_free_block(block); + device->private = NULL; + kfree(private); + } return rc; } diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index aee4656..aa0c533 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -139,7 +139,8 @@ dasd_fba_check_characteristics(struct dasd_device *device) if (IS_ERR(block)) { DEV_MESSAGE(KERN_WARNING, device, "%s", "could not allocate dasd block structure"); - kfree(device->private); + device->private = NULL; + kfree(private); return PTR_ERR(block); } device->block = block; @@ -152,6 +153,10 @@ dasd_fba_check_characteristics(struct dasd_device *device) DEV_MESSAGE(KERN_WARNING, device, "Read device characteristics returned error %d", rc); + device->block = NULL; + dasd_free_block(block); + device->private = NULL; + kfree(private); return rc; } -- cgit v0.10.2 From c5a37255493a3a8bf527534c8700dd73bd591fc7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 17 Jul 2008 17:16:44 +0200 Subject: [S390] Increase default warning stacksize. Compiling a kernel with allmodconfig or allyesconfig results in tons of gcc warnings, because the default maximum stacksize from which on gcc will emit a warning is just 256 bytes. Increase this to 2048, so these warnings don't distract from the real warnings that we need to watch at. Signed-off-by: Heiko Carstens Cc: Martin Schwidefsky diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 5dc8f80..eb530b4 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -289,7 +289,7 @@ config WARN_STACK_SIZE int "Maximum frame size considered safe (128-2048)" range 128 2048 depends on WARN_STACK - default "256" + default "2048" help This allows you to specify the maximum frame size a function may have without the compiler complaining about it. -- cgit v0.10.2 From 8de2ce86cdde64d00fc4a4034008b35d8fc0dc83 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 17 Jul 2008 17:16:45 +0200 Subject: [S390] Fix stacktrace compile bug. Add missing module.h include to fix this: CC arch/s390/kernel/stacktrace.o arch/s390/kernel/stacktrace.c:84: warning: data definition has no type or storage class arch/s390/kernel/stacktrace.c:84: warning: type defaults to 'int' in declaration of 'EXPORT_SYMBOL_GPL' arch/s390/kernel/stacktrace.c:84: warning: parameter names (without types) in function declaration arch/s390/kernel/stacktrace.c:97: warning: data definition has no type or storage class arch/s390/kernel/stacktrace.c:97: warning: type defaults to 'int' in declaration of 'EXPORT_SYMBOL_GPL' arch/s390/kernel/stacktrace.c:97: warning: parameter names (without types) in function declaration Signed-off-by: Heiko Carstens Cc: Martin Schwidefsky diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 57571f1..8841919 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -10,6 +10,7 @@ #include #include #include +#include static unsigned long save_context_stack(struct stack_trace *trace, unsigned long sp, -- cgit v0.10.2 From b9993a38a9b491a9df48a5bc82d2e03ab44e352a Mon Sep 17 00:00:00 2001 From: Frank Munzert Date: Thu, 17 Jul 2008 17:16:46 +0200 Subject: [S390] vmur: Fix return code handling. Use -EOPNOTSUPP instead of -ENOTSUPP. Signed-off-by: Frank Munzert Signed-off-by: Heiko Carstens Cc: Martin Schwidefsky diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c index 0a9f1cc..b0ac44b 100644 --- a/drivers/s390/char/vmur.c +++ b/drivers/s390/char/vmur.c @@ -345,7 +345,7 @@ static int get_urd_class(struct urdev *urd) cc = diag210(&ur_diag210); switch (cc) { case 0: - return -ENOTSUPP; + return -EOPNOTSUPP; case 2: return ur_diag210.vrdcvcla; /* virtual device class */ case 3: @@ -621,7 +621,7 @@ static int verify_device(struct urdev *urd) case DEV_CLASS_UR_I: return verify_uri_device(urd); default: - return -ENOTSUPP; + return -EOPNOTSUPP; } } @@ -654,7 +654,7 @@ static int get_file_reclen(struct urdev *urd) case DEV_CLASS_UR_I: return get_uri_file_reclen(urd); default: - return -ENOTSUPP; + return -EOPNOTSUPP; } } @@ -827,7 +827,7 @@ static int ur_probe(struct ccw_device *cdev) goto fail_remove_attr; } if ((urd->class != DEV_CLASS_UR_I) && (urd->class != DEV_CLASS_UR_O)) { - rc = -ENOTSUPP; + rc = -EOPNOTSUPP; goto fail_remove_attr; } spin_lock_irq(get_ccwdev_lock(cdev)); @@ -892,7 +892,7 @@ static int ur_set_online(struct ccw_device *cdev) } else if (urd->cdev->id.cu_type == PRINTER_DEVTYPE) { sprintf(node_id, "vmprt-%s", cdev->dev.bus_id); } else { - rc = -ENOTSUPP; + rc = -EOPNOTSUPP; goto fail_free_cdev; } -- cgit v0.10.2 From dae39843f478d181da5b5e1c2c703dfcaaf838c1 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 17 Jul 2008 17:16:47 +0200 Subject: [S390] cio: Export chsc_error_from_response(). Make chsc_error_from_response() available to chsc callers outside of chsc.c (namely qdio) to avoid duplicating error checking code. Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 65264a3..29826fd 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -27,7 +27,13 @@ static void *sei_page; -static int chsc_error_from_response(int response) +/** + * chsc_error_from_response() - convert a chsc response to an error + * @response: chsc response code + * + * Returns an appropriate Linux error code for @response. + */ +int chsc_error_from_response(int response) { switch (response) { case 0x0001: @@ -45,6 +51,7 @@ static int chsc_error_from_response(int response) return -EIO; } } +EXPORT_SYMBOL_GPL(chsc_error_from_response); struct chsc_ssd_area { struct chsc_header request; diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h index fb6c4d6..ba59bce 100644 --- a/drivers/s390/cio/chsc.h +++ b/drivers/s390/cio/chsc.h @@ -101,4 +101,6 @@ void chsc_chp_online(struct chp_id chpid); void chsc_chp_offline(struct chp_id chpid); int chsc_get_channel_measurement_chars(struct channel_path *chp); +int chsc_error_from_response(int response); + #endif -- cgit v0.10.2 From 779e6e1c724d30e0fd1baca78b852e41e3a23c1d Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Thu, 17 Jul 2008 17:16:48 +0200 Subject: [S390] qdio: new qdio driver. List of major changes: - split qdio driver into several files - seperation of thin interrupt code - improved handling for multiple thin interrupt devices - inbound and outbound processing now always runs in tasklet context - significant less tasklet schedules per interrupt needed - merged qebsm with non-qebsm handling - cleanup qdio interface and added kerneldoc - coding style Reviewed-by: Cornelia Huck Reviewed-by: Utz Bacher Reviewed-by: Ursula Braun Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile index 91e9e3f..bd79bd1 100644 --- a/drivers/s390/cio/Makefile +++ b/drivers/s390/cio/Makefile @@ -9,4 +9,6 @@ ccw_device-objs += device_id.o device_pgid.o device_status.o obj-y += ccw_device.o cmf.o obj-$(CONFIG_CHSC_SCH) += chsc_sch.o obj-$(CONFIG_CCWGROUP) += ccwgroup.o + +qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_perf.o qdio_setup.o obj-$(CONFIG_QDIO) += qdio.o diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c deleted file mode 100644 index 2bf36e1..0000000 --- a/drivers/s390/cio/qdio.c +++ /dev/null @@ -1,3929 +0,0 @@ -/* - * - * linux/drivers/s390/cio/qdio.c - * - * Linux for S/390 QDIO base support, Hipersocket base support - * version 2 - * - * Copyright 2000,2002 IBM Corporation - * Author(s): Utz Bacher - * 2.6 cio integration by Cornelia Huck - * - * Restriction: only 63 iqdio subchannels would have its own indicator, - * after that, subsequent subchannels share one indicator - * - * - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "cio.h" -#include "css.h" -#include "device.h" -#include "qdio.h" -#include "ioasm.h" -#include "chsc.h" - -/****************** MODULE PARAMETER VARIABLES ********************/ -MODULE_AUTHOR("Utz Bacher "); -MODULE_DESCRIPTION("QDIO base support version 2, " \ - "Copyright 2000 IBM Corporation"); -MODULE_LICENSE("GPL"); - -/******************** HERE WE GO ***********************************/ - -static const char version[] = "QDIO base support version 2"; - -static int qdio_performance_stats = 0; -static int proc_perf_file_registration; -static struct qdio_perf_stats perf_stats; - -static int hydra_thinints; -static int is_passthrough = 0; -static int omit_svs; - -static int indicator_used[INDICATORS_PER_CACHELINE]; -static __u32 * volatile indicators; -static __u32 volatile spare_indicator; -static atomic_t spare_indicator_usecount; -#define QDIO_MEMPOOL_SCSSC_ELEMENTS 2 -static mempool_t *qdio_mempool_scssc; -static struct kmem_cache *qdio_q_cache; - -static debug_info_t *qdio_dbf_setup; -static debug_info_t *qdio_dbf_sbal; -static debug_info_t *qdio_dbf_trace; -static debug_info_t *qdio_dbf_sense; -#ifdef CONFIG_QDIO_DEBUG -static debug_info_t *qdio_dbf_slsb_out; -static debug_info_t *qdio_dbf_slsb_in; -#endif /* CONFIG_QDIO_DEBUG */ - -/* iQDIO stuff: */ -static volatile struct qdio_q *tiq_list=NULL; /* volatile as it could change - during a while loop */ -static DEFINE_SPINLOCK(ttiq_list_lock); -static void *tiqdio_ind; -static void tiqdio_tl(unsigned long); -static DECLARE_TASKLET(tiqdio_tasklet,tiqdio_tl,0); - -/* not a macro, as one of the arguments is atomic_read */ -static inline int -qdio_min(int a,int b) -{ - if (a> 12); /* time>>12 is microseconds */ -} - -/* - * unfortunately, we can't just xchg the values; in do_QDIO we want to reserve - * the q in any case, so that we'll not be interrupted when we are in - * qdio_mark_tiq... shouldn't have a really bad impact, as reserving almost - * ever works (last famous words) - */ -static inline int -qdio_reserve_q(struct qdio_q *q) -{ - return atomic_add_return(1,&q->use_count) - 1; -} - -static inline void -qdio_release_q(struct qdio_q *q) -{ - atomic_dec(&q->use_count); -} - -/*check ccq */ -static int -qdio_check_ccq(struct qdio_q *q, unsigned int ccq) -{ - char dbf_text[15]; - - if (ccq == 0 || ccq == 32) - return 0; - if (ccq == 96 || ccq == 97) - return 1; - /*notify devices immediately*/ - sprintf(dbf_text,"%d", ccq); - QDIO_DBF_TEXT2(1,trace,dbf_text); - return -EIO; -} -/* EQBS: extract buffer states */ -static int -qdio_do_eqbs(struct qdio_q *q, unsigned char *state, - unsigned int *start, unsigned int *cnt) -{ - struct qdio_irq *irq; - unsigned int tmp_cnt, q_no, ccq; - int rc ; - char dbf_text[15]; - - ccq = 0; - tmp_cnt = *cnt; - irq = (struct qdio_irq*)q->irq_ptr; - q_no = q->q_no; - if(!q->is_input_q) - q_no += irq->no_input_qs; -again: - ccq = do_eqbs(irq->sch_token, state, q_no, start, cnt); - rc = qdio_check_ccq(q, ccq); - if ((ccq == 96) && (tmp_cnt != *cnt)) - rc = 0; - if (rc == 1) { - QDIO_DBF_TEXT5(1,trace,"eqAGAIN"); - goto again; - } - if (rc < 0) { - QDIO_DBF_TEXT2(1,trace,"eqberr"); - sprintf(dbf_text,"%2x,%2x,%d,%d",tmp_cnt, *cnt, ccq, q_no); - QDIO_DBF_TEXT2(1,trace,dbf_text); - q->handler(q->cdev,QDIO_STATUS_ACTIVATE_CHECK_CONDITION| - QDIO_STATUS_LOOK_FOR_ERROR, - 0, 0, 0, -1, -1, q->int_parm); - return 0; - } - return (tmp_cnt - *cnt); -} - -/* SQBS: set buffer states */ -static int -qdio_do_sqbs(struct qdio_q *q, unsigned char state, - unsigned int *start, unsigned int *cnt) -{ - struct qdio_irq *irq; - unsigned int tmp_cnt, q_no, ccq; - int rc; - char dbf_text[15]; - - ccq = 0; - tmp_cnt = *cnt; - irq = (struct qdio_irq*)q->irq_ptr; - q_no = q->q_no; - if(!q->is_input_q) - q_no += irq->no_input_qs; -again: - ccq = do_sqbs(irq->sch_token, state, q_no, start, cnt); - rc = qdio_check_ccq(q, ccq); - if (rc == 1) { - QDIO_DBF_TEXT5(1,trace,"sqAGAIN"); - goto again; - } - if (rc < 0) { - QDIO_DBF_TEXT3(1,trace,"sqberr"); - sprintf(dbf_text,"%2x,%2x",tmp_cnt,*cnt); - QDIO_DBF_TEXT3(1,trace,dbf_text); - sprintf(dbf_text,"%d,%d",ccq,q_no); - QDIO_DBF_TEXT3(1,trace,dbf_text); - q->handler(q->cdev,QDIO_STATUS_ACTIVATE_CHECK_CONDITION| - QDIO_STATUS_LOOK_FOR_ERROR, - 0, 0, 0, -1, -1, q->int_parm); - return 0; - } - return (tmp_cnt - *cnt); -} - -static inline int -qdio_set_slsb(struct qdio_q *q, unsigned int *bufno, - unsigned char state, unsigned int *count) -{ - volatile char *slsb; - struct qdio_irq *irq; - - irq = (struct qdio_irq*)q->irq_ptr; - if (!irq->is_qebsm) { - slsb = (char *)&q->slsb.acc.val[(*bufno)]; - xchg(slsb, state); - return 1; - } - return qdio_do_sqbs(q, state, bufno, count); -} - -#ifdef CONFIG_QDIO_DEBUG -static inline void -qdio_trace_slsb(struct qdio_q *q) -{ - if (q->queue_type==QDIO_TRACE_QTYPE) { - if (q->is_input_q) - QDIO_DBF_HEX2(0,slsb_in,&q->slsb, - QDIO_MAX_BUFFERS_PER_Q); - else - QDIO_DBF_HEX2(0,slsb_out,&q->slsb, - QDIO_MAX_BUFFERS_PER_Q); - } -} -#endif - -static inline int -set_slsb(struct qdio_q *q, unsigned int *bufno, - unsigned char state, unsigned int *count) -{ - int rc; -#ifdef CONFIG_QDIO_DEBUG - qdio_trace_slsb(q); -#endif - rc = qdio_set_slsb(q, bufno, state, count); -#ifdef CONFIG_QDIO_DEBUG - qdio_trace_slsb(q); -#endif - return rc; -} -static inline int -qdio_siga_sync(struct qdio_q *q, unsigned int gpr2, - unsigned int gpr3) -{ - int cc; - - QDIO_DBF_TEXT4(0,trace,"sigasync"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - qdio_perf_stat_inc(&perf_stats.siga_syncs); - - cc = do_siga_sync(q->schid, gpr2, gpr3); - if (cc) - QDIO_DBF_HEX3(0,trace,&cc,sizeof(int*)); - - return cc; -} - -static inline int -qdio_siga_sync_q(struct qdio_q *q) -{ - if (q->is_input_q) - return qdio_siga_sync(q, 0, q->mask); - return qdio_siga_sync(q, q->mask, 0); -} - -static int -__do_siga_output(struct qdio_q *q, unsigned int *busy_bit) -{ - struct qdio_irq *irq; - unsigned int fc = 0; - unsigned long schid; - - irq = (struct qdio_irq *) q->irq_ptr; - if (!irq->is_qebsm) - schid = *((u32 *)&q->schid); - else { - schid = irq->sch_token; - fc |= 0x80; - } - return do_siga_output(schid, q->mask, busy_bit, fc); -} - -/* - * returns QDIO_SIGA_ERROR_ACCESS_EXCEPTION as cc, when SIGA returns - * an access exception - */ -static int -qdio_siga_output(struct qdio_q *q) -{ - int cc; - __u32 busy_bit; - __u64 start_time=0; - - qdio_perf_stat_inc(&perf_stats.siga_outs); - - QDIO_DBF_TEXT4(0,trace,"sigaout"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - for (;;) { - cc = __do_siga_output(q, &busy_bit); -//QDIO_PRINT_ERR("cc=%x, busy=%x\n",cc,busy_bit); - if ((cc==2) && (busy_bit) && (q->is_iqdio_q)) { - if (!start_time) - start_time=NOW; - if ((NOW-start_time)>QDIO_BUSY_BIT_PATIENCE) - break; - } else - break; - } - - if ((cc==2) && (busy_bit)) - cc |= QDIO_SIGA_ERROR_B_BIT_SET; - - if (cc) - QDIO_DBF_HEX3(0,trace,&cc,sizeof(int*)); - - return cc; -} - -static int -qdio_siga_input(struct qdio_q *q) -{ - int cc; - - QDIO_DBF_TEXT4(0,trace,"sigain"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - qdio_perf_stat_inc(&perf_stats.siga_ins); - - cc = do_siga_input(q->schid, q->mask); - - if (cc) - QDIO_DBF_HEX3(0,trace,&cc,sizeof(int*)); - - return cc; -} - -/* locked by the locks in qdio_activate and qdio_cleanup */ -static __u32 * -qdio_get_indicator(void) -{ - int i; - - for (i = 0; i < INDICATORS_PER_CACHELINE; i++) - if (!indicator_used[i]) { - indicator_used[i]=1; - return indicators+i; - } - atomic_inc(&spare_indicator_usecount); - return (__u32 * volatile) &spare_indicator; -} - -/* locked by the locks in qdio_activate and qdio_cleanup */ -static void -qdio_put_indicator(__u32 *addr) -{ - int i; - - if ( (addr) && (addr!=&spare_indicator) ) { - i=addr-indicators; - indicator_used[i]=0; - } - if (addr == &spare_indicator) - atomic_dec(&spare_indicator_usecount); -} - -static inline void -tiqdio_clear_summary_bit(__u32 *location) -{ - QDIO_DBF_TEXT5(0,trace,"clrsummb"); - QDIO_DBF_HEX5(0,trace,&location,sizeof(void*)); - - xchg(location,0); -} - -static inline void -tiqdio_set_summary_bit(__u32 *location) -{ - QDIO_DBF_TEXT5(0,trace,"setsummb"); - QDIO_DBF_HEX5(0,trace,&location,sizeof(void*)); - - xchg(location,-1); -} - -static inline void -tiqdio_sched_tl(void) -{ - tasklet_hi_schedule(&tiqdio_tasklet); -} - -static void -qdio_mark_tiq(struct qdio_q *q) -{ - unsigned long flags; - - QDIO_DBF_TEXT4(0,trace,"mark iq"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - spin_lock_irqsave(&ttiq_list_lock,flags); - if (unlikely(atomic_read(&q->is_in_shutdown))) - goto out_unlock; - - if (!q->is_input_q) - goto out_unlock; - - if ((q->list_prev) || (q->list_next)) - goto out_unlock; - - if (!tiq_list) { - tiq_list=q; - q->list_prev=q; - q->list_next=q; - } else { - q->list_next=tiq_list; - q->list_prev=tiq_list->list_prev; - tiq_list->list_prev->list_next=q; - tiq_list->list_prev=q; - } - spin_unlock_irqrestore(&ttiq_list_lock,flags); - - tiqdio_set_summary_bit((__u32*)q->dev_st_chg_ind); - tiqdio_sched_tl(); - return; -out_unlock: - spin_unlock_irqrestore(&ttiq_list_lock,flags); - return; -} - -static inline void -qdio_mark_q(struct qdio_q *q) -{ - QDIO_DBF_TEXT4(0,trace,"mark q"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - if (unlikely(atomic_read(&q->is_in_shutdown))) - return; - - tasklet_schedule(&q->tasklet); -} - -static int -qdio_stop_polling(struct qdio_q *q) -{ -#ifdef QDIO_USE_PROCESSING_STATE - unsigned int tmp, gsf, count = 1; - unsigned char state = 0; - struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; - - if (!atomic_xchg(&q->polling,0)) - return 1; - - QDIO_DBF_TEXT4(0,trace,"stoppoll"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - /* show the card that we are not polling anymore */ - if (!q->is_input_q) - return 1; - - tmp = gsf = GET_SAVED_FRONTIER(q); - tmp = ((tmp + QDIO_MAX_BUFFERS_PER_Q-1) & (QDIO_MAX_BUFFERS_PER_Q-1) ); - set_slsb(q, &tmp, SLSB_P_INPUT_NOT_INIT, &count); - - /* - * we don't issue this SYNC_MEMORY, as we trust Rick T and - * moreover will not use the PROCESSING state under VM, so - * q->polling was 0 anyway - */ - /*SYNC_MEMORY;*/ - if (irq->is_qebsm) { - count = 1; - qdio_do_eqbs(q, &state, &gsf, &count); - } else - state = q->slsb.acc.val[gsf]; - if (state != SLSB_P_INPUT_PRIMED) - return 1; - /* - * set our summary bit again, as otherwise there is a - * small window we can miss between resetting it and - * checking for PRIMED state - */ - if (q->is_thinint_q) - tiqdio_set_summary_bit((__u32*)q->dev_st_chg_ind); - return 0; - -#else /* QDIO_USE_PROCESSING_STATE */ - return 1; -#endif /* QDIO_USE_PROCESSING_STATE */ -} - -/* - * see the comment in do_QDIO and before qdio_reserve_q about the - * sophisticated locking outside of unmark_q, so that we don't need to - * disable the interrupts :-) -*/ -static void -qdio_unmark_q(struct qdio_q *q) -{ - unsigned long flags; - - QDIO_DBF_TEXT4(0,trace,"unmark q"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - if ((!q->list_prev)||(!q->list_next)) - return; - - if ((q->is_thinint_q)&&(q->is_input_q)) { - /* iQDIO */ - spin_lock_irqsave(&ttiq_list_lock,flags); - /* in case cleanup has done this already and simultanously - * qdio_unmark_q is called from the interrupt handler, we've - * got to check this in this specific case again */ - if ((!q->list_prev)||(!q->list_next)) - goto out; - if (q->list_next==q) { - /* q was the only interesting q */ - tiq_list=NULL; - q->list_next=NULL; - q->list_prev=NULL; - } else { - q->list_next->list_prev=q->list_prev; - q->list_prev->list_next=q->list_next; - tiq_list=q->list_next; - q->list_next=NULL; - q->list_prev=NULL; - } -out: - spin_unlock_irqrestore(&ttiq_list_lock,flags); - } -} - -static inline unsigned long -tiqdio_clear_global_summary(void) -{ - unsigned long time; - - QDIO_DBF_TEXT5(0,trace,"clrglobl"); - - time = do_clear_global_summary(); - - QDIO_DBF_HEX5(0,trace,&time,sizeof(unsigned long)); - - return time; -} - - -/************************* OUTBOUND ROUTINES *******************************/ -static int -qdio_qebsm_get_outbound_buffer_frontier(struct qdio_q *q) -{ - struct qdio_irq *irq; - unsigned char state; - unsigned int cnt, count, ftc; - - irq = (struct qdio_irq *) q->irq_ptr; - if ((!q->is_iqdio_q) && (!q->hydra_gives_outbound_pcis)) - SYNC_MEMORY; - - ftc = q->first_to_check; - count = qdio_min(atomic_read(&q->number_of_buffers_used), - (QDIO_MAX_BUFFERS_PER_Q-1)); - if (count == 0) - return q->first_to_check; - cnt = qdio_do_eqbs(q, &state, &ftc, &count); - if (cnt == 0) - return q->first_to_check; - switch (state) { - case SLSB_P_OUTPUT_ERROR: - QDIO_DBF_TEXT3(0,trace,"outperr"); - atomic_sub(cnt , &q->number_of_buffers_used); - if (q->qdio_error) - q->error_status_flags |= - QDIO_STATUS_MORE_THAN_ONE_QDIO_ERROR; - q->qdio_error = SLSB_P_OUTPUT_ERROR; - q->error_status_flags |= QDIO_STATUS_LOOK_FOR_ERROR; - q->first_to_check = ftc; - break; - case SLSB_P_OUTPUT_EMPTY: - QDIO_DBF_TEXT5(0,trace,"outpempt"); - atomic_sub(cnt, &q->number_of_buffers_used); - q->first_to_check = ftc; - break; - case SLSB_CU_OUTPUT_PRIMED: - /* all buffers primed */ - QDIO_DBF_TEXT5(0,trace,"outpprim"); - break; - default: - break; - } - QDIO_DBF_HEX4(0,trace,&q->first_to_check,sizeof(int)); - return q->first_to_check; -} - -static int -qdio_qebsm_get_inbound_buffer_frontier(struct qdio_q *q) -{ - struct qdio_irq *irq; - unsigned char state; - int tmp, ftc, count, cnt; - char dbf_text[15]; - - - irq = (struct qdio_irq *) q->irq_ptr; - ftc = q->first_to_check; - count = qdio_min(atomic_read(&q->number_of_buffers_used), - (QDIO_MAX_BUFFERS_PER_Q-1)); - if (count == 0) - return q->first_to_check; - cnt = qdio_do_eqbs(q, &state, &ftc, &count); - if (cnt == 0) - return q->first_to_check; - switch (state) { - case SLSB_P_INPUT_ERROR : -#ifdef CONFIG_QDIO_DEBUG - QDIO_DBF_TEXT3(1,trace,"inperr"); - sprintf(dbf_text,"%2x,%2x",ftc,count); - QDIO_DBF_TEXT3(1,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - if (q->qdio_error) - q->error_status_flags |= - QDIO_STATUS_MORE_THAN_ONE_QDIO_ERROR; - q->qdio_error = SLSB_P_INPUT_ERROR; - q->error_status_flags |= QDIO_STATUS_LOOK_FOR_ERROR; - atomic_sub(cnt, &q->number_of_buffers_used); - q->first_to_check = ftc; - break; - case SLSB_P_INPUT_PRIMED : - QDIO_DBF_TEXT3(0,trace,"inptprim"); - sprintf(dbf_text,"%2x,%2x",ftc,count); - QDIO_DBF_TEXT3(1,trace,dbf_text); - tmp = 0; - ftc = q->first_to_check; -#ifdef QDIO_USE_PROCESSING_STATE - if (cnt > 1) { - cnt -= 1; - tmp = set_slsb(q, &ftc, SLSB_P_INPUT_NOT_INIT, &cnt); - if (!tmp) - break; - } - cnt = 1; - tmp += set_slsb(q, &ftc, - SLSB_P_INPUT_PROCESSING, &cnt); - atomic_set(&q->polling, 1); -#else - tmp = set_slsb(q, &ftc, SLSB_P_INPUT_NOT_INIT, &cnt); -#endif - atomic_sub(tmp, &q->number_of_buffers_used); - q->first_to_check = ftc; - break; - case SLSB_CU_INPUT_EMPTY: - case SLSB_P_INPUT_NOT_INIT: - case SLSB_P_INPUT_PROCESSING: - QDIO_DBF_TEXT5(0,trace,"inpnipro"); - break; - default: - break; - } - QDIO_DBF_HEX4(0,trace,&q->first_to_check,sizeof(int)); - return q->first_to_check; -} - -static int -qdio_get_outbound_buffer_frontier(struct qdio_q *q) -{ - struct qdio_irq *irq; - volatile char *slsb; - unsigned int count = 1; - int first_not_to_check, f, f_mod_no; - char dbf_text[15]; - - QDIO_DBF_TEXT4(0,trace,"getobfro"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - irq = (struct qdio_irq *) q->irq_ptr; - if (irq->is_qebsm) - return qdio_qebsm_get_outbound_buffer_frontier(q); - - slsb=&q->slsb.acc.val[0]; - f_mod_no=f=q->first_to_check; - /* - * f points to already processed elements, so f+no_used is correct... - * ... but: we don't check 128 buffers, as otherwise - * qdio_has_outbound_q_moved would return 0 - */ - first_not_to_check=f+qdio_min(atomic_read(&q->number_of_buffers_used), - (QDIO_MAX_BUFFERS_PER_Q-1)); - - if (((!q->is_iqdio_q) && (!q->hydra_gives_outbound_pcis)) || - (q->queue_type == QDIO_IQDIO_QFMT_ASYNCH)) - SYNC_MEMORY; - -check_next: - if (f==first_not_to_check) - goto out; - - switch(slsb[f_mod_no]) { - - /* the adapter has not fetched the output yet */ - case SLSB_CU_OUTPUT_PRIMED: - QDIO_DBF_TEXT5(0,trace,"outpprim"); - break; - - /* the adapter got it */ - case SLSB_P_OUTPUT_EMPTY: - atomic_dec(&q->number_of_buffers_used); - f++; - f_mod_no=f&(QDIO_MAX_BUFFERS_PER_Q-1); - QDIO_DBF_TEXT5(0,trace,"outpempt"); - goto check_next; - - case SLSB_P_OUTPUT_ERROR: - QDIO_DBF_TEXT3(0,trace,"outperr"); - sprintf(dbf_text,"%x-%x-%x",f_mod_no, - q->sbal[f_mod_no]->element[14].sbalf.value, - q->sbal[f_mod_no]->element[15].sbalf.value); - QDIO_DBF_TEXT3(1,trace,dbf_text); - QDIO_DBF_HEX2(1,sbal,q->sbal[f_mod_no],256); - - /* kind of process the buffer */ - set_slsb(q, &f_mod_no, SLSB_P_OUTPUT_NOT_INIT, &count); - - /* - * we increment the frontier, as this buffer - * was processed obviously - */ - atomic_dec(&q->number_of_buffers_used); - f_mod_no=(f_mod_no+1)&(QDIO_MAX_BUFFERS_PER_Q-1); - - if (q->qdio_error) - q->error_status_flags|= - QDIO_STATUS_MORE_THAN_ONE_QDIO_ERROR; - q->qdio_error=SLSB_P_OUTPUT_ERROR; - q->error_status_flags|=QDIO_STATUS_LOOK_FOR_ERROR; - - break; - - /* no new buffers */ - default: - QDIO_DBF_TEXT5(0,trace,"outpni"); - } -out: - return (q->first_to_check=f_mod_no); -} - -/* all buffers are processed */ -static int -qdio_is_outbound_q_done(struct qdio_q *q) -{ - int no_used; -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[15]; -#endif - - no_used=atomic_read(&q->number_of_buffers_used); - -#ifdef CONFIG_QDIO_DEBUG - if (no_used) { - sprintf(dbf_text,"oqisnt%02x",no_used); - QDIO_DBF_TEXT4(0,trace,dbf_text); - } else { - QDIO_DBF_TEXT4(0,trace,"oqisdone"); - } - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); -#endif /* CONFIG_QDIO_DEBUG */ - return (no_used==0); -} - -static int -qdio_has_outbound_q_moved(struct qdio_q *q) -{ - int i; - - i=qdio_get_outbound_buffer_frontier(q); - - if ( (i!=GET_SAVED_FRONTIER(q)) || - (q->error_status_flags&QDIO_STATUS_LOOK_FOR_ERROR) ) { - SAVE_FRONTIER(q,i); - QDIO_DBF_TEXT4(0,trace,"oqhasmvd"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - return 1; - } else { - QDIO_DBF_TEXT4(0,trace,"oqhsntmv"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - return 0; - } -} - -static void -qdio_kick_outbound_q(struct qdio_q *q) -{ - int result; -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[15]; - - QDIO_DBF_TEXT4(0,trace,"kickoutq"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); -#endif /* CONFIG_QDIO_DEBUG */ - - if (!q->siga_out) - return; - - /* here's the story with cc=2 and busy bit set (thanks, Rick): - * VM's CP could present us cc=2 and busy bit set on SIGA-write - * during reconfiguration of their Guest LAN (only in HIPERS mode, - * QDIO mode is asynchronous -- cc=2 and busy bit there will take - * the queues down immediately; and not being under VM we have a - * problem on cc=2 and busy bit set right away). - * - * Therefore qdio_siga_output will try for a short time constantly, - * if such a condition occurs. If it doesn't change, it will - * increase the busy_siga_counter and save the timestamp, and - * schedule the queue for later processing (via mark_q, using the - * queue tasklet). __qdio_outbound_processing will check out the - * counter. If non-zero, it will call qdio_kick_outbound_q as often - * as the value of the counter. This will attempt further SIGA - * instructions. For each successful SIGA, the counter is - * decreased, for failing SIGAs the counter remains the same, after - * all. - * After some time of no movement, qdio_kick_outbound_q will - * finally fail and reflect corresponding error codes to call - * the upper layer module and have it take the queues down. - * - * Note that this is a change from the original HiperSockets design - * (saying cc=2 and busy bit means take the queues down), but in - * these days Guest LAN didn't exist... excessive cc=2 with busy bit - * conditions will still take the queues down, but the threshold is - * higher due to the Guest LAN environment. - */ - - - result=qdio_siga_output(q); - - switch (result) { - case 0: - /* went smooth this time, reset timestamp */ -#ifdef CONFIG_QDIO_DEBUG - QDIO_DBF_TEXT3(0,trace,"cc2reslv"); - sprintf(dbf_text,"%4x%2x%2x",q->schid.sch_no,q->q_no, - atomic_read(&q->busy_siga_counter)); - QDIO_DBF_TEXT3(0,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - q->timing.busy_start=0; - break; - case (2|QDIO_SIGA_ERROR_B_BIT_SET): - /* cc=2 and busy bit: */ - atomic_inc(&q->busy_siga_counter); - - /* if the last siga was successful, save - * timestamp here */ - if (!q->timing.busy_start) - q->timing.busy_start=NOW; - - /* if we're in time, don't touch error_status_flags - * and siga_error */ - if (NOW-q->timing.busy_startschid.sch_no,q->q_no, - atomic_read(&q->busy_siga_counter)); - QDIO_DBF_TEXT3(0,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - /* else fallthrough and report error */ - default: - /* for plain cc=1, 2 or 3: */ - if (q->siga_error) - q->error_status_flags|= - QDIO_STATUS_MORE_THAN_ONE_SIGA_ERROR; - q->error_status_flags|= - QDIO_STATUS_LOOK_FOR_ERROR; - q->siga_error=result; - } -} - -static void -qdio_kick_outbound_handler(struct qdio_q *q) -{ - int start, end, real_end, count; -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[15]; -#endif - - start = q->first_element_to_kick; - /* last_move_ftc was just updated */ - real_end = GET_SAVED_FRONTIER(q); - end = (real_end+QDIO_MAX_BUFFERS_PER_Q-1)& - (QDIO_MAX_BUFFERS_PER_Q-1); - count = (end+QDIO_MAX_BUFFERS_PER_Q+1-start)& - (QDIO_MAX_BUFFERS_PER_Q-1); - -#ifdef CONFIG_QDIO_DEBUG - QDIO_DBF_TEXT4(0,trace,"kickouth"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - sprintf(dbf_text,"s=%2xc=%2x",start,count); - QDIO_DBF_TEXT4(0,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - - if (q->state==QDIO_IRQ_STATE_ACTIVE) - q->handler(q->cdev,QDIO_STATUS_OUTBOUND_INT| - q->error_status_flags, - q->qdio_error,q->siga_error,q->q_no,start,count, - q->int_parm); - - /* for the next time: */ - q->first_element_to_kick=real_end; - q->qdio_error=0; - q->siga_error=0; - q->error_status_flags=0; -} - -static void -__qdio_outbound_processing(struct qdio_q *q) -{ - int siga_attempts; - - QDIO_DBF_TEXT4(0,trace,"qoutproc"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - if (unlikely(qdio_reserve_q(q))) { - qdio_release_q(q); - qdio_perf_stat_inc(&perf_stats.outbound_tl_runs_resched); - /* as we're sissies, we'll check next time */ - if (likely(!atomic_read(&q->is_in_shutdown))) { - qdio_mark_q(q); - QDIO_DBF_TEXT4(0,trace,"busy,agn"); - } - return; - } - qdio_perf_stat_inc(&perf_stats.outbound_tl_runs); - qdio_perf_stat_inc(&perf_stats.tl_runs); - - /* see comment in qdio_kick_outbound_q */ - siga_attempts=atomic_read(&q->busy_siga_counter); - while (siga_attempts) { - atomic_dec(&q->busy_siga_counter); - qdio_kick_outbound_q(q); - siga_attempts--; - } - - if (qdio_has_outbound_q_moved(q)) - qdio_kick_outbound_handler(q); - - if (q->queue_type == QDIO_ZFCP_QFMT) { - if ((!q->hydra_gives_outbound_pcis) && - (!qdio_is_outbound_q_done(q))) - qdio_mark_q(q); - } - else if (((!q->is_iqdio_q) && (!q->is_pci_out)) || - (q->queue_type == QDIO_IQDIO_QFMT_ASYNCH)) { - /* - * make sure buffer switch from PRIMED to EMPTY is noticed - * and outbound_handler is called - */ - if (qdio_is_outbound_q_done(q)) { - del_timer(&q->timer); - } else { - if (!timer_pending(&q->timer)) - mod_timer(&q->timer, jiffies + - QDIO_FORCE_CHECK_TIMEOUT); - } - } - - qdio_release_q(q); -} - -static void -qdio_outbound_processing(unsigned long q) -{ - __qdio_outbound_processing((struct qdio_q *) q); -} - -/************************* INBOUND ROUTINES *******************************/ - - -static int -qdio_get_inbound_buffer_frontier(struct qdio_q *q) -{ - struct qdio_irq *irq; - int f,f_mod_no; - volatile char *slsb; - unsigned int count = 1; - int first_not_to_check; -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[15]; -#endif /* CONFIG_QDIO_DEBUG */ -#ifdef QDIO_USE_PROCESSING_STATE - int last_position=-1; -#endif /* QDIO_USE_PROCESSING_STATE */ - - QDIO_DBF_TEXT4(0,trace,"getibfro"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - irq = (struct qdio_irq *) q->irq_ptr; - if (irq->is_qebsm) - return qdio_qebsm_get_inbound_buffer_frontier(q); - - slsb=&q->slsb.acc.val[0]; - f_mod_no=f=q->first_to_check; - /* - * we don't check 128 buffers, as otherwise qdio_has_inbound_q_moved - * would return 0 - */ - first_not_to_check=f+qdio_min(atomic_read(&q->number_of_buffers_used), - (QDIO_MAX_BUFFERS_PER_Q-1)); - - /* - * we don't use this one, as a PCI or we after a thin interrupt - * will sync the queues - */ - /* SYNC_MEMORY;*/ - -check_next: - f_mod_no=f&(QDIO_MAX_BUFFERS_PER_Q-1); - if (f==first_not_to_check) - goto out; - switch (slsb[f_mod_no]) { - - /* CU_EMPTY means frontier is reached */ - case SLSB_CU_INPUT_EMPTY: - QDIO_DBF_TEXT5(0,trace,"inptempt"); - break; - - /* P_PRIMED means set slsb to P_PROCESSING and move on */ - case SLSB_P_INPUT_PRIMED: - QDIO_DBF_TEXT5(0,trace,"inptprim"); - -#ifdef QDIO_USE_PROCESSING_STATE - /* - * as soon as running under VM, polling the input queues will - * kill VM in terms of CP overhead - */ - if (q->siga_sync) { - set_slsb(q, &f_mod_no, SLSB_P_INPUT_NOT_INIT, &count); - } else { - /* set the previous buffer to NOT_INIT. The current - * buffer will be set to PROCESSING at the end of - * this function to avoid further interrupts. */ - if (last_position>=0) - set_slsb(q, &last_position, - SLSB_P_INPUT_NOT_INIT, &count); - atomic_set(&q->polling,1); - last_position=f_mod_no; - } -#else /* QDIO_USE_PROCESSING_STATE */ - set_slsb(q, &f_mod_no, SLSB_P_INPUT_NOT_INIT, &count); -#endif /* QDIO_USE_PROCESSING_STATE */ - /* - * not needed, as the inbound queue will be synced on the next - * siga-r, resp. tiqdio_is_inbound_q_done will do the siga-s - */ - /*SYNC_MEMORY;*/ - f++; - atomic_dec(&q->number_of_buffers_used); - goto check_next; - - case SLSB_P_INPUT_NOT_INIT: - case SLSB_P_INPUT_PROCESSING: - QDIO_DBF_TEXT5(0,trace,"inpnipro"); - break; - - /* P_ERROR means frontier is reached, break and report error */ - case SLSB_P_INPUT_ERROR: -#ifdef CONFIG_QDIO_DEBUG - sprintf(dbf_text,"inperr%2x",f_mod_no); - QDIO_DBF_TEXT3(1,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - QDIO_DBF_HEX2(1,sbal,q->sbal[f_mod_no],256); - - /* kind of process the buffer */ - set_slsb(q, &f_mod_no, SLSB_P_INPUT_NOT_INIT, &count); - - if (q->qdio_error) - q->error_status_flags|= - QDIO_STATUS_MORE_THAN_ONE_QDIO_ERROR; - q->qdio_error=SLSB_P_INPUT_ERROR; - q->error_status_flags|=QDIO_STATUS_LOOK_FOR_ERROR; - - /* we increment the frontier, as this buffer - * was processed obviously */ - f_mod_no=(f_mod_no+1)&(QDIO_MAX_BUFFERS_PER_Q-1); - atomic_dec(&q->number_of_buffers_used); - -#ifdef QDIO_USE_PROCESSING_STATE - last_position=-1; -#endif /* QDIO_USE_PROCESSING_STATE */ - - break; - - /* everything else means frontier not changed (HALTED or so) */ - default: - break; - } -out: - q->first_to_check=f_mod_no; - -#ifdef QDIO_USE_PROCESSING_STATE - if (last_position>=0) - set_slsb(q, &last_position, SLSB_P_INPUT_PROCESSING, &count); -#endif /* QDIO_USE_PROCESSING_STATE */ - - QDIO_DBF_HEX4(0,trace,&q->first_to_check,sizeof(int)); - - return q->first_to_check; -} - -static int -qdio_has_inbound_q_moved(struct qdio_q *q) -{ - int i; - - i=qdio_get_inbound_buffer_frontier(q); - if ( (i!=GET_SAVED_FRONTIER(q)) || - (q->error_status_flags&QDIO_STATUS_LOOK_FOR_ERROR) ) { - SAVE_FRONTIER(q,i); - if ((!q->siga_sync)&&(!q->hydra_gives_outbound_pcis)) - SAVE_TIMESTAMP(q); - - QDIO_DBF_TEXT4(0,trace,"inhasmvd"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - return 1; - } else { - QDIO_DBF_TEXT4(0,trace,"inhsntmv"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - return 0; - } -} - -/* means, no more buffers to be filled */ -static int -tiqdio_is_inbound_q_done(struct qdio_q *q) -{ - int no_used; - unsigned int start_buf, count; - unsigned char state = 0; - struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; - -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[15]; -#endif - - no_used=atomic_read(&q->number_of_buffers_used); - - /* propagate the change from 82 to 80 through VM */ - SYNC_MEMORY; - -#ifdef CONFIG_QDIO_DEBUG - if (no_used) { - sprintf(dbf_text,"iqisnt%02x",no_used); - QDIO_DBF_TEXT4(0,trace,dbf_text); - } else { - QDIO_DBF_TEXT4(0,trace,"iniqisdo"); - } - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); -#endif /* CONFIG_QDIO_DEBUG */ - - if (!no_used) - return 1; - if (irq->is_qebsm) { - count = 1; - start_buf = q->first_to_check; - qdio_do_eqbs(q, &state, &start_buf, &count); - } else - state = q->slsb.acc.val[q->first_to_check]; - if (state != SLSB_P_INPUT_PRIMED) - /* - * nothing more to do, if next buffer is not PRIMED. - * note that we did a SYNC_MEMORY before, that there - * has been a sychnronization. - * we will return 0 below, as there is nothing to do - * (stop_polling not necessary, as we have not been - * using the PROCESSING state - */ - return 0; - - /* - * ok, the next input buffer is primed. that means, that device state - * change indicator and adapter local summary are set, so we will find - * it next time. - * we will return 0 below, as there is nothing to do, except scheduling - * ourselves for the next time. - */ - tiqdio_set_summary_bit((__u32*)q->dev_st_chg_ind); - tiqdio_sched_tl(); - return 0; -} - -static int -qdio_is_inbound_q_done(struct qdio_q *q) -{ - int no_used; - unsigned int start_buf, count; - unsigned char state = 0; - struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; - -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[15]; -#endif - - no_used=atomic_read(&q->number_of_buffers_used); - - /* - * we need that one for synchronization with the adapter, as it - * does a kind of PCI avoidance - */ - SYNC_MEMORY; - - if (!no_used) { - QDIO_DBF_TEXT4(0,trace,"inqisdnA"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - return 1; - } - if (irq->is_qebsm) { - count = 1; - start_buf = q->first_to_check; - qdio_do_eqbs(q, &state, &start_buf, &count); - } else - state = q->slsb.acc.val[q->first_to_check]; - if (state == SLSB_P_INPUT_PRIMED) { - /* we got something to do */ - QDIO_DBF_TEXT4(0,trace,"inqisntA"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - return 0; - } - - /* on VM, we don't poll, so the q is always done here */ - if (q->siga_sync) - return 1; - if (q->hydra_gives_outbound_pcis) - return 1; - - /* - * at this point we know, that inbound first_to_check - * has (probably) not moved (see qdio_inbound_processing) - */ - if (NOW>GET_SAVED_TIMESTAMP(q)+q->timing.threshold) { -#ifdef CONFIG_QDIO_DEBUG - QDIO_DBF_TEXT4(0,trace,"inqisdon"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - sprintf(dbf_text,"pf%02xcn%02x",q->first_to_check,no_used); - QDIO_DBF_TEXT4(0,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - return 1; - } else { -#ifdef CONFIG_QDIO_DEBUG - QDIO_DBF_TEXT4(0,trace,"inqisntd"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - sprintf(dbf_text,"pf%02xcn%02x",q->first_to_check,no_used); - QDIO_DBF_TEXT4(0,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - return 0; - } -} - -static void -qdio_kick_inbound_handler(struct qdio_q *q) -{ - int count, start, end, real_end, i; -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[15]; -#endif - - QDIO_DBF_TEXT4(0,trace,"kickinh"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - start=q->first_element_to_kick; - real_end=q->first_to_check; - end=(real_end+QDIO_MAX_BUFFERS_PER_Q-1)&(QDIO_MAX_BUFFERS_PER_Q-1); - - i=start; - count=0; - while (1) { - count++; - if (i==end) - break; - i=(i+1)&(QDIO_MAX_BUFFERS_PER_Q-1); - } - -#ifdef CONFIG_QDIO_DEBUG - sprintf(dbf_text,"s=%2xc=%2x",start,count); - QDIO_DBF_TEXT4(0,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - - if (likely(q->state==QDIO_IRQ_STATE_ACTIVE)) - q->handler(q->cdev, - QDIO_STATUS_INBOUND_INT|q->error_status_flags, - q->qdio_error,q->siga_error,q->q_no,start,count, - q->int_parm); - - /* for the next time: */ - q->first_element_to_kick=real_end; - q->qdio_error=0; - q->siga_error=0; - q->error_status_flags=0; - - qdio_perf_stat_inc(&perf_stats.inbound_cnt); -} - -static void -__tiqdio_inbound_processing(struct qdio_q *q, int spare_ind_was_set) -{ - struct qdio_irq *irq_ptr; - struct qdio_q *oq; - int i; - - QDIO_DBF_TEXT4(0,trace,"iqinproc"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - /* - * we first want to reserve the q, so that we know, that we don't - * interrupt ourselves and call qdio_unmark_q, as is_in_shutdown might - * be set - */ - if (unlikely(qdio_reserve_q(q))) { - qdio_release_q(q); - qdio_perf_stat_inc(&perf_stats.inbound_thin_tl_runs_resched); - /* - * as we might just be about to stop polling, we make - * sure that we check again at least once more - */ - tiqdio_sched_tl(); - return; - } - qdio_perf_stat_inc(&perf_stats.inbound_thin_tl_runs); - if (unlikely(atomic_read(&q->is_in_shutdown))) { - qdio_unmark_q(q); - goto out; - } - - /* - * we reset spare_ind_was_set, when the queue does not use the - * spare indicator - */ - if (spare_ind_was_set) - spare_ind_was_set = (q->dev_st_chg_ind == &spare_indicator); - - if (!(*(q->dev_st_chg_ind)) && !spare_ind_was_set) - goto out; - /* - * q->dev_st_chg_ind is the indicator, be it shared or not. - * only clear it, if indicator is non-shared - */ - if (q->dev_st_chg_ind != &spare_indicator) - tiqdio_clear_summary_bit((__u32*)q->dev_st_chg_ind); - - if (q->hydra_gives_outbound_pcis) { - if (!q->siga_sync_done_on_thinints) { - SYNC_MEMORY_ALL; - } else if (!q->siga_sync_done_on_outb_tis) { - SYNC_MEMORY_ALL_OUTB; - } - } else { - SYNC_MEMORY; - } - /* - * maybe we have to do work on our outbound queues... at least - * we have to check the outbound-int-capable thinint-capable - * queues - */ - if (q->hydra_gives_outbound_pcis) { - irq_ptr = (struct qdio_irq*)q->irq_ptr; - for (i=0;ino_output_qs;i++) { - oq = irq_ptr->output_qs[i]; - if (!qdio_is_outbound_q_done(oq)) { - qdio_perf_stat_dec(&perf_stats.tl_runs); - __qdio_outbound_processing(oq); - } - } - } - - if (!qdio_has_inbound_q_moved(q)) - goto out; - - qdio_kick_inbound_handler(q); - if (tiqdio_is_inbound_q_done(q)) - if (!qdio_stop_polling(q)) { - /* - * we set the flags to get into the stuff next time, - * see also comment in qdio_stop_polling - */ - tiqdio_set_summary_bit((__u32*)q->dev_st_chg_ind); - tiqdio_sched_tl(); - } -out: - qdio_release_q(q); -} - -static void -tiqdio_inbound_processing(unsigned long q) -{ - __tiqdio_inbound_processing((struct qdio_q *) q, - atomic_read(&spare_indicator_usecount)); -} - -static void -__qdio_inbound_processing(struct qdio_q *q) -{ - int q_laps=0; - - QDIO_DBF_TEXT4(0,trace,"qinproc"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - if (unlikely(qdio_reserve_q(q))) { - qdio_release_q(q); - qdio_perf_stat_inc(&perf_stats.inbound_tl_runs_resched); - /* as we're sissies, we'll check next time */ - if (likely(!atomic_read(&q->is_in_shutdown))) { - qdio_mark_q(q); - QDIO_DBF_TEXT4(0,trace,"busy,agn"); - } - return; - } - qdio_perf_stat_inc(&perf_stats.inbound_tl_runs); - qdio_perf_stat_inc(&perf_stats.tl_runs); - -again: - if (qdio_has_inbound_q_moved(q)) { - qdio_kick_inbound_handler(q); - if (!qdio_stop_polling(q)) { - q_laps++; - if (q_lapssiga_sync) - return 2; - - if (unlikely(qdio_reserve_q(q))) { - qdio_release_q(q); - qdio_perf_stat_inc(&perf_stats.inbound_thin_tl_runs_resched); - /* - * as we might just be about to stop polling, we make - * sure that we check again at least once more - */ - - /* - * sanity -- we'd get here without setting the - * dev st chg ind - */ - tiqdio_set_summary_bit((__u32*)q->dev_st_chg_ind); - tiqdio_sched_tl(); - return 0; - } - if (qdio_stop_polling(q)) { - qdio_release_q(q); - return 2; - } - if (q_lapsdev_st_chg_ind); - tiqdio_sched_tl(); - qdio_release_q(q); - return 1; - -} -#endif /* QDIO_USE_PROCESSING_STATE */ - -static void -tiqdio_inbound_checks(void) -{ - struct qdio_q *q; - int spare_ind_was_set=0; -#ifdef QDIO_USE_PROCESSING_STATE - int q_laps=0; -#endif /* QDIO_USE_PROCESSING_STATE */ - - QDIO_DBF_TEXT4(0,trace,"iqdinbck"); - QDIO_DBF_TEXT5(0,trace,"iqlocsum"); - -#ifdef QDIO_USE_PROCESSING_STATE -again: -#endif /* QDIO_USE_PROCESSING_STATE */ - - /* when the spare indicator is used and set, save that and clear it */ - if ((atomic_read(&spare_indicator_usecount)) && spare_indicator) { - spare_ind_was_set = 1; - tiqdio_clear_summary_bit((__u32*)&spare_indicator); - } - - q=(struct qdio_q*)tiq_list; - do { - if (!q) - break; - __tiqdio_inbound_processing(q, spare_ind_was_set); - q=(struct qdio_q*)q->list_next; - } while (q!=(struct qdio_q*)tiq_list); - -#ifdef QDIO_USE_PROCESSING_STATE - q=(struct qdio_q*)tiq_list; - do { - int ret; - - ret = tiqdio_reset_processing_state(q, q_laps); - switch (ret) { - case 0: - return; - case 1: - q_laps++; - case 2: - q = (struct qdio_q*)q->list_next; - break; - default: - q_laps++; - goto again; - } - } while (q!=(struct qdio_q*)tiq_list); -#endif /* QDIO_USE_PROCESSING_STATE */ -} - -static void -tiqdio_tl(unsigned long data) -{ - QDIO_DBF_TEXT4(0,trace,"iqdio_tl"); - - qdio_perf_stat_inc(&perf_stats.tl_runs); - - tiqdio_inbound_checks(); -} - -/********************* GENERAL HELPER_ROUTINES ***********************/ - -static void -qdio_release_irq_memory(struct qdio_irq *irq_ptr) -{ - int i; - struct qdio_q *q; - - for (i = 0; i < QDIO_MAX_QUEUES_PER_IRQ; i++) { - q = irq_ptr->input_qs[i]; - if (q) { - free_page((unsigned long) q->slib); - kmem_cache_free(qdio_q_cache, q); - } - q = irq_ptr->output_qs[i]; - if (q) { - free_page((unsigned long) q->slib); - kmem_cache_free(qdio_q_cache, q); - } - } - free_page((unsigned long) irq_ptr->qdr); - free_page((unsigned long) irq_ptr); -} - -static void -qdio_set_impl_params(struct qdio_irq *irq_ptr, - unsigned int qib_param_field_format, - /* pointer to 128 bytes or NULL, if no param field */ - unsigned char *qib_param_field, - /* pointer to no_queues*128 words of data or NULL */ - unsigned int no_input_qs, - unsigned int no_output_qs, - unsigned long *input_slib_elements, - unsigned long *output_slib_elements) -{ - int i,j; - - if (!irq_ptr) - return; - - irq_ptr->qib.pfmt=qib_param_field_format; - if (qib_param_field) - memcpy(irq_ptr->qib.parm,qib_param_field, - QDIO_MAX_BUFFERS_PER_Q); - - if (input_slib_elements) - for (i=0;iinput_qs[i]->slib->slibe[j].parms= - input_slib_elements[ - i*QDIO_MAX_BUFFERS_PER_Q+j]; - } - if (output_slib_elements) - for (i=0;ioutput_qs[i]->slib->slibe[j].parms= - output_slib_elements[ - i*QDIO_MAX_BUFFERS_PER_Q+j]; - } -} - -static int -qdio_alloc_qs(struct qdio_irq *irq_ptr, - int no_input_qs, int no_output_qs) -{ - int i; - struct qdio_q *q; - - for (i = 0; i < no_input_qs; i++) { - q = kmem_cache_alloc(qdio_q_cache, GFP_KERNEL); - if (!q) - return -ENOMEM; - memset(q, 0, sizeof(*q)); - - q->slib = (struct slib *) __get_free_page(GFP_KERNEL); - if (!q->slib) { - kmem_cache_free(qdio_q_cache, q); - return -ENOMEM; - } - irq_ptr->input_qs[i]=q; - } - - for (i = 0; i < no_output_qs; i++) { - q = kmem_cache_alloc(qdio_q_cache, GFP_KERNEL); - if (!q) - return -ENOMEM; - memset(q, 0, sizeof(*q)); - - q->slib = (struct slib *) __get_free_page(GFP_KERNEL); - if (!q->slib) { - kmem_cache_free(qdio_q_cache, q); - return -ENOMEM; - } - irq_ptr->output_qs[i]=q; - } - return 0; -} - -static void -qdio_fill_qs(struct qdio_irq *irq_ptr, struct ccw_device *cdev, - int no_input_qs, int no_output_qs, - qdio_handler_t *input_handler, - qdio_handler_t *output_handler, - unsigned long int_parm,int q_format, - unsigned long flags, - void **inbound_sbals_array, - void **outbound_sbals_array) -{ - struct qdio_q *q; - int i,j; - char dbf_text[20]; /* see qdio_initialize */ - void *ptr; - int available; - - sprintf(dbf_text,"qfqs%4x",cdev->private->schid.sch_no); - QDIO_DBF_TEXT0(0,setup,dbf_text); - for (i=0;iinput_qs[i]; - - memset(q,0,((char*)&q->slib)-((char*)q)); - sprintf(dbf_text,"in-q%4x",i); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_DBF_HEX0(0,setup,&q,sizeof(void*)); - - memset(q->slib,0,PAGE_SIZE); - q->sl=(struct sl*)(((char*)q->slib)+PAGE_SIZE/2); - - available=0; - - for (j=0;jsbal[j]=*(inbound_sbals_array++); - - q->queue_type=q_format; - q->int_parm=int_parm; - q->schid = irq_ptr->schid; - q->irq_ptr = irq_ptr; - q->cdev = cdev; - q->mask=1<<(31-i); - q->q_no=i; - q->is_input_q=1; - q->first_to_check=0; - q->last_move_ftc=0; - q->handler=input_handler; - q->dev_st_chg_ind=irq_ptr->dev_st_chg_ind; - - /* q->is_thinint_q isn't valid at this time, but - * irq_ptr->is_thinint_irq is - */ - if (irq_ptr->is_thinint_irq) - tasklet_init(&q->tasklet, tiqdio_inbound_processing, - (unsigned long) q); - else - tasklet_init(&q->tasklet, qdio_inbound_processing, - (unsigned long) q); - - /* actually this is not used for inbound queues. yet. */ - atomic_set(&q->busy_siga_counter,0); - q->timing.busy_start=0; - -/* for (j=0;jtiming.last_transfer_times[j]=(qdio_get_micros()/ - QDIO_STATS_NUMBER)*j; - q->timing.last_transfer_index=QDIO_STATS_NUMBER-1; -*/ - - /* fill in slib */ - if (i>0) irq_ptr->input_qs[i-1]->slib->nsliba= - (unsigned long)(q->slib); - q->slib->sla=(unsigned long)(q->sl); - q->slib->slsba=(unsigned long)(&q->slsb.acc.val[0]); - - /* fill in sl */ - for (j=0;jsl->element[j].sbal=(unsigned long)(q->sbal[j]); - - QDIO_DBF_TEXT2(0,setup,"sl-sb-b0"); - ptr=(void*)q->sl; - QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*)); - ptr=(void*)&q->slsb; - QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*)); - ptr=(void*)q->sbal[0]; - QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*)); - - /* fill in slsb */ - if (!irq_ptr->is_qebsm) { - unsigned int count = 1; - for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++) - set_slsb(q, &j, SLSB_P_INPUT_NOT_INIT, &count); - } - } - - for (i=0;ioutput_qs[i]; - memset(q,0,((char*)&q->slib)-((char*)q)); - - sprintf(dbf_text,"outq%4x",i); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_DBF_HEX0(0,setup,&q,sizeof(void*)); - - memset(q->slib,0,PAGE_SIZE); - q->sl=(struct sl*)(((char*)q->slib)+PAGE_SIZE/2); - - available=0; - - for (j=0;jsbal[j]=*(outbound_sbals_array++); - - q->queue_type=q_format; - if ((q->queue_type == QDIO_IQDIO_QFMT) && - (no_output_qs > 1) && - (i == no_output_qs-1)) - q->queue_type = QDIO_IQDIO_QFMT_ASYNCH; - q->int_parm=int_parm; - q->is_input_q=0; - q->is_pci_out = 0; - q->schid = irq_ptr->schid; - q->cdev = cdev; - q->irq_ptr = irq_ptr; - q->mask=1<<(31-i); - q->q_no=i; - q->first_to_check=0; - q->last_move_ftc=0; - q->handler=output_handler; - - tasklet_init(&q->tasklet, qdio_outbound_processing, - (unsigned long) q); - setup_timer(&q->timer, qdio_outbound_processing, - (unsigned long) q); - - atomic_set(&q->busy_siga_counter,0); - q->timing.busy_start=0; - - /* fill in slib */ - if (i>0) irq_ptr->output_qs[i-1]->slib->nsliba= - (unsigned long)(q->slib); - q->slib->sla=(unsigned long)(q->sl); - q->slib->slsba=(unsigned long)(&q->slsb.acc.val[0]); - - /* fill in sl */ - for (j=0;jsl->element[j].sbal=(unsigned long)(q->sbal[j]); - - QDIO_DBF_TEXT2(0,setup,"sl-sb-b0"); - ptr=(void*)q->sl; - QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*)); - ptr=(void*)&q->slsb; - QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*)); - ptr=(void*)q->sbal[0]; - QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*)); - - /* fill in slsb */ - if (!irq_ptr->is_qebsm) { - unsigned int count = 1; - for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++) - set_slsb(q, &j, SLSB_P_OUTPUT_NOT_INIT, &count); - } - } -} - -static void -qdio_fill_thresholds(struct qdio_irq *irq_ptr, - unsigned int no_input_qs, - unsigned int no_output_qs, - unsigned int min_input_threshold, - unsigned int max_input_threshold, - unsigned int min_output_threshold, - unsigned int max_output_threshold) -{ - int i; - struct qdio_q *q; - - for (i=0;iinput_qs[i]; - q->timing.threshold=max_input_threshold; -/* for (j=0;jthreshold_classes[j].threshold= - min_input_threshold+ - (max_input_threshold-min_input_threshold)/ - QDIO_STATS_CLASSES; - } - qdio_use_thresholds(q,QDIO_STATS_CLASSES/2);*/ - } - for (i=0;ioutput_qs[i]; - q->timing.threshold=max_output_threshold; -/* for (j=0;jthreshold_classes[j].threshold= - min_output_threshold+ - (max_output_threshold-min_output_threshold)/ - QDIO_STATS_CLASSES; - } - qdio_use_thresholds(q,QDIO_STATS_CLASSES/2);*/ - } -} - -static void tiqdio_thinint_handler(void *ind, void *drv_data) -{ - QDIO_DBF_TEXT4(0,trace,"thin_int"); - - qdio_perf_stat_inc(&perf_stats.thinints); - - /* SVS only when needed: - * issue SVS to benefit from iqdio interrupt avoidance - * (SVS clears AISOI)*/ - if (!omit_svs) - tiqdio_clear_global_summary(); - - tiqdio_inbound_checks(); -} - -static void -qdio_set_state(struct qdio_irq *irq_ptr, enum qdio_irq_states state) -{ - int i; -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[15]; - - QDIO_DBF_TEXT5(0,trace,"newstate"); - sprintf(dbf_text,"%4x%4x",irq_ptr->schid.sch_no,state); - QDIO_DBF_TEXT5(0,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - - irq_ptr->state=state; - for (i=0;ino_input_qs;i++) - irq_ptr->input_qs[i]->state=state; - for (i=0;ino_output_qs;i++) - irq_ptr->output_qs[i]->state=state; - mb(); -} - -static void -qdio_irq_check_sense(struct subchannel_id schid, struct irb *irb) -{ - char dbf_text[15]; - - if (irb->esw.esw0.erw.cons) { - sprintf(dbf_text,"sens%4x",schid.sch_no); - QDIO_DBF_TEXT2(1,trace,dbf_text); - QDIO_DBF_HEX0(0,sense,irb,QDIO_DBF_SENSE_LEN); - - QDIO_PRINT_WARN("sense data available on qdio channel.\n"); - QDIO_HEXDUMP16(WARN,"irb: ",irb); - QDIO_HEXDUMP16(WARN,"sense data: ",irb->ecw); - } - -} - -static void -qdio_handle_pci(struct qdio_irq *irq_ptr) -{ - int i; - struct qdio_q *q; - - qdio_perf_stat_inc(&perf_stats.pcis); - for (i=0;ino_input_qs;i++) { - q=irq_ptr->input_qs[i]; - if (q->is_input_q&QDIO_FLAG_NO_INPUT_INTERRUPT_CONTEXT) - qdio_mark_q(q); - else { - qdio_perf_stat_dec(&perf_stats.tl_runs); - __qdio_inbound_processing(q); - } - } - if (!irq_ptr->hydra_gives_outbound_pcis) - return; - for (i=0;ino_output_qs;i++) { - q=irq_ptr->output_qs[i]; - if (qdio_is_outbound_q_done(q)) - continue; - qdio_perf_stat_dec(&perf_stats.tl_runs); - if (!irq_ptr->sync_done_on_outb_pcis) - SYNC_MEMORY; - __qdio_outbound_processing(q); - } -} - -static void qdio_establish_handle_irq(struct ccw_device*, int, int); - -static void -qdio_handle_activate_check(struct ccw_device *cdev, unsigned long intparm, - int cstat, int dstat) -{ - struct qdio_irq *irq_ptr; - struct qdio_q *q; - char dbf_text[15]; - - irq_ptr = cdev->private->qdio_data; - - QDIO_DBF_TEXT2(1, trace, "ick2"); - sprintf(dbf_text,"%s", cdev->dev.bus_id); - QDIO_DBF_TEXT2(1,trace,dbf_text); - QDIO_DBF_HEX2(0,trace,&intparm,sizeof(int)); - QDIO_DBF_HEX2(0,trace,&dstat,sizeof(int)); - QDIO_DBF_HEX2(0,trace,&cstat,sizeof(int)); - QDIO_PRINT_ERR("received check condition on activate " \ - "queues on device %s (cs=x%x, ds=x%x).\n", - cdev->dev.bus_id, cstat, dstat); - if (irq_ptr->no_input_qs) { - q=irq_ptr->input_qs[0]; - } else if (irq_ptr->no_output_qs) { - q=irq_ptr->output_qs[0]; - } else { - QDIO_PRINT_ERR("oops... no queue registered for device %s!?\n", - cdev->dev.bus_id); - goto omit_handler_call; - } - q->handler(q->cdev,QDIO_STATUS_ACTIVATE_CHECK_CONDITION| - QDIO_STATUS_LOOK_FOR_ERROR, - 0,0,0,-1,-1,q->int_parm); -omit_handler_call: - qdio_set_state(irq_ptr,QDIO_IRQ_STATE_STOPPED); - -} - -static void -qdio_call_shutdown(struct work_struct *work) -{ - struct ccw_device_private *priv; - struct ccw_device *cdev; - - priv = container_of(work, struct ccw_device_private, kick_work); - cdev = priv->cdev; - qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); - put_device(&cdev->dev); -} - -static void -qdio_timeout_handler(struct ccw_device *cdev) -{ - struct qdio_irq *irq_ptr; - char dbf_text[15]; - - QDIO_DBF_TEXT2(0, trace, "qtoh"); - sprintf(dbf_text, "%s", cdev->dev.bus_id); - QDIO_DBF_TEXT2(0, trace, dbf_text); - - irq_ptr = cdev->private->qdio_data; - sprintf(dbf_text, "state:%d", irq_ptr->state); - QDIO_DBF_TEXT2(0, trace, dbf_text); - - switch (irq_ptr->state) { - case QDIO_IRQ_STATE_INACTIVE: - QDIO_PRINT_ERR("establish queues on irq 0.%x.%04x: timed out\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no); - QDIO_DBF_TEXT2(1,setup,"eq:timeo"); - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); - break; - case QDIO_IRQ_STATE_CLEANUP: - QDIO_PRINT_INFO("Did not get interrupt on cleanup, " - "irq=0.%x.%x.\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no); - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); - break; - case QDIO_IRQ_STATE_ESTABLISHED: - case QDIO_IRQ_STATE_ACTIVE: - /* I/O has been terminated by common I/O layer. */ - QDIO_PRINT_INFO("Queues on irq 0.%x.%04x killed by cio.\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no); - QDIO_DBF_TEXT2(1, trace, "cio:term"); - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_STOPPED); - if (get_device(&cdev->dev)) { - /* Can't call shutdown from interrupt context. */ - PREPARE_WORK(&cdev->private->kick_work, - qdio_call_shutdown); - queue_work(ccw_device_work, &cdev->private->kick_work); - } - break; - default: - BUG(); - } - wake_up(&cdev->private->wait_q); -} - -static void -qdio_handler(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) -{ - struct qdio_irq *irq_ptr; - int cstat,dstat; - char dbf_text[15]; - -#ifdef CONFIG_QDIO_DEBUG - QDIO_DBF_TEXT4(0, trace, "qint"); - sprintf(dbf_text, "%s", cdev->dev.bus_id); - QDIO_DBF_TEXT4(0, trace, dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - - if (!intparm) { - QDIO_PRINT_ERR("got unsolicited interrupt in qdio " \ - "handler, device %s\n", cdev->dev.bus_id); - return; - } - - irq_ptr = cdev->private->qdio_data; - if (!irq_ptr) { - QDIO_DBF_TEXT2(1, trace, "uint"); - sprintf(dbf_text,"%s", cdev->dev.bus_id); - QDIO_DBF_TEXT2(1,trace,dbf_text); - QDIO_PRINT_ERR("received interrupt on unused device %s!\n", - cdev->dev.bus_id); - return; - } - - if (IS_ERR(irb)) { - /* Currently running i/o is in error. */ - switch (PTR_ERR(irb)) { - case -EIO: - QDIO_PRINT_ERR("i/o error on device %s\n", - cdev->dev.bus_id); - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); - wake_up(&cdev->private->wait_q); - return; - case -ETIMEDOUT: - qdio_timeout_handler(cdev); - return; - default: - QDIO_PRINT_ERR("unknown error state %ld on device %s\n", - PTR_ERR(irb), cdev->dev.bus_id); - return; - } - } - - qdio_irq_check_sense(irq_ptr->schid, irb); - -#ifdef CONFIG_QDIO_DEBUG - sprintf(dbf_text, "state:%d", irq_ptr->state); - QDIO_DBF_TEXT4(0, trace, dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - - cstat = irb->scsw.cmd.cstat; - dstat = irb->scsw.cmd.dstat; - - switch (irq_ptr->state) { - case QDIO_IRQ_STATE_INACTIVE: - qdio_establish_handle_irq(cdev, cstat, dstat); - break; - - case QDIO_IRQ_STATE_CLEANUP: - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); - break; - - case QDIO_IRQ_STATE_ESTABLISHED: - case QDIO_IRQ_STATE_ACTIVE: - if (cstat & SCHN_STAT_PCI) { - qdio_handle_pci(irq_ptr); - break; - } - - if ((cstat&~SCHN_STAT_PCI)||dstat) { - qdio_handle_activate_check(cdev, intparm, cstat, dstat); - break; - } - default: - QDIO_PRINT_ERR("got interrupt for queues in state %d on " \ - "device %s?!\n", - irq_ptr->state, cdev->dev.bus_id); - } - wake_up(&cdev->private->wait_q); - -} - -int -qdio_synchronize(struct ccw_device *cdev, unsigned int flags, - unsigned int queue_number) -{ - int cc = 0; - struct qdio_q *q; - struct qdio_irq *irq_ptr; - void *ptr; -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[15]="SyncXXXX"; -#endif - - irq_ptr = cdev->private->qdio_data; - if (!irq_ptr) - return -ENODEV; - -#ifdef CONFIG_QDIO_DEBUG - *((int*)(&dbf_text[4])) = irq_ptr->schid.sch_no; - QDIO_DBF_HEX4(0,trace,dbf_text,QDIO_DBF_TRACE_LEN); - *((int*)(&dbf_text[0]))=flags; - *((int*)(&dbf_text[4]))=queue_number; - QDIO_DBF_HEX4(0,trace,dbf_text,QDIO_DBF_TRACE_LEN); -#endif /* CONFIG_QDIO_DEBUG */ - - if (flags&QDIO_FLAG_SYNC_INPUT) { - q=irq_ptr->input_qs[queue_number]; - if (!q) - return -EINVAL; - if (!(irq_ptr->is_qebsm)) - cc = do_siga_sync(q->schid, 0, q->mask); - } else if (flags&QDIO_FLAG_SYNC_OUTPUT) { - q=irq_ptr->output_qs[queue_number]; - if (!q) - return -EINVAL; - if (!(irq_ptr->is_qebsm)) - cc = do_siga_sync(q->schid, q->mask, 0); - } else - return -EINVAL; - - ptr=&cc; - if (cc) - QDIO_DBF_HEX3(0,trace,&ptr,sizeof(int)); - - return cc; -} - -static int -qdio_get_ssqd_information(struct subchannel_id *schid, - struct qdio_chsc_ssqd **ssqd_area) -{ - int result; - - QDIO_DBF_TEXT0(0, setup, "getssqd"); - *ssqd_area = mempool_alloc(qdio_mempool_scssc, GFP_ATOMIC); - if (!ssqd_area) { - QDIO_PRINT_WARN("Could not get memory for chsc on sch x%x.\n", - schid->sch_no); - return -ENOMEM; - } - - (*ssqd_area)->request = (struct chsc_header) { - .length = 0x0010, - .code = 0x0024, - }; - (*ssqd_area)->first_sch = schid->sch_no; - (*ssqd_area)->last_sch = schid->sch_no; - (*ssqd_area)->ssid = schid->ssid; - result = chsc(*ssqd_area); - - if (result) { - QDIO_PRINT_WARN("CHSC returned cc %i on sch 0.%x.%x.\n", - result, schid->ssid, schid->sch_no); - goto out; - } - - if ((*ssqd_area)->response.code != QDIO_CHSC_RESPONSE_CODE_OK) { - QDIO_PRINT_WARN("CHSC response is 0x%x on sch 0.%x.%x.\n", - (*ssqd_area)->response.code, - schid->ssid, schid->sch_no); - goto out; - } - if (!((*ssqd_area)->flags & CHSC_FLAG_QDIO_CAPABILITY) || - !((*ssqd_area)->flags & CHSC_FLAG_VALIDITY) || - ((*ssqd_area)->sch != schid->sch_no)) { - QDIO_PRINT_WARN("huh? problems checking out sch 0.%x.%x... " \ - "using all SIGAs.\n", - schid->ssid, schid->sch_no); - goto out; - } - return 0; -out: - return -EINVAL; -} - -int -qdio_get_ssqd_pct(struct ccw_device *cdev) -{ - struct qdio_chsc_ssqd *ssqd_area; - struct subchannel_id schid; - char dbf_text[15]; - int rc; - int pct = 0; - - QDIO_DBF_TEXT0(0, setup, "getpct"); - schid = ccw_device_get_subchannel_id(cdev); - rc = qdio_get_ssqd_information(&schid, &ssqd_area); - if (!rc) - pct = (int)ssqd_area->pct; - if (rc != -ENOMEM) - mempool_free(ssqd_area, qdio_mempool_scssc); - sprintf(dbf_text, "pct: %d", pct); - QDIO_DBF_TEXT2(0, setup, dbf_text); - return pct; -} -EXPORT_SYMBOL(qdio_get_ssqd_pct); - -static void -qdio_check_subchannel_qebsm(struct qdio_irq *irq_ptr, unsigned long token) -{ - struct qdio_q *q; - int i; - unsigned int count, start_buf; - char dbf_text[15]; - - /*check if QEBSM is disabled */ - if (!(irq_ptr->is_qebsm) || !(irq_ptr->qdioac & 0x01)) { - irq_ptr->is_qebsm = 0; - irq_ptr->sch_token = 0; - irq_ptr->qib.rflags &= ~QIB_RFLAGS_ENABLE_QEBSM; - QDIO_DBF_TEXT0(0,setup,"noV=V"); - return; - } - irq_ptr->sch_token = token; - /*input queue*/ - for (i = 0; i < irq_ptr->no_input_qs;i++) { - q = irq_ptr->input_qs[i]; - count = QDIO_MAX_BUFFERS_PER_Q; - start_buf = 0; - set_slsb(q, &start_buf, SLSB_P_INPUT_NOT_INIT, &count); - } - sprintf(dbf_text,"V=V:%2x",irq_ptr->is_qebsm); - QDIO_DBF_TEXT0(0,setup,dbf_text); - sprintf(dbf_text,"%8lx",irq_ptr->sch_token); - QDIO_DBF_TEXT0(0,setup,dbf_text); - /*output queue*/ - for (i = 0; i < irq_ptr->no_output_qs; i++) { - q = irq_ptr->output_qs[i]; - count = QDIO_MAX_BUFFERS_PER_Q; - start_buf = 0; - set_slsb(q, &start_buf, SLSB_P_OUTPUT_NOT_INIT, &count); - } -} - -static void -qdio_get_ssqd_siga(struct qdio_irq *irq_ptr) -{ - int rc; - struct qdio_chsc_ssqd *ssqd_area; - - QDIO_DBF_TEXT0(0,setup,"getssqd"); - irq_ptr->qdioac = 0; - rc = qdio_get_ssqd_information(&irq_ptr->schid, &ssqd_area); - if (rc) { - QDIO_PRINT_WARN("using all SIGAs for sch x%x.n", - irq_ptr->schid.sch_no); - irq_ptr->qdioac = CHSC_FLAG_SIGA_INPUT_NECESSARY | - CHSC_FLAG_SIGA_OUTPUT_NECESSARY | - CHSC_FLAG_SIGA_SYNC_NECESSARY; /* all flags set */ - irq_ptr->is_qebsm = 0; - } else - irq_ptr->qdioac = ssqd_area->qdioac1; - - qdio_check_subchannel_qebsm(irq_ptr, ssqd_area->sch_token); - if (rc != -ENOMEM) - mempool_free(ssqd_area, qdio_mempool_scssc); -} - -static unsigned int -tiqdio_check_chsc_availability(void) -{ - char dbf_text[15]; - - /* Check for bit 41. */ - if (!css_general_characteristics.aif) { - QDIO_PRINT_WARN("Adapter interruption facility not " \ - "installed.\n"); - return -ENOENT; - } - - /* Check for bits 107 and 108. */ - if (!css_chsc_characteristics.scssc || - !css_chsc_characteristics.scsscf) { - QDIO_PRINT_WARN("Set Chan Subsys. Char. & Fast-CHSCs " \ - "not available.\n"); - return -ENOENT; - } - - /* Check for OSA/FCP thin interrupts (bit 67). */ - hydra_thinints = css_general_characteristics.aif_osa; - sprintf(dbf_text,"hydrati%1x", hydra_thinints); - QDIO_DBF_TEXT0(0,setup,dbf_text); - -#ifdef CONFIG_64BIT - /* Check for QEBSM support in general (bit 58). */ - is_passthrough = css_general_characteristics.qebsm; -#endif - sprintf(dbf_text,"cssQBS:%1x", is_passthrough); - QDIO_DBF_TEXT0(0,setup,dbf_text); - - /* Check for aif time delay disablement fac (bit 56). If installed, - * omit svs even under lpar (good point by rick again) */ - omit_svs = css_general_characteristics.aif_tdd; - sprintf(dbf_text,"omitsvs%1x", omit_svs); - QDIO_DBF_TEXT0(0,setup,dbf_text); - return 0; -} - - -static unsigned int -tiqdio_set_subchannel_ind(struct qdio_irq *irq_ptr, int reset_to_zero) -{ - unsigned long real_addr_local_summary_bit; - unsigned long real_addr_dev_st_chg_ind; - void *ptr; - char dbf_text[15]; - - unsigned int resp_code; - int result; - - struct { - struct chsc_header request; - u16 operation_code; - u16 reserved1; - u32 reserved2; - u32 reserved3; - u64 summary_indicator_addr; - u64 subchannel_indicator_addr; - u32 ks:4; - u32 kc:4; - u32 reserved4:21; - u32 isc:3; - u32 word_with_d_bit; - /* set to 0x10000000 to enable - * time delay disablement facility */ - u32 reserved5; - struct subchannel_id schid; - u32 reserved6[1004]; - struct chsc_header response; - u32 reserved7; - } *scssc_area; - - if (!irq_ptr->is_thinint_irq) - return -ENODEV; - - if (reset_to_zero) { - real_addr_local_summary_bit=0; - real_addr_dev_st_chg_ind=0; - } else { - real_addr_local_summary_bit= - virt_to_phys((volatile void *)tiqdio_ind); - real_addr_dev_st_chg_ind= - virt_to_phys((volatile void *)irq_ptr->dev_st_chg_ind); - } - - scssc_area = mempool_alloc(qdio_mempool_scssc, GFP_ATOMIC); - if (!scssc_area) { - QDIO_PRINT_WARN("No memory for setting indicators on " \ - "subchannel 0.%x.%x.\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no); - return -ENOMEM; - } - scssc_area->request = (struct chsc_header) { - .length = 0x0fe0, - .code = 0x0021, - }; - scssc_area->operation_code = 0; - - scssc_area->summary_indicator_addr = real_addr_local_summary_bit; - scssc_area->subchannel_indicator_addr = real_addr_dev_st_chg_ind; - scssc_area->ks = QDIO_STORAGE_KEY; - scssc_area->kc = QDIO_STORAGE_KEY; - scssc_area->isc = TIQDIO_THININT_ISC; - scssc_area->schid = irq_ptr->schid; - /* enables the time delay disablement facility. Don't care - * whether it is really there (i.e. we haven't checked for - * it) */ - if (css_general_characteristics.aif_tdd) - scssc_area->word_with_d_bit = 0x10000000; - else - QDIO_PRINT_WARN("Time delay disablement facility " \ - "not available\n"); - - result = chsc(scssc_area); - if (result) { - QDIO_PRINT_WARN("could not set indicators on irq 0.%x.%x, " \ - "cc=%i.\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no,result); - result = -EIO; - goto out; - } - - resp_code = scssc_area->response.code; - if (resp_code!=QDIO_CHSC_RESPONSE_CODE_OK) { - QDIO_PRINT_WARN("response upon setting indicators " \ - "is 0x%x.\n",resp_code); - sprintf(dbf_text,"sidR%4x",resp_code); - QDIO_DBF_TEXT1(0,trace,dbf_text); - QDIO_DBF_TEXT1(0,setup,dbf_text); - ptr=&scssc_area->response; - QDIO_DBF_HEX2(1,setup,&ptr,QDIO_DBF_SETUP_LEN); - result = -EIO; - goto out; - } - - QDIO_DBF_TEXT2(0,setup,"setscind"); - QDIO_DBF_HEX2(0,setup,&real_addr_local_summary_bit, - sizeof(unsigned long)); - QDIO_DBF_HEX2(0,setup,&real_addr_dev_st_chg_ind,sizeof(unsigned long)); - result = 0; -out: - mempool_free(scssc_area, qdio_mempool_scssc); - return result; - -} - -static unsigned int -tiqdio_set_delay_target(struct qdio_irq *irq_ptr, unsigned long delay_target) -{ - unsigned int resp_code; - int result; - void *ptr; - char dbf_text[15]; - - struct { - struct chsc_header request; - u16 operation_code; - u16 reserved1; - u32 reserved2; - u32 reserved3; - u32 reserved4[2]; - u32 delay_target; - u32 reserved5[1009]; - struct chsc_header response; - u32 reserved6; - } *scsscf_area; - - if (!irq_ptr->is_thinint_irq) - return -ENODEV; - - scsscf_area = mempool_alloc(qdio_mempool_scssc, GFP_ATOMIC); - if (!scsscf_area) { - QDIO_PRINT_WARN("No memory for setting delay target on " \ - "subchannel 0.%x.%x.\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no); - return -ENOMEM; - } - scsscf_area->request = (struct chsc_header) { - .length = 0x0fe0, - .code = 0x1027, - }; - - scsscf_area->delay_target = delay_target<<16; - - result=chsc(scsscf_area); - if (result) { - QDIO_PRINT_WARN("could not set delay target on irq 0.%x.%x, " \ - "cc=%i. Continuing.\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no, - result); - result = -EIO; - goto out; - } - - resp_code = scsscf_area->response.code; - if (resp_code!=QDIO_CHSC_RESPONSE_CODE_OK) { - QDIO_PRINT_WARN("response upon setting delay target " \ - "is 0x%x. Continuing.\n",resp_code); - sprintf(dbf_text,"sdtR%4x",resp_code); - QDIO_DBF_TEXT1(0,trace,dbf_text); - QDIO_DBF_TEXT1(0,setup,dbf_text); - ptr=&scsscf_area->response; - QDIO_DBF_HEX2(1,trace,&ptr,QDIO_DBF_TRACE_LEN); - } - QDIO_DBF_TEXT2(0,trace,"delytrgt"); - QDIO_DBF_HEX2(0,trace,&delay_target,sizeof(unsigned long)); - result = 0; /* not critical */ -out: - mempool_free(scsscf_area, qdio_mempool_scssc); - return result; -} - -int -qdio_cleanup(struct ccw_device *cdev, int how) -{ - struct qdio_irq *irq_ptr; - char dbf_text[15]; - int rc; - - irq_ptr = cdev->private->qdio_data; - if (!irq_ptr) - return -ENODEV; - - sprintf(dbf_text,"qcln%4x",irq_ptr->schid.sch_no); - QDIO_DBF_TEXT1(0,trace,dbf_text); - QDIO_DBF_TEXT0(0,setup,dbf_text); - - rc = qdio_shutdown(cdev, how); - if ((rc == 0) || (rc == -EINPROGRESS)) - rc = qdio_free(cdev); - return rc; -} - -int -qdio_shutdown(struct ccw_device *cdev, int how) -{ - struct qdio_irq *irq_ptr; - int i; - int result = 0; - int rc; - unsigned long flags; - int timeout; - char dbf_text[15]; - - irq_ptr = cdev->private->qdio_data; - if (!irq_ptr) - return -ENODEV; - - down(&irq_ptr->setting_up_sema); - - sprintf(dbf_text,"qsqs%4x",irq_ptr->schid.sch_no); - QDIO_DBF_TEXT1(0,trace,dbf_text); - QDIO_DBF_TEXT0(0,setup,dbf_text); - - /* mark all qs as uninteresting */ - for (i=0;ino_input_qs;i++) - atomic_set(&irq_ptr->input_qs[i]->is_in_shutdown,1); - - for (i=0;ino_output_qs;i++) - atomic_set(&irq_ptr->output_qs[i]->is_in_shutdown,1); - - tasklet_kill(&tiqdio_tasklet); - - for (i=0;ino_input_qs;i++) { - qdio_unmark_q(irq_ptr->input_qs[i]); - tasklet_kill(&irq_ptr->input_qs[i]->tasklet); - wait_event_interruptible_timeout(cdev->private->wait_q, - !atomic_read(&irq_ptr-> - input_qs[i]-> - use_count), - QDIO_NO_USE_COUNT_TIMEOUT); - if (atomic_read(&irq_ptr->input_qs[i]->use_count)) - result=-EINPROGRESS; - } - - for (i=0;ino_output_qs;i++) { - tasklet_kill(&irq_ptr->output_qs[i]->tasklet); - del_timer(&irq_ptr->output_qs[i]->timer); - wait_event_interruptible_timeout(cdev->private->wait_q, - !atomic_read(&irq_ptr-> - output_qs[i]-> - use_count), - QDIO_NO_USE_COUNT_TIMEOUT); - if (atomic_read(&irq_ptr->output_qs[i]->use_count)) - result=-EINPROGRESS; - } - - /* cleanup subchannel */ - spin_lock_irqsave(get_ccwdev_lock(cdev),flags); - if (how&QDIO_FLAG_CLEANUP_USING_CLEAR) { - rc = ccw_device_clear(cdev, QDIO_DOING_CLEANUP); - timeout=QDIO_CLEANUP_CLEAR_TIMEOUT; - } else if (how&QDIO_FLAG_CLEANUP_USING_HALT) { - rc = ccw_device_halt(cdev, QDIO_DOING_CLEANUP); - timeout=QDIO_CLEANUP_HALT_TIMEOUT; - } else { /* default behaviour */ - rc = ccw_device_halt(cdev, QDIO_DOING_CLEANUP); - timeout=QDIO_CLEANUP_HALT_TIMEOUT; - } - if (rc == -ENODEV) { - /* No need to wait for device no longer present. */ - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); - spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); - } else if (((void *)cdev->handler != (void *)qdio_handler) && rc == 0) { - /* - * Whoever put another handler there, has to cope with the - * interrupt theirself. Might happen if qdio_shutdown was - * called on already shutdown queues, but this shouldn't have - * bad side effects. - */ - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); - spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); - } else if (rc == 0) { - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_CLEANUP); - spin_unlock_irqrestore(get_ccwdev_lock(cdev),flags); - - wait_event_interruptible_timeout(cdev->private->wait_q, - irq_ptr->state == QDIO_IRQ_STATE_INACTIVE || - irq_ptr->state == QDIO_IRQ_STATE_ERR, - timeout); - } else { - QDIO_PRINT_INFO("ccw_device_{halt,clear} returned %d for " - "device %s\n", result, cdev->dev.bus_id); - spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); - result = rc; - goto out; - } - if (irq_ptr->is_thinint_irq) { - qdio_put_indicator((__u32*)irq_ptr->dev_st_chg_ind); - tiqdio_set_subchannel_ind(irq_ptr,1); - /* reset adapter interrupt indicators */ - } - - /* exchange int handlers, if necessary */ - if ((void*)cdev->handler == (void*)qdio_handler) - cdev->handler=irq_ptr->original_int_handler; - - /* Ignore errors. */ - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); -out: - up(&irq_ptr->setting_up_sema); - return result; -} - -int -qdio_free(struct ccw_device *cdev) -{ - struct qdio_irq *irq_ptr; - char dbf_text[15]; - - irq_ptr = cdev->private->qdio_data; - if (!irq_ptr) - return -ENODEV; - - down(&irq_ptr->setting_up_sema); - - sprintf(dbf_text,"qfqs%4x",irq_ptr->schid.sch_no); - QDIO_DBF_TEXT1(0,trace,dbf_text); - QDIO_DBF_TEXT0(0,setup,dbf_text); - - cdev->private->qdio_data = NULL; - - up(&irq_ptr->setting_up_sema); - - qdio_release_irq_memory(irq_ptr); - module_put(THIS_MODULE); - return 0; -} - -static void -qdio_allocate_do_dbf(struct qdio_initialize *init_data) -{ - char dbf_text[20]; /* if a printf printed out more than 8 chars */ - - sprintf(dbf_text,"qfmt:%x",init_data->q_format); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_DBF_HEX0(0,setup,init_data->adapter_name,8); - sprintf(dbf_text,"qpff%4x",init_data->qib_param_field_format); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_DBF_HEX0(0,setup,&init_data->qib_param_field,sizeof(char*)); - QDIO_DBF_HEX0(0,setup,&init_data->input_slib_elements,sizeof(long*)); - QDIO_DBF_HEX0(0,setup,&init_data->output_slib_elements,sizeof(long*)); - sprintf(dbf_text,"miit%4x",init_data->min_input_threshold); - QDIO_DBF_TEXT0(0,setup,dbf_text); - sprintf(dbf_text,"mait%4x",init_data->max_input_threshold); - QDIO_DBF_TEXT0(0,setup,dbf_text); - sprintf(dbf_text,"miot%4x",init_data->min_output_threshold); - QDIO_DBF_TEXT0(0,setup,dbf_text); - sprintf(dbf_text,"maot%4x",init_data->max_output_threshold); - QDIO_DBF_TEXT0(0,setup,dbf_text); - sprintf(dbf_text,"niq:%4x",init_data->no_input_qs); - QDIO_DBF_TEXT0(0,setup,dbf_text); - sprintf(dbf_text,"noq:%4x",init_data->no_output_qs); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_DBF_HEX0(0,setup,&init_data->input_handler,sizeof(void*)); - QDIO_DBF_HEX0(0,setup,&init_data->output_handler,sizeof(void*)); - QDIO_DBF_HEX0(0,setup,&init_data->int_parm,sizeof(long)); - QDIO_DBF_HEX0(0,setup,&init_data->flags,sizeof(long)); - QDIO_DBF_HEX0(0,setup,&init_data->input_sbal_addr_array,sizeof(void*)); - QDIO_DBF_HEX0(0,setup,&init_data->output_sbal_addr_array,sizeof(void*)); -} - -static void -qdio_allocate_fill_input_desc(struct qdio_irq *irq_ptr, int i, int iqfmt) -{ - irq_ptr->input_qs[i]->is_iqdio_q = iqfmt; - irq_ptr->input_qs[i]->is_thinint_q = irq_ptr->is_thinint_irq; - - irq_ptr->qdr->qdf0[i].sliba=(unsigned long)(irq_ptr->input_qs[i]->slib); - - irq_ptr->qdr->qdf0[i].sla=(unsigned long)(irq_ptr->input_qs[i]->sl); - - irq_ptr->qdr->qdf0[i].slsba= - (unsigned long)(&irq_ptr->input_qs[i]->slsb.acc.val[0]); - - irq_ptr->qdr->qdf0[i].akey=QDIO_STORAGE_KEY; - irq_ptr->qdr->qdf0[i].bkey=QDIO_STORAGE_KEY; - irq_ptr->qdr->qdf0[i].ckey=QDIO_STORAGE_KEY; - irq_ptr->qdr->qdf0[i].dkey=QDIO_STORAGE_KEY; -} - -static void -qdio_allocate_fill_output_desc(struct qdio_irq *irq_ptr, int i, - int j, int iqfmt) -{ - irq_ptr->output_qs[i]->is_iqdio_q = iqfmt; - irq_ptr->output_qs[i]->is_thinint_q = irq_ptr->is_thinint_irq; - - irq_ptr->qdr->qdf0[i+j].sliba=(unsigned long)(irq_ptr->output_qs[i]->slib); - - irq_ptr->qdr->qdf0[i+j].sla=(unsigned long)(irq_ptr->output_qs[i]->sl); - - irq_ptr->qdr->qdf0[i+j].slsba= - (unsigned long)(&irq_ptr->output_qs[i]->slsb.acc.val[0]); - - irq_ptr->qdr->qdf0[i+j].akey=QDIO_STORAGE_KEY; - irq_ptr->qdr->qdf0[i+j].bkey=QDIO_STORAGE_KEY; - irq_ptr->qdr->qdf0[i+j].ckey=QDIO_STORAGE_KEY; - irq_ptr->qdr->qdf0[i+j].dkey=QDIO_STORAGE_KEY; -} - - -static void -qdio_initialize_set_siga_flags_input(struct qdio_irq *irq_ptr) -{ - int i; - - for (i=0;ino_input_qs;i++) { - irq_ptr->input_qs[i]->siga_sync= - irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_NECESSARY; - irq_ptr->input_qs[i]->siga_in= - irq_ptr->qdioac&CHSC_FLAG_SIGA_INPUT_NECESSARY; - irq_ptr->input_qs[i]->siga_out= - irq_ptr->qdioac&CHSC_FLAG_SIGA_OUTPUT_NECESSARY; - irq_ptr->input_qs[i]->siga_sync_done_on_thinints= - irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS; - irq_ptr->input_qs[i]->hydra_gives_outbound_pcis= - irq_ptr->hydra_gives_outbound_pcis; - irq_ptr->input_qs[i]->siga_sync_done_on_outb_tis= - ((irq_ptr->qdioac& - (CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS| - CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS))== - (CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS| - CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS)); - - } -} - -static void -qdio_initialize_set_siga_flags_output(struct qdio_irq *irq_ptr) -{ - int i; - - for (i=0;ino_output_qs;i++) { - irq_ptr->output_qs[i]->siga_sync= - irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_NECESSARY; - irq_ptr->output_qs[i]->siga_in= - irq_ptr->qdioac&CHSC_FLAG_SIGA_INPUT_NECESSARY; - irq_ptr->output_qs[i]->siga_out= - irq_ptr->qdioac&CHSC_FLAG_SIGA_OUTPUT_NECESSARY; - irq_ptr->output_qs[i]->siga_sync_done_on_thinints= - irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS; - irq_ptr->output_qs[i]->hydra_gives_outbound_pcis= - irq_ptr->hydra_gives_outbound_pcis; - irq_ptr->output_qs[i]->siga_sync_done_on_outb_tis= - ((irq_ptr->qdioac& - (CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS| - CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS))== - (CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS| - CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS)); - - } -} - -static int -qdio_establish_irq_check_for_errors(struct ccw_device *cdev, int cstat, - int dstat) -{ - char dbf_text[15]; - struct qdio_irq *irq_ptr; - - irq_ptr = cdev->private->qdio_data; - - if (cstat || (dstat & ~(DEV_STAT_CHN_END|DEV_STAT_DEV_END))) { - sprintf(dbf_text,"ick1%4x",irq_ptr->schid.sch_no); - QDIO_DBF_TEXT2(1,trace,dbf_text); - QDIO_DBF_HEX2(0,trace,&dstat,sizeof(int)); - QDIO_DBF_HEX2(0,trace,&cstat,sizeof(int)); - QDIO_PRINT_ERR("received check condition on establish " \ - "queues on irq 0.%x.%x (cs=x%x, ds=x%x).\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no, - cstat,dstat); - qdio_set_state(irq_ptr,QDIO_IRQ_STATE_ERR); - } - - if (!(dstat & DEV_STAT_DEV_END)) { - QDIO_DBF_TEXT2(1,setup,"eq:no de"); - QDIO_DBF_HEX2(0,setup,&dstat, sizeof(dstat)); - QDIO_DBF_HEX2(0,setup,&cstat, sizeof(cstat)); - QDIO_PRINT_ERR("establish queues on irq 0.%x.%04x: didn't get " - "device end: dstat=%02x, cstat=%02x\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no, - dstat, cstat); - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); - return 1; - } - - if (dstat & ~(DEV_STAT_CHN_END|DEV_STAT_DEV_END)) { - QDIO_DBF_TEXT2(1,setup,"eq:badio"); - QDIO_DBF_HEX2(0,setup,&dstat, sizeof(dstat)); - QDIO_DBF_HEX2(0,setup,&cstat, sizeof(cstat)); - QDIO_PRINT_ERR("establish queues on irq 0.%x.%04x: got " - "the following devstat: dstat=%02x, " - "cstat=%02x\n", irq_ptr->schid.ssid, - irq_ptr->schid.sch_no, dstat, cstat); - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); - return 1; - } - return 0; -} - -static void -qdio_establish_handle_irq(struct ccw_device *cdev, int cstat, int dstat) -{ - struct qdio_irq *irq_ptr; - char dbf_text[15]; - - irq_ptr = cdev->private->qdio_data; - - sprintf(dbf_text,"qehi%4x",cdev->private->schid.sch_no); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_DBF_TEXT0(0,trace,dbf_text); - - if (qdio_establish_irq_check_for_errors(cdev, cstat, dstat)) - return; - - qdio_set_state(irq_ptr,QDIO_IRQ_STATE_ESTABLISHED); -} - -int -qdio_initialize(struct qdio_initialize *init_data) -{ - int rc; - char dbf_text[15]; - - sprintf(dbf_text,"qini%4x",init_data->cdev->private->schid.sch_no); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_DBF_TEXT0(0,trace,dbf_text); - - rc = qdio_allocate(init_data); - if (rc == 0) { - rc = qdio_establish(init_data); - if (rc != 0) - qdio_free(init_data->cdev); - } - - return rc; -} - - -int -qdio_allocate(struct qdio_initialize *init_data) -{ - struct qdio_irq *irq_ptr; - char dbf_text[15]; - - sprintf(dbf_text,"qalc%4x",init_data->cdev->private->schid.sch_no); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_DBF_TEXT0(0,trace,dbf_text); - if ( (init_data->no_input_qs>QDIO_MAX_QUEUES_PER_IRQ) || - (init_data->no_output_qs>QDIO_MAX_QUEUES_PER_IRQ) || - ((init_data->no_input_qs) && (!init_data->input_handler)) || - ((init_data->no_output_qs) && (!init_data->output_handler)) ) - return -EINVAL; - - if (!init_data->input_sbal_addr_array) - return -EINVAL; - - if (!init_data->output_sbal_addr_array) - return -EINVAL; - - qdio_allocate_do_dbf(init_data); - - /* create irq */ - irq_ptr = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA); - - QDIO_DBF_TEXT0(0,setup,"irq_ptr:"); - QDIO_DBF_HEX0(0,setup,&irq_ptr,sizeof(void*)); - - if (!irq_ptr) { - QDIO_PRINT_ERR("allocation of irq_ptr failed!\n"); - return -ENOMEM; - } - - init_MUTEX(&irq_ptr->setting_up_sema); - - /* QDR must be in DMA area since CCW data address is only 32 bit */ - irq_ptr->qdr = (struct qdr *) __get_free_page(GFP_KERNEL | GFP_DMA); - if (!(irq_ptr->qdr)) { - free_page((unsigned long) irq_ptr); - QDIO_PRINT_ERR("allocation of irq_ptr->qdr failed!\n"); - return -ENOMEM; - } - QDIO_DBF_TEXT0(0,setup,"qdr:"); - QDIO_DBF_HEX0(0,setup,&irq_ptr->qdr,sizeof(void*)); - - if (qdio_alloc_qs(irq_ptr, - init_data->no_input_qs, - init_data->no_output_qs)) { - QDIO_PRINT_ERR("queue allocation failed!\n"); - qdio_release_irq_memory(irq_ptr); - return -ENOMEM; - } - - init_data->cdev->private->qdio_data = irq_ptr; - - qdio_set_state(irq_ptr,QDIO_IRQ_STATE_INACTIVE); - - return 0; -} - -static int qdio_fill_irq(struct qdio_initialize *init_data) -{ - int i; - char dbf_text[15]; - struct ciw *ciw; - int is_iqdio; - struct qdio_irq *irq_ptr; - - irq_ptr = init_data->cdev->private->qdio_data; - - memset(irq_ptr,0,((char*)&irq_ptr->qdr)-((char*)irq_ptr)); - - /* wipes qib.ac, required by ar7063 */ - memset(irq_ptr->qdr,0,sizeof(struct qdr)); - - irq_ptr->int_parm=init_data->int_parm; - - irq_ptr->schid = ccw_device_get_subchannel_id(init_data->cdev); - irq_ptr->no_input_qs=init_data->no_input_qs; - irq_ptr->no_output_qs=init_data->no_output_qs; - - if (init_data->q_format==QDIO_IQDIO_QFMT) { - irq_ptr->is_iqdio_irq=1; - irq_ptr->is_thinint_irq=1; - } else { - irq_ptr->is_iqdio_irq=0; - irq_ptr->is_thinint_irq=hydra_thinints; - } - sprintf(dbf_text,"is_i_t%1x%1x", - irq_ptr->is_iqdio_irq,irq_ptr->is_thinint_irq); - QDIO_DBF_TEXT2(0,setup,dbf_text); - - if (irq_ptr->is_thinint_irq) { - irq_ptr->dev_st_chg_ind = qdio_get_indicator(); - QDIO_DBF_HEX1(0,setup,&irq_ptr->dev_st_chg_ind,sizeof(void*)); - if (!irq_ptr->dev_st_chg_ind) { - QDIO_PRINT_WARN("no indicator location available " \ - "for irq 0.%x.%x\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no); - qdio_release_irq_memory(irq_ptr); - return -ENOBUFS; - } - } - - /* defaults */ - irq_ptr->equeue.cmd=DEFAULT_ESTABLISH_QS_CMD; - irq_ptr->equeue.count=DEFAULT_ESTABLISH_QS_COUNT; - irq_ptr->aqueue.cmd=DEFAULT_ACTIVATE_QS_CMD; - irq_ptr->aqueue.count=DEFAULT_ACTIVATE_QS_COUNT; - - qdio_fill_qs(irq_ptr, init_data->cdev, - init_data->no_input_qs, - init_data->no_output_qs, - init_data->input_handler, - init_data->output_handler,init_data->int_parm, - init_data->q_format,init_data->flags, - init_data->input_sbal_addr_array, - init_data->output_sbal_addr_array); - - if (!try_module_get(THIS_MODULE)) { - QDIO_PRINT_CRIT("try_module_get() failed!\n"); - qdio_release_irq_memory(irq_ptr); - return -EINVAL; - } - - qdio_fill_thresholds(irq_ptr,init_data->no_input_qs, - init_data->no_output_qs, - init_data->min_input_threshold, - init_data->max_input_threshold, - init_data->min_output_threshold, - init_data->max_output_threshold); - - /* fill in qdr */ - irq_ptr->qdr->qfmt=init_data->q_format; - irq_ptr->qdr->iqdcnt=init_data->no_input_qs; - irq_ptr->qdr->oqdcnt=init_data->no_output_qs; - irq_ptr->qdr->iqdsz=sizeof(struct qdesfmt0)/4; /* size in words */ - irq_ptr->qdr->oqdsz=sizeof(struct qdesfmt0)/4; - - irq_ptr->qdr->qiba=(unsigned long)&irq_ptr->qib; - irq_ptr->qdr->qkey=QDIO_STORAGE_KEY; - - /* fill in qib */ - irq_ptr->is_qebsm = is_passthrough; - if (irq_ptr->is_qebsm) - irq_ptr->qib.rflags |= QIB_RFLAGS_ENABLE_QEBSM; - - irq_ptr->qib.qfmt=init_data->q_format; - if (init_data->no_input_qs) - irq_ptr->qib.isliba=(unsigned long)(irq_ptr->input_qs[0]->slib); - if (init_data->no_output_qs) - irq_ptr->qib.osliba=(unsigned long)(irq_ptr->output_qs[0]->slib); - memcpy(irq_ptr->qib.ebcnam,init_data->adapter_name,8); - - qdio_set_impl_params(irq_ptr,init_data->qib_param_field_format, - init_data->qib_param_field, - init_data->no_input_qs, - init_data->no_output_qs, - init_data->input_slib_elements, - init_data->output_slib_elements); - - /* first input descriptors, then output descriptors */ - is_iqdio = (init_data->q_format == QDIO_IQDIO_QFMT) ? 1 : 0; - for (i=0;ino_input_qs;i++) - qdio_allocate_fill_input_desc(irq_ptr, i, is_iqdio); - - for (i=0;ino_output_qs;i++) - qdio_allocate_fill_output_desc(irq_ptr, i, - init_data->no_input_qs, - is_iqdio); - - /* qdr, qib, sls, slsbs, slibs, sbales filled. */ - - /* get qdio commands */ - ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_EQUEUE); - if (!ciw) { - QDIO_DBF_TEXT2(1,setup,"no eq"); - QDIO_PRINT_INFO("No equeue CIW found for QDIO commands. " - "Trying to use default.\n"); - } else - irq_ptr->equeue = *ciw; - ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_AQUEUE); - if (!ciw) { - QDIO_DBF_TEXT2(1,setup,"no aq"); - QDIO_PRINT_INFO("No aqueue CIW found for QDIO commands. " - "Trying to use default.\n"); - } else - irq_ptr->aqueue = *ciw; - - /* Set new interrupt handler. */ - irq_ptr->original_int_handler = init_data->cdev->handler; - init_data->cdev->handler = qdio_handler; - - return 0; -} - -int -qdio_establish(struct qdio_initialize *init_data) -{ - struct qdio_irq *irq_ptr; - unsigned long saveflags; - int result, result2; - struct ccw_device *cdev; - char dbf_text[20]; - - cdev=init_data->cdev; - irq_ptr = cdev->private->qdio_data; - if (!irq_ptr) - return -EINVAL; - - if (cdev->private->state != DEV_STATE_ONLINE) - return -EINVAL; - - down(&irq_ptr->setting_up_sema); - - qdio_fill_irq(init_data); - - /* the thinint CHSC stuff */ - if (irq_ptr->is_thinint_irq) { - - result = tiqdio_set_subchannel_ind(irq_ptr,0); - if (result) { - up(&irq_ptr->setting_up_sema); - qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); - return result; - } - tiqdio_set_delay_target(irq_ptr,TIQDIO_DELAY_TARGET); - } - - sprintf(dbf_text,"qest%4x",cdev->private->schid.sch_no); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_DBF_TEXT0(0,trace,dbf_text); - - /* establish q */ - irq_ptr->ccw.cmd_code=irq_ptr->equeue.cmd; - irq_ptr->ccw.flags=CCW_FLAG_SLI; - irq_ptr->ccw.count=irq_ptr->equeue.count; - irq_ptr->ccw.cda=QDIO_GET_ADDR(irq_ptr->qdr); - - spin_lock_irqsave(get_ccwdev_lock(cdev),saveflags); - - ccw_device_set_options_mask(cdev, 0); - result = ccw_device_start(cdev, &irq_ptr->ccw, - QDIO_DOING_ESTABLISH, 0, 0); - if (result) { - result2 = ccw_device_start(cdev, &irq_ptr->ccw, - QDIO_DOING_ESTABLISH, 0, 0); - sprintf(dbf_text,"eq:io%4x",result); - QDIO_DBF_TEXT2(1,setup,dbf_text); - if (result2) { - sprintf(dbf_text,"eq:io%4x",result); - QDIO_DBF_TEXT2(1,setup,dbf_text); - } - QDIO_PRINT_WARN("establish queues on irq 0.%x.%04x: do_IO " \ - "returned %i, next try returned %i\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no, - result, result2); - result=result2; - } - - spin_unlock_irqrestore(get_ccwdev_lock(cdev),saveflags); - - if (result) { - up(&irq_ptr->setting_up_sema); - qdio_shutdown(cdev,QDIO_FLAG_CLEANUP_USING_CLEAR); - return result; - } - - wait_event_interruptible_timeout(cdev->private->wait_q, - irq_ptr->state == QDIO_IRQ_STATE_ESTABLISHED || - irq_ptr->state == QDIO_IRQ_STATE_ERR, - QDIO_ESTABLISH_TIMEOUT); - - if (irq_ptr->state == QDIO_IRQ_STATE_ESTABLISHED) - result = 0; - else { - up(&irq_ptr->setting_up_sema); - qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); - return -EIO; - } - - qdio_get_ssqd_siga(irq_ptr); - /* if this gets set once, we're running under VM and can omit SVSes */ - if (irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_NECESSARY) - omit_svs=1; - - sprintf(dbf_text,"qdioac%2x",irq_ptr->qdioac); - QDIO_DBF_TEXT2(0,setup,dbf_text); - - sprintf(dbf_text,"qib ac%2x",irq_ptr->qib.ac); - QDIO_DBF_TEXT2(0,setup,dbf_text); - - irq_ptr->hydra_gives_outbound_pcis= - irq_ptr->qib.ac&QIB_AC_OUTBOUND_PCI_SUPPORTED; - irq_ptr->sync_done_on_outb_pcis= - irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS; - - qdio_initialize_set_siga_flags_input(irq_ptr); - qdio_initialize_set_siga_flags_output(irq_ptr); - - up(&irq_ptr->setting_up_sema); - - return result; - -} - -int -qdio_activate(struct ccw_device *cdev, int flags) -{ - struct qdio_irq *irq_ptr; - int i,result=0,result2; - unsigned long saveflags; - char dbf_text[20]; /* see qdio_initialize */ - - irq_ptr = cdev->private->qdio_data; - if (!irq_ptr) - return -ENODEV; - - if (cdev->private->state != DEV_STATE_ONLINE) - return -EINVAL; - - down(&irq_ptr->setting_up_sema); - if (irq_ptr->state==QDIO_IRQ_STATE_INACTIVE) { - result=-EBUSY; - goto out; - } - - sprintf(dbf_text,"qact%4x", irq_ptr->schid.sch_no); - QDIO_DBF_TEXT2(0,setup,dbf_text); - QDIO_DBF_TEXT2(0,trace,dbf_text); - - /* activate q */ - irq_ptr->ccw.cmd_code=irq_ptr->aqueue.cmd; - irq_ptr->ccw.flags=CCW_FLAG_SLI; - irq_ptr->ccw.count=irq_ptr->aqueue.count; - irq_ptr->ccw.cda=QDIO_GET_ADDR(0); - - spin_lock_irqsave(get_ccwdev_lock(cdev),saveflags); - - ccw_device_set_options(cdev, CCWDEV_REPORT_ALL); - result=ccw_device_start(cdev,&irq_ptr->ccw,QDIO_DOING_ACTIVATE, - 0, DOIO_DENY_PREFETCH); - if (result) { - result2=ccw_device_start(cdev,&irq_ptr->ccw, - QDIO_DOING_ACTIVATE,0,0); - sprintf(dbf_text,"aq:io%4x",result); - QDIO_DBF_TEXT2(1,setup,dbf_text); - if (result2) { - sprintf(dbf_text,"aq:io%4x",result); - QDIO_DBF_TEXT2(1,setup,dbf_text); - } - QDIO_PRINT_WARN("activate queues on irq 0.%x.%04x: do_IO " \ - "returned %i, next try returned %i\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no, - result, result2); - result=result2; - } - - spin_unlock_irqrestore(get_ccwdev_lock(cdev),saveflags); - if (result) - goto out; - - for (i=0;ino_input_qs;i++) { - if (irq_ptr->is_thinint_irq) { - /* - * that way we know, that, if we will get interrupted - * by tiqdio_inbound_processing, qdio_unmark_q will - * not be called - */ - qdio_reserve_q(irq_ptr->input_qs[i]); - qdio_mark_tiq(irq_ptr->input_qs[i]); - qdio_release_q(irq_ptr->input_qs[i]); - } - } - - if (flags&QDIO_FLAG_NO_INPUT_INTERRUPT_CONTEXT) { - for (i=0;ino_input_qs;i++) { - irq_ptr->input_qs[i]->is_input_q|= - QDIO_FLAG_NO_INPUT_INTERRUPT_CONTEXT; - } - } - - msleep(QDIO_ACTIVATE_TIMEOUT); - switch (irq_ptr->state) { - case QDIO_IRQ_STATE_STOPPED: - case QDIO_IRQ_STATE_ERR: - up(&irq_ptr->setting_up_sema); - qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); - down(&irq_ptr->setting_up_sema); - result = -EIO; - break; - default: - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ACTIVE); - result = 0; - } - out: - up(&irq_ptr->setting_up_sema); - - return result; -} - -/* buffers filled forwards again to make Rick happy */ -static void -qdio_do_qdio_fill_input(struct qdio_q *q, unsigned int qidx, - unsigned int count, struct qdio_buffer *buffers) -{ - struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; - int tmp = 0; - - qidx &= (QDIO_MAX_BUFFERS_PER_Q - 1); - if (irq->is_qebsm) { - while (count) { - tmp = set_slsb(q, &qidx, SLSB_CU_INPUT_EMPTY, &count); - if (!tmp) - return; - } - return; - } - for (;;) { - set_slsb(q, &qidx, SLSB_CU_INPUT_EMPTY, &count); - count--; - if (!count) break; - qidx = (qidx + 1) & (QDIO_MAX_BUFFERS_PER_Q - 1); - } -} - -static void -qdio_do_qdio_fill_output(struct qdio_q *q, unsigned int qidx, - unsigned int count, struct qdio_buffer *buffers) -{ - struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; - int tmp = 0; - - qidx &= (QDIO_MAX_BUFFERS_PER_Q - 1); - if (irq->is_qebsm) { - while (count) { - tmp = set_slsb(q, &qidx, SLSB_CU_OUTPUT_PRIMED, &count); - if (!tmp) - return; - } - return; - } - - for (;;) { - set_slsb(q, &qidx, SLSB_CU_OUTPUT_PRIMED, &count); - count--; - if (!count) break; - qidx = (qidx + 1) & (QDIO_MAX_BUFFERS_PER_Q - 1); - } -} - -static void -do_qdio_handle_inbound(struct qdio_q *q, unsigned int callflags, - unsigned int qidx, unsigned int count, - struct qdio_buffer *buffers) -{ - int used_elements; - - /* This is the inbound handling of queues */ - used_elements=atomic_add_return(count, &q->number_of_buffers_used) - count; - - qdio_do_qdio_fill_input(q,qidx,count,buffers); - - if ((used_elements+count==QDIO_MAX_BUFFERS_PER_Q)&& - (callflags&QDIO_FLAG_UNDER_INTERRUPT)) - atomic_xchg(&q->polling,0); - - if (used_elements) - return; - if (callflags&QDIO_FLAG_DONT_SIGA) - return; - if (q->siga_in) { - int result; - - result=qdio_siga_input(q); - if (result) { - if (q->siga_error) - q->error_status_flags|= - QDIO_STATUS_MORE_THAN_ONE_SIGA_ERROR; - q->error_status_flags|=QDIO_STATUS_LOOK_FOR_ERROR; - q->siga_error=result; - } - } - - qdio_mark_q(q); -} - -static void -do_qdio_handle_outbound(struct qdio_q *q, unsigned int callflags, - unsigned int qidx, unsigned int count, - struct qdio_buffer *buffers) -{ - int used_elements; - unsigned int cnt, start_buf; - unsigned char state = 0; - struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; - - /* This is the outbound handling of queues */ - qdio_do_qdio_fill_output(q,qidx,count,buffers); - - used_elements=atomic_add_return(count, &q->number_of_buffers_used) - count; - - if (callflags&QDIO_FLAG_DONT_SIGA) { - qdio_perf_stat_inc(&perf_stats.outbound_cnt); - return; - } - if (callflags & QDIO_FLAG_PCI_OUT) - q->is_pci_out = 1; - else - q->is_pci_out = 0; - if (q->is_iqdio_q) { - /* one siga for every sbal */ - while (count--) - qdio_kick_outbound_q(q); - - __qdio_outbound_processing(q); - } else { - /* under VM, we do a SIGA sync unconditionally */ - SYNC_MEMORY; - else { - /* - * w/o shadow queues (else branch of - * SYNC_MEMORY :-/ ), we try to - * fast-requeue buffers - */ - if (irq->is_qebsm) { - cnt = 1; - start_buf = ((qidx+QDIO_MAX_BUFFERS_PER_Q-1) & - (QDIO_MAX_BUFFERS_PER_Q-1)); - qdio_do_eqbs(q, &state, &start_buf, &cnt); - } else - state = q->slsb.acc.val[(qidx+QDIO_MAX_BUFFERS_PER_Q-1) - &(QDIO_MAX_BUFFERS_PER_Q-1) ]; - if (state != SLSB_CU_OUTPUT_PRIMED) { - qdio_kick_outbound_q(q); - } else { - QDIO_DBF_TEXT3(0,trace, "fast-req"); - qdio_perf_stat_inc(&perf_stats.fast_reqs); - } - } - /* - * only marking the q could take too long, - * the upper layer module could do a lot of - * traffic in that time - */ - __qdio_outbound_processing(q); - } - - qdio_perf_stat_inc(&perf_stats.outbound_cnt); -} - -/* count must be 1 in iqdio */ -int -do_QDIO(struct ccw_device *cdev,unsigned int callflags, - unsigned int queue_number, unsigned int qidx, - unsigned int count,struct qdio_buffer *buffers) -{ - struct qdio_irq *irq_ptr; -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[20]; - - sprintf(dbf_text,"doQD%04x",cdev->private->schid.sch_no); - QDIO_DBF_TEXT3(0,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - - if ( (qidx>QDIO_MAX_BUFFERS_PER_Q) || - (count>QDIO_MAX_BUFFERS_PER_Q) || - (queue_number>QDIO_MAX_QUEUES_PER_IRQ) ) - return -EINVAL; - - if (count==0) - return 0; - - irq_ptr = cdev->private->qdio_data; - if (!irq_ptr) - return -ENODEV; - -#ifdef CONFIG_QDIO_DEBUG - if (callflags&QDIO_FLAG_SYNC_INPUT) - QDIO_DBF_HEX3(0,trace,&irq_ptr->input_qs[queue_number], - sizeof(void*)); - else - QDIO_DBF_HEX3(0,trace,&irq_ptr->output_qs[queue_number], - sizeof(void*)); - sprintf(dbf_text,"flag%04x",callflags); - QDIO_DBF_TEXT3(0,trace,dbf_text); - sprintf(dbf_text,"qi%02xct%02x",qidx,count); - QDIO_DBF_TEXT3(0,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - - if (irq_ptr->state!=QDIO_IRQ_STATE_ACTIVE) - return -EBUSY; - - if (callflags&QDIO_FLAG_SYNC_INPUT) - do_qdio_handle_inbound(irq_ptr->input_qs[queue_number], - callflags, qidx, count, buffers); - else if (callflags&QDIO_FLAG_SYNC_OUTPUT) - do_qdio_handle_outbound(irq_ptr->output_qs[queue_number], - callflags, qidx, count, buffers); - else { - QDIO_DBF_TEXT3(1,trace,"doQD:inv"); - return -EINVAL; - } - return 0; -} - -static int -qdio_perf_procfile_read(char *buffer, char **buffer_location, off_t offset, - int buffer_length, int *eof, void *data) -{ - int c=0; - - /* we are always called with buffer_length=4k, so we all - deliver on the first read */ - if (offset>0) - return 0; - -#define _OUTP_IT(x...) c+=sprintf(buffer+c,x) -#ifdef CONFIG_64BIT - _OUTP_IT("Number of tasklet runs (total) : %li\n", - (long)atomic64_read(&perf_stats.tl_runs)); - _OUTP_IT("Inbound tasklet runs tried/retried : %li/%li\n", - (long)atomic64_read(&perf_stats.inbound_tl_runs), - (long)atomic64_read(&perf_stats.inbound_tl_runs_resched)); - _OUTP_IT("Inbound-thin tasklet runs tried/retried : %li/%li\n", - (long)atomic64_read(&perf_stats.inbound_thin_tl_runs), - (long)atomic64_read(&perf_stats.inbound_thin_tl_runs_resched)); - _OUTP_IT("Outbound tasklet runs tried/retried : %li/%li\n", - (long)atomic64_read(&perf_stats.outbound_tl_runs), - (long)atomic64_read(&perf_stats.outbound_tl_runs_resched)); - _OUTP_IT("\n"); - _OUTP_IT("Number of SIGA sync's issued : %li\n", - (long)atomic64_read(&perf_stats.siga_syncs)); - _OUTP_IT("Number of SIGA in's issued : %li\n", - (long)atomic64_read(&perf_stats.siga_ins)); - _OUTP_IT("Number of SIGA out's issued : %li\n", - (long)atomic64_read(&perf_stats.siga_outs)); - _OUTP_IT("Number of PCIs caught : %li\n", - (long)atomic64_read(&perf_stats.pcis)); - _OUTP_IT("Number of adapter interrupts caught : %li\n", - (long)atomic64_read(&perf_stats.thinints)); - _OUTP_IT("Number of fast requeues (outg. SBALs w/o SIGA) : %li\n", - (long)atomic64_read(&perf_stats.fast_reqs)); - _OUTP_IT("\n"); - _OUTP_IT("Number of inbound transfers : %li\n", - (long)atomic64_read(&perf_stats.inbound_cnt)); - _OUTP_IT("Number of do_QDIOs outbound : %li\n", - (long)atomic64_read(&perf_stats.outbound_cnt)); -#else /* CONFIG_64BIT */ - _OUTP_IT("Number of tasklet runs (total) : %i\n", - atomic_read(&perf_stats.tl_runs)); - _OUTP_IT("Inbound tasklet runs tried/retried : %i/%i\n", - atomic_read(&perf_stats.inbound_tl_runs), - atomic_read(&perf_stats.inbound_tl_runs_resched)); - _OUTP_IT("Inbound-thin tasklet runs tried/retried : %i/%i\n", - atomic_read(&perf_stats.inbound_thin_tl_runs), - atomic_read(&perf_stats.inbound_thin_tl_runs_resched)); - _OUTP_IT("Outbound tasklet runs tried/retried : %i/%i\n", - atomic_read(&perf_stats.outbound_tl_runs), - atomic_read(&perf_stats.outbound_tl_runs_resched)); - _OUTP_IT("\n"); - _OUTP_IT("Number of SIGA sync's issued : %i\n", - atomic_read(&perf_stats.siga_syncs)); - _OUTP_IT("Number of SIGA in's issued : %i\n", - atomic_read(&perf_stats.siga_ins)); - _OUTP_IT("Number of SIGA out's issued : %i\n", - atomic_read(&perf_stats.siga_outs)); - _OUTP_IT("Number of PCIs caught : %i\n", - atomic_read(&perf_stats.pcis)); - _OUTP_IT("Number of adapter interrupts caught : %i\n", - atomic_read(&perf_stats.thinints)); - _OUTP_IT("Number of fast requeues (outg. SBALs w/o SIGA) : %i\n", - atomic_read(&perf_stats.fast_reqs)); - _OUTP_IT("\n"); - _OUTP_IT("Number of inbound transfers : %i\n", - atomic_read(&perf_stats.inbound_cnt)); - _OUTP_IT("Number of do_QDIOs outbound : %i\n", - atomic_read(&perf_stats.outbound_cnt)); -#endif /* CONFIG_64BIT */ - _OUTP_IT("\n"); - - return c; -} - -static struct proc_dir_entry *qdio_perf_proc_file; - -static void -qdio_add_procfs_entry(void) -{ - proc_perf_file_registration=0; - qdio_perf_proc_file=create_proc_entry(QDIO_PERF, - S_IFREG|0444,NULL); - if (qdio_perf_proc_file) { - qdio_perf_proc_file->read_proc=&qdio_perf_procfile_read; - } else proc_perf_file_registration=-1; - - if (proc_perf_file_registration) - QDIO_PRINT_WARN("was not able to register perf. " \ - "proc-file (%i).\n", - proc_perf_file_registration); -} - -static void -qdio_remove_procfs_entry(void) -{ - if (!proc_perf_file_registration) /* means if it went ok earlier */ - remove_proc_entry(QDIO_PERF,NULL); -} - -/** - * attributes in sysfs - *****************************************************************************/ - -static ssize_t -qdio_performance_stats_show(struct bus_type *bus, char *buf) -{ - return sprintf(buf, "%i\n", qdio_performance_stats ? 1 : 0); -} - -static ssize_t -qdio_performance_stats_store(struct bus_type *bus, const char *buf, size_t count) -{ - unsigned long i; - int ret; - - ret = strict_strtoul(buf, 16, &i); - if (!ret && ((i == 0) || (i == 1))) { - if (i == qdio_performance_stats) - return count; - qdio_performance_stats = i; - if (i==0) { - /* reset perf. stat. info */ -#ifdef CONFIG_64BIT - atomic64_set(&perf_stats.tl_runs, 0); - atomic64_set(&perf_stats.outbound_tl_runs, 0); - atomic64_set(&perf_stats.inbound_tl_runs, 0); - atomic64_set(&perf_stats.inbound_tl_runs_resched, 0); - atomic64_set(&perf_stats.inbound_thin_tl_runs, 0); - atomic64_set(&perf_stats.inbound_thin_tl_runs_resched, - 0); - atomic64_set(&perf_stats.siga_outs, 0); - atomic64_set(&perf_stats.siga_ins, 0); - atomic64_set(&perf_stats.siga_syncs, 0); - atomic64_set(&perf_stats.pcis, 0); - atomic64_set(&perf_stats.thinints, 0); - atomic64_set(&perf_stats.fast_reqs, 0); - atomic64_set(&perf_stats.outbound_cnt, 0); - atomic64_set(&perf_stats.inbound_cnt, 0); -#else /* CONFIG_64BIT */ - atomic_set(&perf_stats.tl_runs, 0); - atomic_set(&perf_stats.outbound_tl_runs, 0); - atomic_set(&perf_stats.inbound_tl_runs, 0); - atomic_set(&perf_stats.inbound_tl_runs_resched, 0); - atomic_set(&perf_stats.inbound_thin_tl_runs, 0); - atomic_set(&perf_stats.inbound_thin_tl_runs_resched, 0); - atomic_set(&perf_stats.siga_outs, 0); - atomic_set(&perf_stats.siga_ins, 0); - atomic_set(&perf_stats.siga_syncs, 0); - atomic_set(&perf_stats.pcis, 0); - atomic_set(&perf_stats.thinints, 0); - atomic_set(&perf_stats.fast_reqs, 0); - atomic_set(&perf_stats.outbound_cnt, 0); - atomic_set(&perf_stats.inbound_cnt, 0); -#endif /* CONFIG_64BIT */ - } - } else { - QDIO_PRINT_ERR("QDIO performance_stats: write 0 or 1 to this file!\n"); - return -EINVAL; - } - return count; -} - -static BUS_ATTR(qdio_performance_stats, 0644, qdio_performance_stats_show, - qdio_performance_stats_store); - -static void -tiqdio_register_thinints(void) -{ - char dbf_text[20]; - - tiqdio_ind = - s390_register_adapter_interrupt(&tiqdio_thinint_handler, NULL, - TIQDIO_THININT_ISC); - if (IS_ERR(tiqdio_ind)) { - sprintf(dbf_text, "regthn%lx", PTR_ERR(tiqdio_ind)); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_PRINT_ERR("failed to register adapter handler " \ - "(rc=%li).\nAdapter interrupts might " \ - "not work. Continuing.\n", - PTR_ERR(tiqdio_ind)); - tiqdio_ind = NULL; - } -} - -static void -tiqdio_unregister_thinints(void) -{ - if (tiqdio_ind) - s390_unregister_adapter_interrupt(tiqdio_ind, - TIQDIO_THININT_ISC); -} - -static int -qdio_get_qdio_memory(void) -{ - int i; - indicator_used[0]=1; - - for (i=1;i + * Jan Glauber + */ #ifndef _CIO_QDIO_H #define _CIO_QDIO_H #include -#include #include +#include "chsc.h" -#ifdef CONFIG_QDIO_DEBUG -#define QDIO_VERBOSE_LEVEL 9 -#else /* CONFIG_QDIO_DEBUG */ -#define QDIO_VERBOSE_LEVEL 5 -#endif /* CONFIG_QDIO_DEBUG */ -#define QDIO_USE_PROCESSING_STATE - -#define QDIO_MINIMAL_BH_RELIEF_TIME 16 -#define QDIO_TIMER_POLL_VALUE 1 -#define IQDIO_TIMER_POLL_VALUE 1 - -/* - * unfortunately this can't be (QDIO_MAX_BUFFERS_PER_Q*4/3) or so -- as - * we never know, whether we'll get initiative again, e.g. to give the - * transmit skb's back to the stack, however the stack may be waiting for - * them... therefore we define 4 as threshold to start polling (which - * will stop as soon as the asynchronous queue catches up) - * btw, this only applies to the asynchronous HiperSockets queue - */ -#define IQDIO_FILL_LEVEL_TO_POLL 4 - -#define TIQDIO_THININT_ISC QDIO_AIRQ_ISC -#define TIQDIO_DELAY_TARGET 0 -#define QDIO_BUSY_BIT_PATIENCE 100 /* in microsecs */ -#define QDIO_BUSY_BIT_GIVE_UP 10000000 /* 10 seconds */ -#define IQDIO_GLOBAL_LAPS 2 /* GLOBAL_LAPS are not used as we */ -#define IQDIO_GLOBAL_LAPS_INT 1 /* don't global summary */ -#define IQDIO_LOCAL_LAPS 4 -#define IQDIO_LOCAL_LAPS_INT 1 -#define IQDIO_GLOBAL_SUMMARY_CC_MASK 2 -/*#define IQDIO_IQDC_INT_PARM 0x1234*/ - -#define QDIO_Q_LAPS 5 - -#define QDIO_STORAGE_KEY PAGE_DEFAULT_KEY - -#define L2_CACHELINE_SIZE 256 -#define INDICATORS_PER_CACHELINE (L2_CACHELINE_SIZE/sizeof(__u32)) - -#define QDIO_PERF "qdio_perf" - -/* must be a power of 2 */ -/*#define QDIO_STATS_NUMBER 4 - -#define QDIO_STATS_CLASSES 2 -#define QDIO_STATS_COUNT_NEEDED 2*/ - -#define QDIO_NO_USE_COUNT_TIMEOUT (1*HZ) /* wait for 1 sec on each q before - exiting without having use_count - of the queue to 0 */ - -#define QDIO_ESTABLISH_TIMEOUT (1*HZ) -#define QDIO_CLEANUP_CLEAR_TIMEOUT (20*HZ) -#define QDIO_CLEANUP_HALT_TIMEOUT (10*HZ) -#define QDIO_FORCE_CHECK_TIMEOUT (10*HZ) -#define QDIO_ACTIVATE_TIMEOUT (5) /* 5 ms */ +#define QDIO_BUSY_BIT_PATIENCE 100 /* 100 microseconds */ +#define QDIO_BUSY_BIT_GIVE_UP 2000000 /* 2 seconds = eternity */ +#define QDIO_INPUT_THRESHOLD 500 /* 500 microseconds */ enum qdio_irq_states { QDIO_IRQ_STATE_INACTIVE, @@ -72,565 +26,352 @@ enum qdio_irq_states { NR_QDIO_IRQ_STATES, }; -/* used as intparm in do_IO: */ -#define QDIO_DOING_SENSEID 0 -#define QDIO_DOING_ESTABLISH 1 -#define QDIO_DOING_ACTIVATE 2 -#define QDIO_DOING_CLEANUP 3 - -/************************* DEBUG FACILITY STUFF *********************/ - -#define QDIO_DBF_HEX(ex,name,level,addr,len) \ - do { \ - if (ex) \ - debug_exception(qdio_dbf_##name,level,(void*)(addr),len); \ - else \ - debug_event(qdio_dbf_##name,level,(void*)(addr),len); \ - } while (0) -#define QDIO_DBF_TEXT(ex,name,level,text) \ - do { \ - if (ex) \ - debug_text_exception(qdio_dbf_##name,level,text); \ - else \ - debug_text_event(qdio_dbf_##name,level,text); \ - } while (0) - - -#define QDIO_DBF_HEX0(ex,name,addr,len) QDIO_DBF_HEX(ex,name,0,addr,len) -#define QDIO_DBF_HEX1(ex,name,addr,len) QDIO_DBF_HEX(ex,name,1,addr,len) -#define QDIO_DBF_HEX2(ex,name,addr,len) QDIO_DBF_HEX(ex,name,2,addr,len) -#ifdef CONFIG_QDIO_DEBUG -#define QDIO_DBF_HEX3(ex,name,addr,len) QDIO_DBF_HEX(ex,name,3,addr,len) -#define QDIO_DBF_HEX4(ex,name,addr,len) QDIO_DBF_HEX(ex,name,4,addr,len) -#define QDIO_DBF_HEX5(ex,name,addr,len) QDIO_DBF_HEX(ex,name,5,addr,len) -#define QDIO_DBF_HEX6(ex,name,addr,len) QDIO_DBF_HEX(ex,name,6,addr,len) -#else /* CONFIG_QDIO_DEBUG */ -#define QDIO_DBF_HEX3(ex,name,addr,len) do {} while (0) -#define QDIO_DBF_HEX4(ex,name,addr,len) do {} while (0) -#define QDIO_DBF_HEX5(ex,name,addr,len) do {} while (0) -#define QDIO_DBF_HEX6(ex,name,addr,len) do {} while (0) -#endif /* CONFIG_QDIO_DEBUG */ - -#define QDIO_DBF_TEXT0(ex,name,text) QDIO_DBF_TEXT(ex,name,0,text) -#define QDIO_DBF_TEXT1(ex,name,text) QDIO_DBF_TEXT(ex,name,1,text) -#define QDIO_DBF_TEXT2(ex,name,text) QDIO_DBF_TEXT(ex,name,2,text) -#ifdef CONFIG_QDIO_DEBUG -#define QDIO_DBF_TEXT3(ex,name,text) QDIO_DBF_TEXT(ex,name,3,text) -#define QDIO_DBF_TEXT4(ex,name,text) QDIO_DBF_TEXT(ex,name,4,text) -#define QDIO_DBF_TEXT5(ex,name,text) QDIO_DBF_TEXT(ex,name,5,text) -#define QDIO_DBF_TEXT6(ex,name,text) QDIO_DBF_TEXT(ex,name,6,text) -#else /* CONFIG_QDIO_DEBUG */ -#define QDIO_DBF_TEXT3(ex,name,text) do {} while (0) -#define QDIO_DBF_TEXT4(ex,name,text) do {} while (0) -#define QDIO_DBF_TEXT5(ex,name,text) do {} while (0) -#define QDIO_DBF_TEXT6(ex,name,text) do {} while (0) -#endif /* CONFIG_QDIO_DEBUG */ - -#define QDIO_DBF_SETUP_NAME "qdio_setup" -#define QDIO_DBF_SETUP_LEN 8 -#define QDIO_DBF_SETUP_PAGES 4 -#define QDIO_DBF_SETUP_NR_AREAS 1 -#ifdef CONFIG_QDIO_DEBUG -#define QDIO_DBF_SETUP_LEVEL 6 -#else /* CONFIG_QDIO_DEBUG */ -#define QDIO_DBF_SETUP_LEVEL 2 -#endif /* CONFIG_QDIO_DEBUG */ - -#define QDIO_DBF_SBAL_NAME "qdio_labs" /* sbal */ -#define QDIO_DBF_SBAL_LEN 256 -#define QDIO_DBF_SBAL_PAGES 4 -#define QDIO_DBF_SBAL_NR_AREAS 2 -#ifdef CONFIG_QDIO_DEBUG -#define QDIO_DBF_SBAL_LEVEL 6 -#else /* CONFIG_QDIO_DEBUG */ -#define QDIO_DBF_SBAL_LEVEL 2 -#endif /* CONFIG_QDIO_DEBUG */ - -#define QDIO_DBF_TRACE_NAME "qdio_trace" -#define QDIO_DBF_TRACE_LEN 8 -#define QDIO_DBF_TRACE_NR_AREAS 2 -#ifdef CONFIG_QDIO_DEBUG -#define QDIO_DBF_TRACE_PAGES 16 -#define QDIO_DBF_TRACE_LEVEL 4 /* -------- could be even more verbose here */ -#else /* CONFIG_QDIO_DEBUG */ -#define QDIO_DBF_TRACE_PAGES 4 -#define QDIO_DBF_TRACE_LEVEL 2 -#endif /* CONFIG_QDIO_DEBUG */ - -#define QDIO_DBF_SENSE_NAME "qdio_sense" -#define QDIO_DBF_SENSE_LEN 64 -#define QDIO_DBF_SENSE_PAGES 2 -#define QDIO_DBF_SENSE_NR_AREAS 1 -#ifdef CONFIG_QDIO_DEBUG -#define QDIO_DBF_SENSE_LEVEL 6 -#else /* CONFIG_QDIO_DEBUG */ -#define QDIO_DBF_SENSE_LEVEL 2 -#endif /* CONFIG_QDIO_DEBUG */ - -#ifdef CONFIG_QDIO_DEBUG -#define QDIO_TRACE_QTYPE QDIO_ZFCP_QFMT - -#define QDIO_DBF_SLSB_OUT_NAME "qdio_slsb_out" -#define QDIO_DBF_SLSB_OUT_LEN QDIO_MAX_BUFFERS_PER_Q -#define QDIO_DBF_SLSB_OUT_PAGES 256 -#define QDIO_DBF_SLSB_OUT_NR_AREAS 1 -#define QDIO_DBF_SLSB_OUT_LEVEL 6 - -#define QDIO_DBF_SLSB_IN_NAME "qdio_slsb_in" -#define QDIO_DBF_SLSB_IN_LEN QDIO_MAX_BUFFERS_PER_Q -#define QDIO_DBF_SLSB_IN_PAGES 256 -#define QDIO_DBF_SLSB_IN_NR_AREAS 1 -#define QDIO_DBF_SLSB_IN_LEVEL 6 -#endif /* CONFIG_QDIO_DEBUG */ - -#define QDIO_PRINTK_HEADER QDIO_NAME ": " - -#if QDIO_VERBOSE_LEVEL>8 -#define QDIO_PRINT_STUPID(x...) printk( KERN_DEBUG QDIO_PRINTK_HEADER x) -#else -#define QDIO_PRINT_STUPID(x...) do { } while (0) -#endif +/* used as intparm in do_IO */ +#define QDIO_DOING_ESTABLISH 1 +#define QDIO_DOING_ACTIVATE 2 +#define QDIO_DOING_CLEANUP 3 + +#define SLSB_STATE_NOT_INIT 0x0 +#define SLSB_STATE_EMPTY 0x1 +#define SLSB_STATE_PRIMED 0x2 +#define SLSB_STATE_HALTED 0xe +#define SLSB_STATE_ERROR 0xf +#define SLSB_TYPE_INPUT 0x0 +#define SLSB_TYPE_OUTPUT 0x20 +#define SLSB_OWNER_PROG 0x80 +#define SLSB_OWNER_CU 0x40 + +#define SLSB_P_INPUT_NOT_INIT \ + (SLSB_OWNER_PROG | SLSB_TYPE_INPUT | SLSB_STATE_NOT_INIT) /* 0x80 */ +#define SLSB_P_INPUT_ACK \ + (SLSB_OWNER_PROG | SLSB_TYPE_INPUT | SLSB_STATE_EMPTY) /* 0x81 */ +#define SLSB_CU_INPUT_EMPTY \ + (SLSB_OWNER_CU | SLSB_TYPE_INPUT | SLSB_STATE_EMPTY) /* 0x41 */ +#define SLSB_P_INPUT_PRIMED \ + (SLSB_OWNER_PROG | SLSB_TYPE_INPUT | SLSB_STATE_PRIMED) /* 0x82 */ +#define SLSB_P_INPUT_HALTED \ + (SLSB_OWNER_PROG | SLSB_TYPE_INPUT | SLSB_STATE_HALTED) /* 0x8e */ +#define SLSB_P_INPUT_ERROR \ + (SLSB_OWNER_PROG | SLSB_TYPE_INPUT | SLSB_STATE_ERROR) /* 0x8f */ +#define SLSB_P_OUTPUT_NOT_INIT \ + (SLSB_OWNER_PROG | SLSB_TYPE_OUTPUT | SLSB_STATE_NOT_INIT) /* 0xa0 */ +#define SLSB_P_OUTPUT_EMPTY \ + (SLSB_OWNER_PROG | SLSB_TYPE_OUTPUT | SLSB_STATE_EMPTY) /* 0xa1 */ +#define SLSB_CU_OUTPUT_PRIMED \ + (SLSB_OWNER_CU | SLSB_TYPE_OUTPUT | SLSB_STATE_PRIMED) /* 0x62 */ +#define SLSB_P_OUTPUT_HALTED \ + (SLSB_OWNER_PROG | SLSB_TYPE_OUTPUT | SLSB_STATE_HALTED) /* 0xae */ +#define SLSB_P_OUTPUT_ERROR \ + (SLSB_OWNER_PROG | SLSB_TYPE_OUTPUT | SLSB_STATE_ERROR) /* 0xaf */ + +#define SLSB_ERROR_DURING_LOOKUP 0xff + +/* additional CIWs returned by extended Sense-ID */ +#define CIW_TYPE_EQUEUE 0x3 /* establish QDIO queues */ +#define CIW_TYPE_AQUEUE 0x4 /* activate QDIO queues */ -#if QDIO_VERBOSE_LEVEL>7 -#define QDIO_PRINT_ALL(x...) printk( QDIO_PRINTK_HEADER x) -#else -#define QDIO_PRINT_ALL(x...) do { } while (0) -#endif - -#if QDIO_VERBOSE_LEVEL>6 -#define QDIO_PRINT_INFO(x...) printk( QDIO_PRINTK_HEADER x) -#else -#define QDIO_PRINT_INFO(x...) do { } while (0) -#endif - -#if QDIO_VERBOSE_LEVEL>5 -#define QDIO_PRINT_WARN(x...) printk( QDIO_PRINTK_HEADER x) -#else -#define QDIO_PRINT_WARN(x...) do { } while (0) -#endif - -#if QDIO_VERBOSE_LEVEL>4 -#define QDIO_PRINT_ERR(x...) printk( QDIO_PRINTK_HEADER x) -#else -#define QDIO_PRINT_ERR(x...) do { } while (0) -#endif - -#if QDIO_VERBOSE_LEVEL>3 -#define QDIO_PRINT_CRIT(x...) printk( QDIO_PRINTK_HEADER x) -#else -#define QDIO_PRINT_CRIT(x...) do { } while (0) -#endif - -#if QDIO_VERBOSE_LEVEL>2 -#define QDIO_PRINT_ALERT(x...) printk( QDIO_PRINTK_HEADER x) -#else -#define QDIO_PRINT_ALERT(x...) do { } while (0) -#endif +/* flags for st qdio sch data */ +#define CHSC_FLAG_QDIO_CAPABILITY 0x80 +#define CHSC_FLAG_VALIDITY 0x40 + +/* qdio adapter-characteristics-1 flag */ +#define AC1_SIGA_INPUT_NEEDED 0x40 /* process input queues */ +#define AC1_SIGA_OUTPUT_NEEDED 0x20 /* process output queues */ +#define AC1_SIGA_SYNC_NEEDED 0x10 /* ask hypervisor to sync */ +#define AC1_AUTOMATIC_SYNC_ON_THININT 0x08 /* set by hypervisor */ +#define AC1_AUTOMATIC_SYNC_ON_OUT_PCI 0x04 /* set by hypervisor */ +#define AC1_SC_QEBSM_AVAILABLE 0x02 /* available for subchannel */ +#define AC1_SC_QEBSM_ENABLED 0x01 /* enabled for subchannel */ -#if QDIO_VERBOSE_LEVEL>1 -#define QDIO_PRINT_EMERG(x...) printk( QDIO_PRINTK_HEADER x) -#else -#define QDIO_PRINT_EMERG(x...) do { } while (0) -#endif - -#define QDIO_HEXDUMP16(importance,header,ptr) \ -QDIO_PRINT_##importance(header "%02x %02x %02x %02x " \ - "%02x %02x %02x %02x %02x %02x %02x %02x " \ - "%02x %02x %02x %02x\n",*(((char*)ptr)), \ - *(((char*)ptr)+1),*(((char*)ptr)+2), \ - *(((char*)ptr)+3),*(((char*)ptr)+4), \ - *(((char*)ptr)+5),*(((char*)ptr)+6), \ - *(((char*)ptr)+7),*(((char*)ptr)+8), \ - *(((char*)ptr)+9),*(((char*)ptr)+10), \ - *(((char*)ptr)+11),*(((char*)ptr)+12), \ - *(((char*)ptr)+13),*(((char*)ptr)+14), \ - *(((char*)ptr)+15)); \ -QDIO_PRINT_##importance(header "%02x %02x %02x %02x %02x %02x %02x %02x " \ - "%02x %02x %02x %02x %02x %02x %02x %02x\n", \ - *(((char*)ptr)+16),*(((char*)ptr)+17), \ - *(((char*)ptr)+18),*(((char*)ptr)+19), \ - *(((char*)ptr)+20),*(((char*)ptr)+21), \ - *(((char*)ptr)+22),*(((char*)ptr)+23), \ - *(((char*)ptr)+24),*(((char*)ptr)+25), \ - *(((char*)ptr)+26),*(((char*)ptr)+27), \ - *(((char*)ptr)+28),*(((char*)ptr)+29), \ - *(((char*)ptr)+30),*(((char*)ptr)+31)); - -/****************** END OF DEBUG FACILITY STUFF *********************/ +#ifdef CONFIG_64BIT +static inline int do_sqbs(u64 token, unsigned char state, int queue, + int *start, int *count) +{ + register unsigned long _ccq asm ("0") = *count; + register unsigned long _token asm ("1") = token; + unsigned long _queuestart = ((unsigned long)queue << 32) | *start; -/* - * Some instructions as assembly - */ + asm volatile( + " .insn rsy,0xeb000000008A,%1,0,0(%2)" + : "+d" (_ccq), "+d" (_queuestart) + : "d" ((unsigned long)state), "d" (_token) + : "memory", "cc"); + *count = _ccq & 0xff; + *start = _queuestart & 0xff; -static inline int -do_sqbs(unsigned long sch, unsigned char state, int queue, - unsigned int *start, unsigned int *count) -{ -#ifdef CONFIG_64BIT - register unsigned long _ccq asm ("0") = *count; - register unsigned long _sch asm ("1") = sch; - unsigned long _queuestart = ((unsigned long)queue << 32) | *start; - - asm volatile( - " .insn rsy,0xeb000000008A,%1,0,0(%2)" - : "+d" (_ccq), "+d" (_queuestart) - : "d" ((unsigned long)state), "d" (_sch) - : "memory", "cc"); - *count = _ccq & 0xff; - *start = _queuestart & 0xff; - - return (_ccq >> 32) & 0xff; -#else - return 0; -#endif + return (_ccq >> 32) & 0xff; } -static inline int -do_eqbs(unsigned long sch, unsigned char *state, int queue, - unsigned int *start, unsigned int *count) +static inline int do_eqbs(u64 token, unsigned char *state, int queue, + int *start, int *count) { -#ifdef CONFIG_64BIT register unsigned long _ccq asm ("0") = *count; - register unsigned long _sch asm ("1") = sch; + register unsigned long _token asm ("1") = token; unsigned long _queuestart = ((unsigned long)queue << 32) | *start; unsigned long _state = 0; asm volatile( " .insn rrf,0xB99c0000,%1,%2,0,0" : "+d" (_ccq), "+d" (_queuestart), "+d" (_state) - : "d" (_sch) - : "memory", "cc" ); + : "d" (_token) + : "memory", "cc"); *count = _ccq & 0xff; *start = _queuestart & 0xff; *state = _state & 0xff; return (_ccq >> 32) & 0xff; -#else - return 0; -#endif -} - - -static inline int -do_siga_sync(struct subchannel_id schid, unsigned int mask1, unsigned int mask2) -{ - register unsigned long reg0 asm ("0") = 2; - register struct subchannel_id reg1 asm ("1") = schid; - register unsigned long reg2 asm ("2") = mask1; - register unsigned long reg3 asm ("3") = mask2; - int cc; - - asm volatile( - " siga 0\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (cc) - : "d" (reg0), "d" (reg1), "d" (reg2), "d" (reg3) : "cc"); - return cc; -} - -static inline int -do_siga_input(struct subchannel_id schid, unsigned int mask) -{ - register unsigned long reg0 asm ("0") = 1; - register struct subchannel_id reg1 asm ("1") = schid; - register unsigned long reg2 asm ("2") = mask; - int cc; - - asm volatile( - " siga 0\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (cc) - : "d" (reg0), "d" (reg1), "d" (reg2) : "cc", "memory"); - return cc; -} - -static inline int -do_siga_output(unsigned long schid, unsigned long mask, __u32 *bb, - unsigned int fc) -{ - register unsigned long __fc asm("0") = fc; - register unsigned long __schid asm("1") = schid; - register unsigned long __mask asm("2") = mask; - int cc; - - asm volatile( - " siga 0\n" - "0: ipm %0\n" - " srl %0,28\n" - "1:\n" - EX_TABLE(0b,1b) - : "=d" (cc), "+d" (__fc), "+d" (__schid), "+d" (__mask) - : "0" (QDIO_SIGA_ERROR_ACCESS_EXCEPTION) - : "cc", "memory"); - (*bb) = ((unsigned int) __fc) >> 31; - return cc; -} - -static inline unsigned long -do_clear_global_summary(void) -{ - register unsigned long __fn asm("1") = 3; - register unsigned long __tmp asm("2"); - register unsigned long __time asm("3"); - - asm volatile( - " .insn rre,0xb2650000,2,0" - : "+d" (__fn), "=d" (__tmp), "=d" (__time)); - return __time; } - -/* - * QDIO device commands returned by extended Sense-ID - */ -#define DEFAULT_ESTABLISH_QS_CMD 0x1b -#define DEFAULT_ESTABLISH_QS_COUNT 0x1000 -#define DEFAULT_ACTIVATE_QS_CMD 0x1f -#define DEFAULT_ACTIVATE_QS_COUNT 0 - -/* - * additional CIWs returned by extended Sense-ID - */ -#define CIW_TYPE_EQUEUE 0x3 /* establish QDIO queues */ -#define CIW_TYPE_AQUEUE 0x4 /* activate QDIO queues */ +#else +static inline int do_sqbs(u64 token, unsigned char state, int queue, + int *start, int *count) { return 0; } +static inline int do_eqbs(u64 token, unsigned char *state, int queue, + int *start, int *count) { return 0; } +#endif /* CONFIG_64BIT */ -#define QDIO_CHSC_RESPONSE_CODE_OK 1 -/* flags for st qdio sch data */ -#define CHSC_FLAG_QDIO_CAPABILITY 0x80 -#define CHSC_FLAG_VALIDITY 0x40 +struct qdio_irq; -#define CHSC_FLAG_SIGA_INPUT_NECESSARY 0x40 -#define CHSC_FLAG_SIGA_OUTPUT_NECESSARY 0x20 -#define CHSC_FLAG_SIGA_SYNC_NECESSARY 0x10 -#define CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS 0x08 -#define CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS 0x04 +struct siga_flag { + u8 input:1; + u8 output:1; + u8 sync:1; + u8 no_sync_ti:1; + u8 no_sync_out_ti:1; + u8 no_sync_out_pci:1; + u8:2; +} __attribute__ ((packed)); -struct qdio_chsc_ssqd { +struct chsc_ssqd_area { struct chsc_header request; - u16 reserved1:10; - u16 ssid:2; - u16 fmt:4; + u16:10; + u8 ssid:2; + u8 fmt:4; u16 first_sch; - u16 reserved2; + u16:16; u16 last_sch; - u32 reserved3; + u32:32; struct chsc_header response; - u32 reserved4; - u8 flags; - u8 reserved5; - u16 sch; - u8 qfmt; - u8 parm; - u8 qdioac1; - u8 sch_class; - u8 pct; - u8 icnt; - u8 reserved7; - u8 ocnt; - u8 reserved8; - u8 mbccnt; - u16 qdioac2; - u64 sch_token; -}; + u32:32; + struct qdio_ssqd_desc qdio_ssqd; +} __attribute__ ((packed)); -struct qdio_perf_stats { -#ifdef CONFIG_64BIT - atomic64_t tl_runs; - atomic64_t outbound_tl_runs; - atomic64_t outbound_tl_runs_resched; - atomic64_t inbound_tl_runs; - atomic64_t inbound_tl_runs_resched; - atomic64_t inbound_thin_tl_runs; - atomic64_t inbound_thin_tl_runs_resched; - - atomic64_t siga_outs; - atomic64_t siga_ins; - atomic64_t siga_syncs; - atomic64_t pcis; - atomic64_t thinints; - atomic64_t fast_reqs; - - atomic64_t outbound_cnt; - atomic64_t inbound_cnt; -#else /* CONFIG_64BIT */ - atomic_t tl_runs; - atomic_t outbound_tl_runs; - atomic_t outbound_tl_runs_resched; - atomic_t inbound_tl_runs; - atomic_t inbound_tl_runs_resched; - atomic_t inbound_thin_tl_runs; - atomic_t inbound_thin_tl_runs_resched; - - atomic_t siga_outs; - atomic_t siga_ins; - atomic_t siga_syncs; - atomic_t pcis; - atomic_t thinints; - atomic_t fast_reqs; - - atomic_t outbound_cnt; - atomic_t inbound_cnt; -#endif /* CONFIG_64BIT */ +struct scssc_area { + struct chsc_header request; + u16 operation_code; + u16:16; + u32:32; + u32:32; + u64 summary_indicator_addr; + u64 subchannel_indicator_addr; + u32 ks:4; + u32 kc:4; + u32:21; + u32 isc:3; + u32 word_with_d_bit; + u32:32; + struct subchannel_id schid; + u32 reserved[1004]; + struct chsc_header response; + u32:32; +} __attribute__ ((packed)); + +struct qdio_input_q { + /* input buffer acknowledgement flag */ + int polling; + + /* last time of noticing incoming data */ + u64 timestamp; + + /* lock for clearing the acknowledgement */ + spinlock_t lock; }; -/* unlikely as the later the better */ -#define SYNC_MEMORY if (unlikely(q->siga_sync)) qdio_siga_sync_q(q) -#define SYNC_MEMORY_ALL if (unlikely(q->siga_sync)) \ - qdio_siga_sync(q,~0U,~0U) -#define SYNC_MEMORY_ALL_OUTB if (unlikely(q->siga_sync)) \ - qdio_siga_sync(q,~0U,0) +struct qdio_output_q { + /* failed siga-w attempts*/ + atomic_t busy_siga_counter; -#define NOW qdio_get_micros() -#define SAVE_TIMESTAMP(q) q->timing.last_transfer_time=NOW -#define GET_SAVED_TIMESTAMP(q) (q->timing.last_transfer_time) -#define SAVE_FRONTIER(q,val) q->last_move_ftc=val -#define GET_SAVED_FRONTIER(q) (q->last_move_ftc) + /* start time of busy condition */ + u64 timestamp; -#define MY_MODULE_STRING(x) #x + /* PCIs are enabled for the queue */ + int pci_out_enabled; -#ifdef CONFIG_64BIT -#define QDIO_GET_ADDR(x) ((__u32)(unsigned long)x) -#else /* CONFIG_64BIT */ -#define QDIO_GET_ADDR(x) ((__u32)(long)x) -#endif /* CONFIG_64BIT */ + /* timer to check for more outbound work */ + struct timer_list timer; +}; struct qdio_q { - volatile struct slsb slsb; + struct slsb slsb; + union { + struct qdio_input_q in; + struct qdio_output_q out; + } u; - char unused[QDIO_MAX_BUFFERS_PER_Q]; + /* queue number */ + int nr; - __u32 * dev_st_chg_ind; + /* bitmask of queue number */ + int mask; + /* input or output queue */ int is_input_q; - struct subchannel_id schid; - struct ccw_device *cdev; - - unsigned int is_iqdio_q; - unsigned int is_thinint_q; - /* bit 0 means queue 0, bit 1 means queue 1, ... */ - unsigned int mask; - unsigned int q_no; + /* list of thinint input queues */ + struct list_head entry; + /* upper-layer program handler */ qdio_handler_t (*handler); - /* points to the next buffer to be checked for having - * been processed by the card (outbound) - * or to the next buffer the program should check for (inbound) */ - volatile int first_to_check; - /* and the last time it was: */ - volatile int last_move_ftc; + /* + * inbound: next buffer the program should check for + * outbound: next buffer to check for having been processed + * by the card + */ + int first_to_check; - atomic_t number_of_buffers_used; - atomic_t polling; + /* first_to_check of the last time */ + int last_move_ftc; - unsigned int siga_in; - unsigned int siga_out; - unsigned int siga_sync; - unsigned int siga_sync_done_on_thinints; - unsigned int siga_sync_done_on_outb_tis; - unsigned int hydra_gives_outbound_pcis; + /* beginning position for calling the program */ + int first_to_kick; - /* used to save beginning position when calling dd_handlers */ - int first_element_to_kick; + /* number of buffers in use by the adapter */ + atomic_t nr_buf_used; - atomic_t use_count; - atomic_t is_in_shutdown; - - void *irq_ptr; - - struct timer_list timer; -#ifdef QDIO_USE_TIMERS_FOR_POLLING - atomic_t timer_already_set; - spinlock_t timer_lock; -#else /* QDIO_USE_TIMERS_FOR_POLLING */ + struct qdio_irq *irq_ptr; struct tasklet_struct tasklet; -#endif /* QDIO_USE_TIMERS_FOR_POLLING */ - - enum qdio_irq_states state; - - /* used to store the error condition during a data transfer */ + /* error condition during a data transfer */ unsigned int qdio_error; - unsigned int siga_error; - unsigned int error_status_flags; - - /* list of interesting queues */ - volatile struct qdio_q *list_next; - volatile struct qdio_q *list_prev; struct sl *sl; - volatile struct sbal *sbal[QDIO_MAX_BUFFERS_PER_Q]; - - struct qdio_buffer *qdio_buffers[QDIO_MAX_BUFFERS_PER_Q]; - - unsigned long int_parm; - - /*struct { - int in_bh_check_limit; - int threshold; - } threshold_classes[QDIO_STATS_CLASSES];*/ - - struct { - /* inbound: the time to stop polling - outbound: the time to kick peer */ - int threshold; /* the real value */ - - /* outbound: last time of do_QDIO - inbound: last time of noticing incoming data */ - /*__u64 last_transfer_times[QDIO_STATS_NUMBER]; - int last_transfer_index; */ - - __u64 last_transfer_time; - __u64 busy_start; - } timing; - atomic_t busy_siga_counter; - unsigned int queue_type; - unsigned int is_pci_out; - - /* leave this member at the end. won't be cleared in qdio_fill_qs */ - struct slib *slib; /* a page is allocated under this pointer, - sl points into this page, offset PAGE_SIZE/2 - (after slib) */ + struct qdio_buffer *sbal[QDIO_MAX_BUFFERS_PER_Q]; + + /* + * Warning: Leave this member at the end so it won't be cleared in + * qdio_fill_qs. A page is allocated under this pointer and used for + * slib and sl. slib is 2048 bytes big and sl points to offset + * PAGE_SIZE / 2. + */ + struct slib *slib; } __attribute__ ((aligned(256))); struct qdio_irq { - __u32 * volatile dev_st_chg_ind; + struct qib qib; + u32 *dsci; /* address of device state change indicator */ + struct ccw_device *cdev; unsigned long int_parm; struct subchannel_id schid; - - unsigned int is_iqdio_irq; - unsigned int is_thinint_irq; - unsigned int hydra_gives_outbound_pcis; - unsigned int sync_done_on_outb_pcis; - - /* QEBSM facility */ - unsigned int is_qebsm; - unsigned long sch_token; + unsigned long sch_token; /* QEBSM facility */ enum qdio_irq_states state; - unsigned int no_input_qs; - unsigned int no_output_qs; + struct siga_flag siga_flag; /* siga sync information from qdioac */ - unsigned char qdioac; + int nr_input_qs; + int nr_output_qs; struct ccw1 ccw; - struct ciw equeue; struct ciw aqueue; - struct qib qib; - - void (*original_int_handler) (struct ccw_device *, - unsigned long, struct irb *); + struct qdio_ssqd_desc ssqd_desc; + + void (*orig_handler) (struct ccw_device *, unsigned long, struct irb *); - /* leave these four members together at the end. won't be cleared in qdio_fill_irq */ + /* + * Warning: Leave these members together at the end so they won't be + * cleared in qdio_setup_irq. + */ struct qdr *qdr; + unsigned long chsc_page; + struct qdio_q *input_qs[QDIO_MAX_QUEUES_PER_IRQ]; struct qdio_q *output_qs[QDIO_MAX_QUEUES_PER_IRQ]; - struct semaphore setting_up_sema; + + struct mutex setup_mutex; }; -#endif + +/* helper functions */ +#define queue_type(q) q->irq_ptr->qib.qfmt + +#define is_thinint_irq(irq) \ + (irq->qib.qfmt == QDIO_IQDIO_QFMT || \ + css_general_characteristics.aif_osa) + +/* the highest iqdio queue is used for multicast */ +static inline int multicast_outbound(struct qdio_q *q) +{ + return (q->irq_ptr->nr_output_qs > 1) && + (q->nr == q->irq_ptr->nr_output_qs - 1); +} + +static inline unsigned long long get_usecs(void) +{ + return monotonic_clock() >> 12; +} + +#define pci_out_supported(q) \ + (q->irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED) +#define is_qebsm(q) (q->irq_ptr->sch_token != 0) + +#define need_siga_sync_thinint(q) (!q->irq_ptr->siga_flag.no_sync_ti) +#define need_siga_sync_out_thinint(q) (!q->irq_ptr->siga_flag.no_sync_out_ti) +#define need_siga_in(q) (q->irq_ptr->siga_flag.input) +#define need_siga_out(q) (q->irq_ptr->siga_flag.output) +#define need_siga_sync(q) (q->irq_ptr->siga_flag.sync) +#define siga_syncs_out_pci(q) (q->irq_ptr->siga_flag.no_sync_out_pci) + +#define for_each_input_queue(irq_ptr, q, i) \ + for (i = 0, q = irq_ptr->input_qs[0]; \ + i < irq_ptr->nr_input_qs; \ + q = irq_ptr->input_qs[++i]) +#define for_each_output_queue(irq_ptr, q, i) \ + for (i = 0, q = irq_ptr->output_qs[0]; \ + i < irq_ptr->nr_output_qs; \ + q = irq_ptr->output_qs[++i]) + +#define prev_buf(bufnr) \ + ((bufnr + QDIO_MAX_BUFFERS_MASK) & QDIO_MAX_BUFFERS_MASK) +#define next_buf(bufnr) \ + ((bufnr + 1) & QDIO_MAX_BUFFERS_MASK) +#define add_buf(bufnr, inc) \ + ((bufnr + inc) & QDIO_MAX_BUFFERS_MASK) + +/* prototypes for thin interrupt */ +void qdio_sync_after_thinint(struct qdio_q *q); +int get_buf_state(struct qdio_q *q, unsigned int bufnr, unsigned char *state); +void qdio_check_outbound_after_thinint(struct qdio_q *q); +int qdio_inbound_q_moved(struct qdio_q *q); +void qdio_kick_inbound_handler(struct qdio_q *q); +void qdio_stop_polling(struct qdio_q *q); +int qdio_siga_sync_q(struct qdio_q *q); + +void qdio_setup_thinint(struct qdio_irq *irq_ptr); +int qdio_establish_thinint(struct qdio_irq *irq_ptr); +void qdio_shutdown_thinint(struct qdio_irq *irq_ptr); +void tiqdio_add_input_queues(struct qdio_irq *irq_ptr); +void tiqdio_remove_input_queues(struct qdio_irq *irq_ptr); +void tiqdio_inbound_processing(unsigned long q); +int tiqdio_allocate_memory(void); +void tiqdio_free_memory(void); +int tiqdio_register_thinints(void); +void tiqdio_unregister_thinints(void); + +/* prototypes for setup */ +void qdio_inbound_processing(unsigned long data); +void qdio_outbound_processing(unsigned long data); +void qdio_outbound_timer(unsigned long data); +void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, + struct irb *irb); +int qdio_allocate_qs(struct qdio_irq *irq_ptr, int nr_input_qs, + int nr_output_qs); +void qdio_setup_ssqd_info(struct qdio_irq *irq_ptr); +int qdio_setup_irq(struct qdio_initialize *init_data); +void qdio_print_subchannel_info(struct qdio_irq *irq_ptr, + struct ccw_device *cdev); +void qdio_release_memory(struct qdio_irq *irq_ptr); +int qdio_setup_init(void); +void qdio_setup_exit(void); + +#endif /* _CIO_QDIO_H */ diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c new file mode 100644 index 0000000..337aa30 --- /dev/null +++ b/drivers/s390/cio/qdio_debug.c @@ -0,0 +1,240 @@ +/* + * drivers/s390/cio/qdio_debug.c + * + * Copyright IBM Corp. 2008 + * + * Author: Jan Glauber (jang@linux.vnet.ibm.com) + */ +#include +#include +#include +#include +#include +#include "qdio_debug.h" +#include "qdio.h" + +debug_info_t *qdio_dbf_setup; +debug_info_t *qdio_dbf_trace; + +static struct dentry *debugfs_root; +#define MAX_DEBUGFS_QUEUES 32 +static struct dentry *debugfs_queues[MAX_DEBUGFS_QUEUES] = { NULL }; +static DEFINE_MUTEX(debugfs_mutex); + +void qdio_allocate_do_dbf(struct qdio_initialize *init_data) +{ + char dbf_text[20]; + + sprintf(dbf_text, "qfmt:%x", init_data->q_format); + QDIO_DBF_TEXT0(0, setup, dbf_text); + QDIO_DBF_HEX0(0, setup, init_data->adapter_name, 8); + sprintf(dbf_text, "qpff%4x", init_data->qib_param_field_format); + QDIO_DBF_TEXT0(0, setup, dbf_text); + QDIO_DBF_HEX0(0, setup, &init_data->qib_param_field, sizeof(void *)); + QDIO_DBF_HEX0(0, setup, &init_data->input_slib_elements, sizeof(void *)); + QDIO_DBF_HEX0(0, setup, &init_data->output_slib_elements, sizeof(void *)); + sprintf(dbf_text, "niq:%4x", init_data->no_input_qs); + QDIO_DBF_TEXT0(0, setup, dbf_text); + sprintf(dbf_text, "noq:%4x", init_data->no_output_qs); + QDIO_DBF_TEXT0(0, setup, dbf_text); + QDIO_DBF_HEX0(0, setup, &init_data->input_handler, sizeof(void *)); + QDIO_DBF_HEX0(0, setup, &init_data->output_handler, sizeof(void *)); + QDIO_DBF_HEX0(0, setup, &init_data->int_parm, sizeof(long)); + QDIO_DBF_HEX0(0, setup, &init_data->flags, sizeof(long)); + QDIO_DBF_HEX0(0, setup, &init_data->input_sbal_addr_array, sizeof(void *)); + QDIO_DBF_HEX0(0, setup, &init_data->output_sbal_addr_array, sizeof(void *)); +} + +static void qdio_unregister_dbf_views(void) +{ + if (qdio_dbf_setup) + debug_unregister(qdio_dbf_setup); + if (qdio_dbf_trace) + debug_unregister(qdio_dbf_trace); +} + +static int qdio_register_dbf_views(void) +{ + qdio_dbf_setup = debug_register("qdio_setup", QDIO_DBF_SETUP_PAGES, + QDIO_DBF_SETUP_NR_AREAS, + QDIO_DBF_SETUP_LEN); + if (!qdio_dbf_setup) + goto oom; + debug_register_view(qdio_dbf_setup, &debug_hex_ascii_view); + debug_set_level(qdio_dbf_setup, QDIO_DBF_SETUP_LEVEL); + + qdio_dbf_trace = debug_register("qdio_trace", QDIO_DBF_TRACE_PAGES, + QDIO_DBF_TRACE_NR_AREAS, + QDIO_DBF_TRACE_LEN); + if (!qdio_dbf_trace) + goto oom; + debug_register_view(qdio_dbf_trace, &debug_hex_ascii_view); + debug_set_level(qdio_dbf_trace, QDIO_DBF_TRACE_LEVEL); + return 0; +oom: + qdio_unregister_dbf_views(); + return -ENOMEM; +} + +static int qstat_show(struct seq_file *m, void *v) +{ + unsigned char state; + struct qdio_q *q = m->private; + int i; + + if (!q) + return 0; + + seq_printf(m, "device state indicator: %d\n", *q->irq_ptr->dsci); + seq_printf(m, "nr_used: %d\n", atomic_read(&q->nr_buf_used)); + seq_printf(m, "ftc: %d\n", q->first_to_check); + seq_printf(m, "last_move_ftc: %d\n", q->last_move_ftc); + seq_printf(m, "polling: %d\n", q->u.in.polling); + seq_printf(m, "slsb buffer states:\n"); + + qdio_siga_sync_q(q); + for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; i++) { + get_buf_state(q, i, &state); + switch (state) { + case SLSB_P_INPUT_NOT_INIT: + case SLSB_P_OUTPUT_NOT_INIT: + seq_printf(m, "N"); + break; + case SLSB_P_INPUT_PRIMED: + case SLSB_CU_OUTPUT_PRIMED: + seq_printf(m, "+"); + break; + case SLSB_P_INPUT_ACK: + seq_printf(m, "A"); + break; + case SLSB_P_INPUT_ERROR: + case SLSB_P_OUTPUT_ERROR: + seq_printf(m, "x"); + break; + case SLSB_CU_INPUT_EMPTY: + case SLSB_P_OUTPUT_EMPTY: + seq_printf(m, "-"); + break; + case SLSB_P_INPUT_HALTED: + case SLSB_P_OUTPUT_HALTED: + seq_printf(m, "."); + break; + default: + seq_printf(m, "?"); + } + if (i == 63) + seq_printf(m, "\n"); + } + seq_printf(m, "\n"); + return 0; +} + +static ssize_t qstat_seq_write(struct file *file, const char __user *buf, + size_t count, loff_t *off) +{ + struct seq_file *seq = file->private_data; + struct qdio_q *q = seq->private; + + if (!q) + return 0; + + if (q->is_input_q) + xchg(q->irq_ptr->dsci, 1); + local_bh_disable(); + tasklet_schedule(&q->tasklet); + local_bh_enable(); + return count; +} + +static int qstat_seq_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, qstat_show, + filp->f_path.dentry->d_inode->i_private); +} + +static void get_queue_name(struct qdio_q *q, struct ccw_device *cdev, char *name) +{ + memset(name, 0, sizeof(name)); + sprintf(name, "%s", cdev->dev.bus_id); + if (q->is_input_q) + sprintf(name + strlen(name), "_input"); + else + sprintf(name + strlen(name), "_output"); + sprintf(name + strlen(name), "_%d", q->nr); +} + +static void remove_debugfs_entry(struct qdio_q *q) +{ + int i; + + for (i = 0; i < MAX_DEBUGFS_QUEUES; i++) { + if (!debugfs_queues[i]) + continue; + if (debugfs_queues[i]->d_inode->i_private == q) { + debugfs_remove(debugfs_queues[i]); + debugfs_queues[i] = NULL; + } + } +} + +static struct file_operations debugfs_fops = { + .owner = THIS_MODULE, + .open = qstat_seq_open, + .read = seq_read, + .write = qstat_seq_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static void setup_debugfs_entry(struct qdio_q *q, struct ccw_device *cdev) +{ + int i = 0; + char name[40]; + + while (debugfs_queues[i] != NULL) { + i++; + if (i >= MAX_DEBUGFS_QUEUES) + return; + } + get_queue_name(q, cdev, name); + debugfs_queues[i] = debugfs_create_file(name, S_IFREG | S_IRUGO | S_IWUSR, + debugfs_root, q, &debugfs_fops); +} + +void qdio_setup_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cdev) +{ + struct qdio_q *q; + int i; + + mutex_lock(&debugfs_mutex); + for_each_input_queue(irq_ptr, q, i) + setup_debugfs_entry(q, cdev); + for_each_output_queue(irq_ptr, q, i) + setup_debugfs_entry(q, cdev); + mutex_unlock(&debugfs_mutex); +} + +void qdio_shutdown_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cdev) +{ + struct qdio_q *q; + int i; + + mutex_lock(&debugfs_mutex); + for_each_input_queue(irq_ptr, q, i) + remove_debugfs_entry(q); + for_each_output_queue(irq_ptr, q, i) + remove_debugfs_entry(q); + mutex_unlock(&debugfs_mutex); +} + +int __init qdio_debug_init(void) +{ + debugfs_root = debugfs_create_dir("qdio_queues", NULL); + return qdio_register_dbf_views(); +} + +void qdio_debug_exit(void) +{ + debugfs_remove(debugfs_root); + qdio_unregister_dbf_views(); +} diff --git a/drivers/s390/cio/qdio_debug.h b/drivers/s390/cio/qdio_debug.h new file mode 100644 index 0000000..8484b83 --- /dev/null +++ b/drivers/s390/cio/qdio_debug.h @@ -0,0 +1,91 @@ +/* + * drivers/s390/cio/qdio_debug.h + * + * Copyright IBM Corp. 2008 + * + * Author: Jan Glauber (jang@linux.vnet.ibm.com) + */ +#ifndef QDIO_DEBUG_H +#define QDIO_DEBUG_H + +#include +#include +#include "qdio.h" + +#define QDIO_DBF_HEX(ex, name, level, addr, len) \ + do { \ + if (ex) \ + debug_exception(qdio_dbf_##name, level, (void *)(addr), len); \ + else \ + debug_event(qdio_dbf_##name, level, (void *)(addr), len); \ + } while (0) +#define QDIO_DBF_TEXT(ex, name, level, text) \ + do { \ + if (ex) \ + debug_text_exception(qdio_dbf_##name, level, text); \ + else \ + debug_text_event(qdio_dbf_##name, level, text); \ + } while (0) + +#define QDIO_DBF_HEX0(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 0, addr, len) +#define QDIO_DBF_HEX1(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 1, addr, len) +#define QDIO_DBF_HEX2(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 2, addr, len) + +#ifdef CONFIG_QDIO_DEBUG +#define QDIO_DBF_HEX3(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 3, addr, len) +#define QDIO_DBF_HEX4(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 4, addr, len) +#define QDIO_DBF_HEX5(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 5, addr, len) +#define QDIO_DBF_HEX6(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 6, addr, len) +#else +#define QDIO_DBF_HEX3(ex, name, addr, len) do {} while (0) +#define QDIO_DBF_HEX4(ex, name, addr, len) do {} while (0) +#define QDIO_DBF_HEX5(ex, name, addr, len) do {} while (0) +#define QDIO_DBF_HEX6(ex, name, addr, len) do {} while (0) +#endif /* CONFIG_QDIO_DEBUG */ + +#define QDIO_DBF_TEXT0(ex, name, text) QDIO_DBF_TEXT(ex, name, 0, text) +#define QDIO_DBF_TEXT1(ex, name, text) QDIO_DBF_TEXT(ex, name, 1, text) +#define QDIO_DBF_TEXT2(ex, name, text) QDIO_DBF_TEXT(ex, name, 2, text) + +#ifdef CONFIG_QDIO_DEBUG +#define QDIO_DBF_TEXT3(ex, name, text) QDIO_DBF_TEXT(ex, name, 3, text) +#define QDIO_DBF_TEXT4(ex, name, text) QDIO_DBF_TEXT(ex, name, 4, text) +#define QDIO_DBF_TEXT5(ex, name, text) QDIO_DBF_TEXT(ex, name, 5, text) +#define QDIO_DBF_TEXT6(ex, name, text) QDIO_DBF_TEXT(ex, name, 6, text) +#else +#define QDIO_DBF_TEXT3(ex, name, text) do {} while (0) +#define QDIO_DBF_TEXT4(ex, name, text) do {} while (0) +#define QDIO_DBF_TEXT5(ex, name, text) do {} while (0) +#define QDIO_DBF_TEXT6(ex, name, text) do {} while (0) +#endif /* CONFIG_QDIO_DEBUG */ + +/* s390dbf views */ +#define QDIO_DBF_SETUP_LEN 8 +#define QDIO_DBF_SETUP_PAGES 4 +#define QDIO_DBF_SETUP_NR_AREAS 1 + +#define QDIO_DBF_TRACE_LEN 8 +#define QDIO_DBF_TRACE_NR_AREAS 2 + +#ifdef CONFIG_QDIO_DEBUG +#define QDIO_DBF_TRACE_PAGES 16 +#define QDIO_DBF_SETUP_LEVEL 6 +#define QDIO_DBF_TRACE_LEVEL 4 +#else /* !CONFIG_QDIO_DEBUG */ +#define QDIO_DBF_TRACE_PAGES 4 +#define QDIO_DBF_SETUP_LEVEL 2 +#define QDIO_DBF_TRACE_LEVEL 2 +#endif /* CONFIG_QDIO_DEBUG */ + +extern debug_info_t *qdio_dbf_setup; +extern debug_info_t *qdio_dbf_trace; + +void qdio_allocate_do_dbf(struct qdio_initialize *init_data); +void debug_print_bstat(struct qdio_q *q); +void qdio_setup_debug_entries(struct qdio_irq *irq_ptr, + struct ccw_device *cdev); +void qdio_shutdown_debug_entries(struct qdio_irq *irq_ptr, + struct ccw_device *cdev); +int qdio_debug_init(void); +void qdio_debug_exit(void); +#endif diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c new file mode 100644 index 0000000..d10c73c --- /dev/null +++ b/drivers/s390/cio/qdio_main.c @@ -0,0 +1,1755 @@ +/* + * linux/drivers/s390/cio/qdio_main.c + * + * Linux for s390 qdio support, buffer handling, qdio API and module support. + * + * Copyright 2000,2008 IBM Corp. + * Author(s): Utz Bacher + * Jan Glauber + * 2.6 cio integration by Cornelia Huck + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cio.h" +#include "css.h" +#include "device.h" +#include "qdio.h" +#include "qdio_debug.h" +#include "qdio_perf.h" + +MODULE_AUTHOR("Utz Bacher ,"\ + "Jan Glauber "); +MODULE_DESCRIPTION("QDIO base support"); +MODULE_LICENSE("GPL"); + +static inline int do_siga_sync(struct subchannel_id schid, + unsigned int out_mask, unsigned int in_mask) +{ + register unsigned long __fc asm ("0") = 2; + register struct subchannel_id __schid asm ("1") = schid; + register unsigned long out asm ("2") = out_mask; + register unsigned long in asm ("3") = in_mask; + int cc; + + asm volatile( + " siga 0\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) + : "d" (__fc), "d" (__schid), "d" (out), "d" (in) : "cc"); + return cc; +} + +static inline int do_siga_input(struct subchannel_id schid, unsigned int mask) +{ + register unsigned long __fc asm ("0") = 1; + register struct subchannel_id __schid asm ("1") = schid; + register unsigned long __mask asm ("2") = mask; + int cc; + + asm volatile( + " siga 0\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) + : "d" (__fc), "d" (__schid), "d" (__mask) : "cc", "memory"); + return cc; +} + +/** + * do_siga_output - perform SIGA-w/wt function + * @schid: subchannel id or in case of QEBSM the subchannel token + * @mask: which output queues to process + * @bb: busy bit indicator, set only if SIGA-w/wt could not access a buffer + * @fc: function code to perform + * + * Returns cc or QDIO_ERROR_SIGA_ACCESS_EXCEPTION. + * Note: For IQDC unicast queues only the highest priority queue is processed. + */ +static inline int do_siga_output(unsigned long schid, unsigned long mask, + u32 *bb, unsigned int fc) +{ + register unsigned long __fc asm("0") = fc; + register unsigned long __schid asm("1") = schid; + register unsigned long __mask asm("2") = mask; + int cc = QDIO_ERROR_SIGA_ACCESS_EXCEPTION; + + asm volatile( + " siga 0\n" + "0: ipm %0\n" + " srl %0,28\n" + "1:\n" + EX_TABLE(0b, 1b) + : "+d" (cc), "+d" (__fc), "+d" (__schid), "+d" (__mask) + : : "cc", "memory"); + *bb = ((unsigned int) __fc) >> 31; + return cc; +} + +static inline int qdio_check_ccq(struct qdio_q *q, unsigned int ccq) +{ + char dbf_text[15]; + + /* all done or next buffer state different */ + if (ccq == 0 || ccq == 32) + return 0; + /* not all buffers processed */ + if (ccq == 96 || ccq == 97) + return 1; + /* notify devices immediately */ + sprintf(dbf_text, "%d", ccq); + QDIO_DBF_TEXT2(1, trace, dbf_text); + return -EIO; +} + +/** + * qdio_do_eqbs - extract buffer states for QEBSM + * @q: queue to manipulate + * @state: state of the extracted buffers + * @start: buffer number to start at + * @count: count of buffers to examine + * + * Returns the number of successfull extracted equal buffer states. + * Stops processing if a state is different from the last buffers state. + */ +static int qdio_do_eqbs(struct qdio_q *q, unsigned char *state, + int start, int count) +{ + unsigned int ccq = 0; + int tmp_count = count, tmp_start = start; + int nr = q->nr; + int rc; + char dbf_text[15]; + + BUG_ON(!q->irq_ptr->sch_token); + + if (!q->is_input_q) + nr += q->irq_ptr->nr_input_qs; +again: + ccq = do_eqbs(q->irq_ptr->sch_token, state, nr, &tmp_start, &tmp_count); + rc = qdio_check_ccq(q, ccq); + + /* At least one buffer was processed, return and extract the remaining + * buffers later. + */ + if ((ccq == 96) && (count != tmp_count)) + return (count - tmp_count); + if (rc == 1) { + QDIO_DBF_TEXT5(1, trace, "eqAGAIN"); + goto again; + } + + if (rc < 0) { + QDIO_DBF_TEXT2(1, trace, "eqberr"); + sprintf(dbf_text, "%2x,%2x,%d,%d", count, tmp_count, ccq, nr); + QDIO_DBF_TEXT2(1, trace, dbf_text); + q->handler(q->irq_ptr->cdev, + QDIO_ERROR_ACTIVATE_CHECK_CONDITION, + 0, -1, -1, q->irq_ptr->int_parm); + return 0; + } + return count - tmp_count; +} + +/** + * qdio_do_sqbs - set buffer states for QEBSM + * @q: queue to manipulate + * @state: new state of the buffers + * @start: first buffer number to change + * @count: how many buffers to change + * + * Returns the number of successfully changed buffers. + * Does retrying until the specified count of buffer states is set or an + * error occurs. + */ +static int qdio_do_sqbs(struct qdio_q *q, unsigned char state, int start, + int count) +{ + unsigned int ccq = 0; + int tmp_count = count, tmp_start = start; + int nr = q->nr; + int rc; + char dbf_text[15]; + + BUG_ON(!q->irq_ptr->sch_token); + + if (!q->is_input_q) + nr += q->irq_ptr->nr_input_qs; +again: + ccq = do_sqbs(q->irq_ptr->sch_token, state, nr, &tmp_start, &tmp_count); + rc = qdio_check_ccq(q, ccq); + if (rc == 1) { + QDIO_DBF_TEXT5(1, trace, "sqAGAIN"); + goto again; + } + if (rc < 0) { + QDIO_DBF_TEXT3(1, trace, "sqberr"); + sprintf(dbf_text, "%2x,%2x", count, tmp_count); + QDIO_DBF_TEXT3(1, trace, dbf_text); + sprintf(dbf_text, "%d,%d", ccq, nr); + QDIO_DBF_TEXT3(1, trace, dbf_text); + + q->handler(q->irq_ptr->cdev, + QDIO_ERROR_ACTIVATE_CHECK_CONDITION, + 0, -1, -1, q->irq_ptr->int_parm); + return 0; + } + WARN_ON(tmp_count); + return count - tmp_count; +} + +/* returns number of examined buffers and their common state in *state */ +static inline int get_buf_states(struct qdio_q *q, unsigned int bufnr, + unsigned char *state, unsigned int count) +{ + unsigned char __state = 0; + int i; + + BUG_ON(bufnr > QDIO_MAX_BUFFERS_MASK); + BUG_ON(count > QDIO_MAX_BUFFERS_PER_Q); + + if (is_qebsm(q)) + return qdio_do_eqbs(q, state, bufnr, count); + + for (i = 0; i < count; i++) { + if (!__state) + __state = q->slsb.val[bufnr]; + else if (q->slsb.val[bufnr] != __state) + break; + bufnr = next_buf(bufnr); + } + *state = __state; + return i; +} + +inline int get_buf_state(struct qdio_q *q, unsigned int bufnr, + unsigned char *state) +{ + return get_buf_states(q, bufnr, state, 1); +} + +/* wrap-around safe setting of slsb states, returns number of changed buffers */ +static inline int set_buf_states(struct qdio_q *q, int bufnr, + unsigned char state, int count) +{ + int i; + + BUG_ON(bufnr > QDIO_MAX_BUFFERS_MASK); + BUG_ON(count > QDIO_MAX_BUFFERS_PER_Q); + + if (is_qebsm(q)) + return qdio_do_sqbs(q, state, bufnr, count); + + for (i = 0; i < count; i++) { + xchg(&q->slsb.val[bufnr], state); + bufnr = next_buf(bufnr); + } + return count; +} + +static inline int set_buf_state(struct qdio_q *q, int bufnr, + unsigned char state) +{ + return set_buf_states(q, bufnr, state, 1); +} + +/* set slsb states to initial state */ +void qdio_init_buf_states(struct qdio_irq *irq_ptr) +{ + struct qdio_q *q; + int i; + + for_each_input_queue(irq_ptr, q, i) + set_buf_states(q, 0, SLSB_P_INPUT_NOT_INIT, + QDIO_MAX_BUFFERS_PER_Q); + for_each_output_queue(irq_ptr, q, i) + set_buf_states(q, 0, SLSB_P_OUTPUT_NOT_INIT, + QDIO_MAX_BUFFERS_PER_Q); +} + +static int qdio_siga_sync(struct qdio_q *q, unsigned int output, + unsigned int input) +{ + int cc; + + if (!need_siga_sync(q)) + return 0; + + qdio_perf_stat_inc(&perf_stats.siga_sync); + + cc = do_siga_sync(q->irq_ptr->schid, output, input); + if (cc) { + QDIO_DBF_TEXT4(0, trace, "sigasync"); + QDIO_DBF_HEX4(0, trace, &q, sizeof(void *)); + QDIO_DBF_HEX3(0, trace, &cc, sizeof(int *)); + } + return cc; +} + +inline int qdio_siga_sync_q(struct qdio_q *q) +{ + if (q->is_input_q) + return qdio_siga_sync(q, 0, q->mask); + else + return qdio_siga_sync(q, q->mask, 0); +} + +static inline int qdio_siga_sync_out(struct qdio_q *q) +{ + return qdio_siga_sync(q, ~0U, 0); +} + +static inline int qdio_siga_sync_all(struct qdio_q *q) +{ + return qdio_siga_sync(q, ~0U, ~0U); +} + +static inline int qdio_do_siga_output(struct qdio_q *q, unsigned int *busy_bit) +{ + unsigned int fc = 0; + unsigned long schid; + + if (!is_qebsm(q)) + schid = *((u32 *)&q->irq_ptr->schid); + else { + schid = q->irq_ptr->sch_token; + fc |= 0x80; + } + return do_siga_output(schid, q->mask, busy_bit, fc); +} + +static int qdio_siga_output(struct qdio_q *q) +{ + int cc; + u32 busy_bit; + u64 start_time = 0; + + QDIO_DBF_TEXT5(0, trace, "sigaout"); + QDIO_DBF_HEX5(0, trace, &q, sizeof(void *)); + + qdio_perf_stat_inc(&perf_stats.siga_out); +again: + cc = qdio_do_siga_output(q, &busy_bit); + if (queue_type(q) == QDIO_IQDIO_QFMT && cc == 2 && busy_bit) { + if (!start_time) + start_time = get_usecs(); + else if ((get_usecs() - start_time) < QDIO_BUSY_BIT_PATIENCE) + goto again; + } + + if (cc == 2 && busy_bit) + cc |= QDIO_ERROR_SIGA_BUSY; + if (cc) + QDIO_DBF_HEX3(0, trace, &cc, sizeof(int *)); + return cc; +} + +static inline int qdio_siga_input(struct qdio_q *q) +{ + int cc; + + QDIO_DBF_TEXT4(0, trace, "sigain"); + QDIO_DBF_HEX4(0, trace, &q, sizeof(void *)); + + qdio_perf_stat_inc(&perf_stats.siga_in); + + cc = do_siga_input(q->irq_ptr->schid, q->mask); + if (cc) + QDIO_DBF_HEX3(0, trace, &cc, sizeof(int *)); + return cc; +} + +/* called from thinint inbound handler */ +void qdio_sync_after_thinint(struct qdio_q *q) +{ + if (pci_out_supported(q)) { + if (need_siga_sync_thinint(q)) + qdio_siga_sync_all(q); + else if (need_siga_sync_out_thinint(q)) + qdio_siga_sync_out(q); + } else + qdio_siga_sync_q(q); +} + +inline void qdio_stop_polling(struct qdio_q *q) +{ + spin_lock_bh(&q->u.in.lock); + if (!q->u.in.polling) { + spin_unlock_bh(&q->u.in.lock); + return; + } + q->u.in.polling = 0; + qdio_perf_stat_inc(&perf_stats.debug_stop_polling); + + /* show the card that we are not polling anymore */ + set_buf_state(q, q->last_move_ftc, SLSB_P_INPUT_NOT_INIT); + spin_unlock_bh(&q->u.in.lock); +} + +static void announce_buffer_error(struct qdio_q *q) +{ + char dbf_text[15]; + + if (q->is_input_q) + QDIO_DBF_TEXT3(1, trace, "inperr"); + else + QDIO_DBF_TEXT3(0, trace, "outperr"); + + sprintf(dbf_text, "%x-%x-%x", q->first_to_check, + q->sbal[q->first_to_check]->element[14].flags, + q->sbal[q->first_to_check]->element[15].flags); + QDIO_DBF_TEXT3(1, trace, dbf_text); + QDIO_DBF_HEX2(1, trace, q->sbal[q->first_to_check], 256); + + q->qdio_error = QDIO_ERROR_SLSB_STATE; +} + +static int get_inbound_buffer_frontier(struct qdio_q *q) +{ + int count, stop; + unsigned char state; + + /* + * If we still poll don't update last_move_ftc, keep the + * previously ACK buffer there. + */ + if (!q->u.in.polling) + q->last_move_ftc = q->first_to_check; + + /* + * Don't check 128 buffers, as otherwise qdio_inbound_q_moved + * would return 0. + */ + count = min(atomic_read(&q->nr_buf_used), QDIO_MAX_BUFFERS_MASK); + stop = add_buf(q->first_to_check, count); + + /* + * No siga sync here, as a PCI or we after a thin interrupt + * will sync the queues. + */ + + /* need to set count to 1 for non-qebsm */ + if (!is_qebsm(q)) + count = 1; + +check_next: + if (q->first_to_check == stop) + goto out; + + count = get_buf_states(q, q->first_to_check, &state, count); + if (!count) + goto out; + + switch (state) { + case SLSB_P_INPUT_PRIMED: + QDIO_DBF_TEXT5(0, trace, "inptprim"); + + /* + * Only ACK the first buffer. The ACK will be removed in + * qdio_stop_polling. + */ + if (q->u.in.polling) + state = SLSB_P_INPUT_NOT_INIT; + else { + q->u.in.polling = 1; + state = SLSB_P_INPUT_ACK; + } + set_buf_state(q, q->first_to_check, state); + + /* + * Need to change all PRIMED buffers to NOT_INIT, otherwise + * we're loosing initiative in the thinint code. + */ + if (count > 1) + set_buf_states(q, next_buf(q->first_to_check), + SLSB_P_INPUT_NOT_INIT, count - 1); + + /* + * No siga-sync needed for non-qebsm here, as the inbound queue + * will be synced on the next siga-r, resp. + * tiqdio_is_inbound_q_done will do the siga-sync. + */ + q->first_to_check = add_buf(q->first_to_check, count); + atomic_sub(count, &q->nr_buf_used); + goto check_next; + case SLSB_P_INPUT_ERROR: + announce_buffer_error(q); + /* process the buffer, the upper layer will take care of it */ + q->first_to_check = add_buf(q->first_to_check, count); + atomic_sub(count, &q->nr_buf_used); + break; + case SLSB_CU_INPUT_EMPTY: + case SLSB_P_INPUT_NOT_INIT: + case SLSB_P_INPUT_ACK: + QDIO_DBF_TEXT5(0, trace, "inpnipro"); + break; + default: + BUG(); + } +out: + QDIO_DBF_HEX4(0, trace, &q->first_to_check, sizeof(int)); + return q->first_to_check; +} + +int qdio_inbound_q_moved(struct qdio_q *q) +{ + int bufnr; + + bufnr = get_inbound_buffer_frontier(q); + + if ((bufnr != q->last_move_ftc) || q->qdio_error) { + if (!need_siga_sync(q) && !pci_out_supported(q)) + q->u.in.timestamp = get_usecs(); + + QDIO_DBF_TEXT4(0, trace, "inhasmvd"); + QDIO_DBF_HEX4(0, trace, &q, sizeof(void *)); + return 1; + } else + return 0; +} + +static int qdio_inbound_q_done(struct qdio_q *q) +{ + unsigned char state; +#ifdef CONFIG_QDIO_DEBUG + char dbf_text[15]; +#endif + + if (!atomic_read(&q->nr_buf_used)) + return 1; + + /* + * We need that one for synchronization with the adapter, as it + * does a kind of PCI avoidance. + */ + qdio_siga_sync_q(q); + + get_buf_state(q, q->first_to_check, &state); + if (state == SLSB_P_INPUT_PRIMED) + /* we got something to do */ + return 0; + + /* on VM, we don't poll, so the q is always done here */ + if (need_siga_sync(q) || pci_out_supported(q)) + return 1; + + /* + * At this point we know, that inbound first_to_check + * has (probably) not moved (see qdio_inbound_processing). + */ + if (get_usecs() > q->u.in.timestamp + QDIO_INPUT_THRESHOLD) { +#ifdef CONFIG_QDIO_DEBUG + QDIO_DBF_TEXT4(0, trace, "inqisdon"); + QDIO_DBF_HEX4(0, trace, &q, sizeof(void *)); + sprintf(dbf_text, "pf%02x", q->first_to_check); + QDIO_DBF_TEXT4(0, trace, dbf_text); +#endif /* CONFIG_QDIO_DEBUG */ + return 1; + } else { +#ifdef CONFIG_QDIO_DEBUG + QDIO_DBF_TEXT4(0, trace, "inqisntd"); + QDIO_DBF_HEX4(0, trace, &q, sizeof(void *)); + sprintf(dbf_text, "pf%02x", q->first_to_check); + QDIO_DBF_TEXT4(0, trace, dbf_text); +#endif /* CONFIG_QDIO_DEBUG */ + return 0; + } +} + +void qdio_kick_inbound_handler(struct qdio_q *q) +{ + int count, start, end; +#ifdef CONFIG_QDIO_DEBUG + char dbf_text[15]; +#endif + + qdio_perf_stat_inc(&perf_stats.inbound_handler); + + start = q->first_to_kick; + end = q->first_to_check; + if (end >= start) + count = end - start; + else + count = end + QDIO_MAX_BUFFERS_PER_Q - start; + +#ifdef CONFIG_QDIO_DEBUG + sprintf(dbf_text, "s=%2xc=%2x", start, count); + QDIO_DBF_TEXT4(0, trace, dbf_text); +#endif /* CONFIG_QDIO_DEBUG */ + + if (unlikely(q->irq_ptr->state != QDIO_IRQ_STATE_ACTIVE)) + return; + + q->handler(q->irq_ptr->cdev, q->qdio_error, q->nr, + start, count, q->irq_ptr->int_parm); + + /* for the next time */ + q->first_to_kick = q->first_to_check; + q->qdio_error = 0; +} + +static void __qdio_inbound_processing(struct qdio_q *q) +{ + qdio_perf_stat_inc(&perf_stats.tasklet_inbound); +again: + if (!qdio_inbound_q_moved(q)) + return; + + qdio_kick_inbound_handler(q); + + if (!qdio_inbound_q_done(q)) + /* means poll time is not yet over */ + goto again; + + qdio_stop_polling(q); + /* + * We need to check again to not lose initiative after + * resetting the ACK state. + */ + if (!qdio_inbound_q_done(q)) + goto again; +} + +/* inbound tasklet */ +void qdio_inbound_processing(unsigned long data) +{ + struct qdio_q *q = (struct qdio_q *)data; + __qdio_inbound_processing(q); +} + +static int get_outbound_buffer_frontier(struct qdio_q *q) +{ + int count, stop; + unsigned char state; + + if (((queue_type(q) != QDIO_IQDIO_QFMT) && !pci_out_supported(q)) || + (queue_type(q) == QDIO_IQDIO_QFMT && multicast_outbound(q))) + qdio_siga_sync_q(q); + + /* + * Don't check 128 buffers, as otherwise qdio_inbound_q_moved + * would return 0. + */ + count = min(atomic_read(&q->nr_buf_used), QDIO_MAX_BUFFERS_MASK); + stop = add_buf(q->first_to_check, count); + + /* need to set count to 1 for non-qebsm */ + if (!is_qebsm(q)) + count = 1; + +check_next: + if (q->first_to_check == stop) + return q->first_to_check; + + count = get_buf_states(q, q->first_to_check, &state, count); + if (!count) + return q->first_to_check; + + switch (state) { + case SLSB_P_OUTPUT_EMPTY: + /* the adapter got it */ + QDIO_DBF_TEXT5(0, trace, "outpempt"); + + atomic_sub(count, &q->nr_buf_used); + q->first_to_check = add_buf(q->first_to_check, count); + /* + * We fetch all buffer states at once. get_buf_states may + * return count < stop. For QEBSM we do not loop. + */ + if (is_qebsm(q)) + break; + goto check_next; + case SLSB_P_OUTPUT_ERROR: + announce_buffer_error(q); + /* process the buffer, the upper layer will take care of it */ + q->first_to_check = add_buf(q->first_to_check, count); + atomic_sub(count, &q->nr_buf_used); + break; + case SLSB_CU_OUTPUT_PRIMED: + /* the adapter has not fetched the output yet */ + QDIO_DBF_TEXT5(0, trace, "outpprim"); + break; + case SLSB_P_OUTPUT_NOT_INIT: + case SLSB_P_OUTPUT_HALTED: + break; + default: + BUG(); + } + return q->first_to_check; +} + +/* all buffers processed? */ +static inline int qdio_outbound_q_done(struct qdio_q *q) +{ + return atomic_read(&q->nr_buf_used) == 0; +} + +static inline int qdio_outbound_q_moved(struct qdio_q *q) +{ + int bufnr; + + bufnr = get_outbound_buffer_frontier(q); + + if ((bufnr != q->last_move_ftc) || q->qdio_error) { + q->last_move_ftc = bufnr; + QDIO_DBF_TEXT4(0, trace, "oqhasmvd"); + QDIO_DBF_HEX4(0, trace, &q, sizeof(void *)); + return 1; + } else + return 0; +} + +/* + * VM could present us cc=2 and busy bit set on SIGA-write + * during reconfiguration of their Guest LAN (only in iqdio mode, + * otherwise qdio is asynchronous and cc=2 and busy bit there will take + * the queues down immediately). + * + * Therefore qdio_siga_output will try for a short time constantly, + * if such a condition occurs. If it doesn't change, it will + * increase the busy_siga_counter and save the timestamp, and + * schedule the queue for later processing. qdio_outbound_processing + * will check out the counter. If non-zero, it will call qdio_kick_outbound_q + * as often as the value of the counter. This will attempt further SIGA + * instructions. For each successful SIGA, the counter is + * decreased, for failing SIGAs the counter remains the same, after + * all. After some time of no movement, qdio_kick_outbound_q will + * finally fail and reflect corresponding error codes to call + * the upper layer module and have it take the queues down. + * + * Note that this is a change from the original HiperSockets design + * (saying cc=2 and busy bit means take the queues down), but in + * these days Guest LAN didn't exist... excessive cc=2 with busy bit + * conditions will still take the queues down, but the threshold is + * higher due to the Guest LAN environment. + * + * Called from outbound tasklet and do_QDIO handler. + */ +static void qdio_kick_outbound_q(struct qdio_q *q) +{ + int rc; +#ifdef CONFIG_QDIO_DEBUG + char dbf_text[15]; + + QDIO_DBF_TEXT5(0, trace, "kickoutq"); + QDIO_DBF_HEX5(0, trace, &q, sizeof(void *)); +#endif /* CONFIG_QDIO_DEBUG */ + + if (!need_siga_out(q)) + return; + + rc = qdio_siga_output(q); + switch (rc) { + case 0: + /* went smooth this time, reset timestamp */ + q->u.out.timestamp = 0; + + /* TODO: improve error handling for CC=0 case */ +#ifdef CONFIG_QDIO_DEBUG + QDIO_DBF_TEXT3(0, trace, "cc2reslv"); + sprintf(dbf_text, "%4x%2x%2x", q->irq_ptr->schid.sch_no, q->nr, + atomic_read(&q->u.out.busy_siga_counter)); + QDIO_DBF_TEXT3(0, trace, dbf_text); +#endif /* CONFIG_QDIO_DEBUG */ + break; + /* cc=2 and busy bit */ + case (2 | QDIO_ERROR_SIGA_BUSY): + atomic_inc(&q->u.out.busy_siga_counter); + + /* if the last siga was successful, save timestamp here */ + if (!q->u.out.timestamp) + q->u.out.timestamp = get_usecs(); + + /* if we're in time, don't touch qdio_error */ + if (get_usecs() - q->u.out.timestamp < QDIO_BUSY_BIT_GIVE_UP) { + tasklet_schedule(&q->tasklet); + break; + } + QDIO_DBF_TEXT2(0, trace, "cc2REPRT"); +#ifdef CONFIG_QDIO_DEBUG + sprintf(dbf_text, "%4x%2x%2x", q->irq_ptr->schid.sch_no, q->nr, + atomic_read(&q->u.out.busy_siga_counter)); + QDIO_DBF_TEXT3(0, trace, dbf_text); +#endif /* CONFIG_QDIO_DEBUG */ + default: + /* for plain cc=1, 2 or 3 */ + q->qdio_error = rc; + } +} + +static void qdio_kick_outbound_handler(struct qdio_q *q) +{ + int start, end, count; +#ifdef CONFIG_QDIO_DEBUG + char dbf_text[15]; +#endif + + start = q->first_to_kick; + end = q->last_move_ftc; + if (end >= start) + count = end - start; + else + count = end + QDIO_MAX_BUFFERS_PER_Q - start; + +#ifdef CONFIG_QDIO_DEBUG + QDIO_DBF_TEXT4(0, trace, "kickouth"); + QDIO_DBF_HEX4(0, trace, &q, sizeof(void *)); + + sprintf(dbf_text, "s=%2xc=%2x", start, count); + QDIO_DBF_TEXT4(0, trace, dbf_text); +#endif /* CONFIG_QDIO_DEBUG */ + + if (unlikely(q->irq_ptr->state != QDIO_IRQ_STATE_ACTIVE)) + return; + + q->handler(q->irq_ptr->cdev, q->qdio_error, q->nr, start, count, + q->irq_ptr->int_parm); + + /* for the next time: */ + q->first_to_kick = q->last_move_ftc; + q->qdio_error = 0; +} + +static void __qdio_outbound_processing(struct qdio_q *q) +{ + int siga_attempts; + + qdio_perf_stat_inc(&perf_stats.tasklet_outbound); + + /* see comment in qdio_kick_outbound_q */ + siga_attempts = atomic_read(&q->u.out.busy_siga_counter); + while (siga_attempts--) { + atomic_dec(&q->u.out.busy_siga_counter); + qdio_kick_outbound_q(q); + } + + BUG_ON(atomic_read(&q->nr_buf_used) < 0); + + if (qdio_outbound_q_moved(q)) + qdio_kick_outbound_handler(q); + + if (queue_type(q) == QDIO_ZFCP_QFMT) { + if (!pci_out_supported(q) && !qdio_outbound_q_done(q)) + tasklet_schedule(&q->tasklet); + return; + } + + /* bail out for HiperSockets unicast queues */ + if (queue_type(q) == QDIO_IQDIO_QFMT && !multicast_outbound(q)) + return; + + if (q->u.out.pci_out_enabled) + return; + + /* + * Now we know that queue type is either qeth without pci enabled + * or HiperSockets multicast. Make sure buffer switch from PRIMED to + * EMPTY is noticed and outbound_handler is called after some time. + */ + if (qdio_outbound_q_done(q)) + del_timer(&q->u.out.timer); + else { + if (!timer_pending(&q->u.out.timer)) { + mod_timer(&q->u.out.timer, jiffies + 10 * HZ); + qdio_perf_stat_inc(&perf_stats.debug_tl_out_timer); + } + } +} + +/* outbound tasklet */ +void qdio_outbound_processing(unsigned long data) +{ + struct qdio_q *q = (struct qdio_q *)data; + __qdio_outbound_processing(q); +} + +void qdio_outbound_timer(unsigned long data) +{ + struct qdio_q *q = (struct qdio_q *)data; + tasklet_schedule(&q->tasklet); +} + +/* called from thinint inbound tasklet */ +void qdio_check_outbound_after_thinint(struct qdio_q *q) +{ + struct qdio_q *out; + int i; + + if (!pci_out_supported(q)) + return; + + for_each_output_queue(q->irq_ptr, out, i) + if (!qdio_outbound_q_done(out)) + tasklet_schedule(&out->tasklet); +} + +static inline void qdio_set_state(struct qdio_irq *irq_ptr, + enum qdio_irq_states state) +{ +#ifdef CONFIG_QDIO_DEBUG + char dbf_text[15]; + + QDIO_DBF_TEXT5(0, trace, "newstate"); + sprintf(dbf_text, "%4x%4x", irq_ptr->schid.sch_no, state); + QDIO_DBF_TEXT5(0, trace, dbf_text); +#endif /* CONFIG_QDIO_DEBUG */ + + irq_ptr->state = state; + mb(); +} + +static void qdio_irq_check_sense(struct subchannel_id schid, struct irb *irb) +{ + char dbf_text[15]; + + if (irb->esw.esw0.erw.cons) { + sprintf(dbf_text, "sens%4x", schid.sch_no); + QDIO_DBF_TEXT2(1, trace, dbf_text); + QDIO_DBF_HEX0(0, trace, irb, 64); + QDIO_DBF_HEX0(0, trace, irb->ecw, 64); + } +} + +/* PCI interrupt handler */ +static void qdio_int_handler_pci(struct qdio_irq *irq_ptr) +{ + int i; + struct qdio_q *q; + + qdio_perf_stat_inc(&perf_stats.pci_int); + + for_each_input_queue(irq_ptr, q, i) + tasklet_schedule(&q->tasklet); + + if (!(irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED)) + return; + + for_each_output_queue(irq_ptr, q, i) { + if (qdio_outbound_q_done(q)) + continue; + + if (!siga_syncs_out_pci(q)) + qdio_siga_sync_q(q); + + tasklet_schedule(&q->tasklet); + } +} + +static void qdio_handle_activate_check(struct ccw_device *cdev, + unsigned long intparm, int cstat, int dstat) +{ + struct qdio_irq *irq_ptr = cdev->private->qdio_data; + struct qdio_q *q; + char dbf_text[15]; + + QDIO_DBF_TEXT2(1, trace, "ick2"); + sprintf(dbf_text, "%s", cdev->dev.bus_id); + QDIO_DBF_TEXT2(1, trace, dbf_text); + QDIO_DBF_HEX2(0, trace, &intparm, sizeof(int)); + QDIO_DBF_HEX2(0, trace, &dstat, sizeof(int)); + QDIO_DBF_HEX2(0, trace, &cstat, sizeof(int)); + + if (irq_ptr->nr_input_qs) { + q = irq_ptr->input_qs[0]; + } else if (irq_ptr->nr_output_qs) { + q = irq_ptr->output_qs[0]; + } else { + dump_stack(); + goto no_handler; + } + q->handler(q->irq_ptr->cdev, QDIO_ERROR_ACTIVATE_CHECK_CONDITION, + 0, -1, -1, irq_ptr->int_parm); +no_handler: + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_STOPPED); +} + +static void qdio_call_shutdown(struct work_struct *work) +{ + struct ccw_device_private *priv; + struct ccw_device *cdev; + + priv = container_of(work, struct ccw_device_private, kick_work); + cdev = priv->cdev; + qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); + put_device(&cdev->dev); +} + +static void qdio_int_error(struct ccw_device *cdev) +{ + struct qdio_irq *irq_ptr = cdev->private->qdio_data; + + switch (irq_ptr->state) { + case QDIO_IRQ_STATE_INACTIVE: + case QDIO_IRQ_STATE_CLEANUP: + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); + break; + case QDIO_IRQ_STATE_ESTABLISHED: + case QDIO_IRQ_STATE_ACTIVE: + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_STOPPED); + if (get_device(&cdev->dev)) { + /* Can't call shutdown from interrupt context. */ + PREPARE_WORK(&cdev->private->kick_work, + qdio_call_shutdown); + queue_work(ccw_device_work, &cdev->private->kick_work); + } + break; + default: + WARN_ON(1); + } + wake_up(&cdev->private->wait_q); +} + +static int qdio_establish_check_errors(struct ccw_device *cdev, int cstat, + int dstat) +{ + struct qdio_irq *irq_ptr = cdev->private->qdio_data; + + if (cstat || (dstat & ~(DEV_STAT_CHN_END | DEV_STAT_DEV_END))) { + QDIO_DBF_TEXT2(1, setup, "eq:ckcon"); + goto error; + } + + if (!(dstat & DEV_STAT_DEV_END)) { + QDIO_DBF_TEXT2(1, setup, "eq:no de"); + goto error; + } + + if (dstat & ~(DEV_STAT_CHN_END | DEV_STAT_DEV_END)) { + QDIO_DBF_TEXT2(1, setup, "eq:badio"); + goto error; + } + return 0; +error: + QDIO_DBF_HEX2(0, trace, &cstat, sizeof(int)); + QDIO_DBF_HEX2(0, trace, &dstat, sizeof(int)); + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); + return 1; +} + +static void qdio_establish_handle_irq(struct ccw_device *cdev, int cstat, + int dstat) +{ + struct qdio_irq *irq_ptr = cdev->private->qdio_data; + char dbf_text[15]; + + sprintf(dbf_text, "qehi%4x", cdev->private->schid.sch_no); + QDIO_DBF_TEXT0(0, setup, dbf_text); + QDIO_DBF_TEXT0(0, trace, dbf_text); + + if (!qdio_establish_check_errors(cdev, cstat, dstat)) + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ESTABLISHED); +} + +/* qdio interrupt handler */ +void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, + struct irb *irb) +{ + struct qdio_irq *irq_ptr = cdev->private->qdio_data; + int cstat, dstat; + char dbf_text[15]; + + qdio_perf_stat_inc(&perf_stats.qdio_int); + + if (!intparm || !irq_ptr) { + sprintf(dbf_text, "qihd%4x", cdev->private->schid.sch_no); + QDIO_DBF_TEXT2(1, setup, dbf_text); + return; + } + + if (IS_ERR(irb)) { + switch (PTR_ERR(irb)) { + case -EIO: + sprintf(dbf_text, "ierr%4x", + cdev->private->schid.sch_no); + QDIO_DBF_TEXT2(1, setup, dbf_text); + qdio_int_error(cdev); + return; + case -ETIMEDOUT: + sprintf(dbf_text, "qtoh%4x", + cdev->private->schid.sch_no); + QDIO_DBF_TEXT2(1, setup, dbf_text); + qdio_int_error(cdev); + return; + default: + WARN_ON(1); + return; + } + } + qdio_irq_check_sense(irq_ptr->schid, irb); + + cstat = irb->scsw.cmd.cstat; + dstat = irb->scsw.cmd.dstat; + + switch (irq_ptr->state) { + case QDIO_IRQ_STATE_INACTIVE: + qdio_establish_handle_irq(cdev, cstat, dstat); + break; + + case QDIO_IRQ_STATE_CLEANUP: + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); + break; + + case QDIO_IRQ_STATE_ESTABLISHED: + case QDIO_IRQ_STATE_ACTIVE: + if (cstat & SCHN_STAT_PCI) { + qdio_int_handler_pci(irq_ptr); + /* no state change so no need to wake up wait_q */ + return; + } + if ((cstat & ~SCHN_STAT_PCI) || dstat) { + qdio_handle_activate_check(cdev, intparm, cstat, + dstat); + break; + } + default: + WARN_ON(1); + } + wake_up(&cdev->private->wait_q); +} + +/** + * qdio_get_ssqd_desc - get qdio subchannel description + * @cdev: ccw device to get description for + * + * Returns a pointer to the saved qdio subchannel description, + * or NULL for not setup qdio devices. + */ +struct qdio_ssqd_desc *qdio_get_ssqd_desc(struct ccw_device *cdev) +{ + struct qdio_irq *irq_ptr; + + QDIO_DBF_TEXT0(0, setup, "getssqd"); + + irq_ptr = cdev->private->qdio_data; + if (!irq_ptr) + return NULL; + + return &irq_ptr->ssqd_desc; +} +EXPORT_SYMBOL_GPL(qdio_get_ssqd_desc); + +/** + * qdio_cleanup - shutdown queues and free data structures + * @cdev: associated ccw device + * @how: use halt or clear to shutdown + * + * This function calls qdio_shutdown() for @cdev with method @how + * and on success qdio_free() for @cdev. + */ +int qdio_cleanup(struct ccw_device *cdev, int how) +{ + struct qdio_irq *irq_ptr; + char dbf_text[15]; + int rc; + + irq_ptr = cdev->private->qdio_data; + if (!irq_ptr) + return -ENODEV; + + sprintf(dbf_text, "qcln%4x", irq_ptr->schid.sch_no); + QDIO_DBF_TEXT1(0, trace, dbf_text); + QDIO_DBF_TEXT0(0, setup, dbf_text); + + rc = qdio_shutdown(cdev, how); + if (rc == 0) + rc = qdio_free(cdev); + return rc; +} +EXPORT_SYMBOL_GPL(qdio_cleanup); + +static void qdio_shutdown_queues(struct ccw_device *cdev) +{ + struct qdio_irq *irq_ptr = cdev->private->qdio_data; + struct qdio_q *q; + int i; + + for_each_input_queue(irq_ptr, q, i) + tasklet_disable(&q->tasklet); + + for_each_output_queue(irq_ptr, q, i) { + tasklet_disable(&q->tasklet); + del_timer(&q->u.out.timer); + } +} + +/** + * qdio_shutdown - shut down a qdio subchannel + * @cdev: associated ccw device + * @how: use halt or clear to shutdown + */ +int qdio_shutdown(struct ccw_device *cdev, int how) +{ + struct qdio_irq *irq_ptr; + int rc; + unsigned long flags; + char dbf_text[15]; + + irq_ptr = cdev->private->qdio_data; + if (!irq_ptr) + return -ENODEV; + + mutex_lock(&irq_ptr->setup_mutex); + /* + * Subchannel was already shot down. We cannot prevent being called + * twice since cio may trigger a shutdown asynchronously. + */ + if (irq_ptr->state == QDIO_IRQ_STATE_INACTIVE) { + mutex_unlock(&irq_ptr->setup_mutex); + return 0; + } + + sprintf(dbf_text, "qsqs%4x", irq_ptr->schid.sch_no); + QDIO_DBF_TEXT1(0, trace, dbf_text); + QDIO_DBF_TEXT0(0, setup, dbf_text); + + tiqdio_remove_input_queues(irq_ptr); + qdio_shutdown_queues(cdev); + qdio_shutdown_debug_entries(irq_ptr, cdev); + + /* cleanup subchannel */ + spin_lock_irqsave(get_ccwdev_lock(cdev), flags); + + if (how & QDIO_FLAG_CLEANUP_USING_CLEAR) + rc = ccw_device_clear(cdev, QDIO_DOING_CLEANUP); + else + /* default behaviour is halt */ + rc = ccw_device_halt(cdev, QDIO_DOING_CLEANUP); + if (rc) { + sprintf(dbf_text, "sher%4x", irq_ptr->schid.sch_no); + QDIO_DBF_TEXT0(0, setup, dbf_text); + sprintf(dbf_text, "rc=%d", rc); + QDIO_DBF_TEXT0(0, setup, dbf_text); + goto no_cleanup; + } + + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_CLEANUP); + spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); + wait_event_interruptible_timeout(cdev->private->wait_q, + irq_ptr->state == QDIO_IRQ_STATE_INACTIVE || + irq_ptr->state == QDIO_IRQ_STATE_ERR, + 10 * HZ); + spin_lock_irqsave(get_ccwdev_lock(cdev), flags); + +no_cleanup: + qdio_shutdown_thinint(irq_ptr); + + /* restore interrupt handler */ + if ((void *)cdev->handler == (void *)qdio_int_handler) + cdev->handler = irq_ptr->orig_handler; + spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); + + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); + mutex_unlock(&irq_ptr->setup_mutex); + module_put(THIS_MODULE); + if (rc) + return rc; + return 0; +} +EXPORT_SYMBOL_GPL(qdio_shutdown); + +/** + * qdio_free - free data structures for a qdio subchannel + * @cdev: associated ccw device + */ +int qdio_free(struct ccw_device *cdev) +{ + struct qdio_irq *irq_ptr; + char dbf_text[15]; + + irq_ptr = cdev->private->qdio_data; + if (!irq_ptr) + return -ENODEV; + + mutex_lock(&irq_ptr->setup_mutex); + + sprintf(dbf_text, "qfqs%4x", irq_ptr->schid.sch_no); + QDIO_DBF_TEXT1(0, trace, dbf_text); + QDIO_DBF_TEXT0(0, setup, dbf_text); + + cdev->private->qdio_data = NULL; + mutex_unlock(&irq_ptr->setup_mutex); + + qdio_release_memory(irq_ptr); + return 0; +} +EXPORT_SYMBOL_GPL(qdio_free); + +/** + * qdio_initialize - allocate and establish queues for a qdio subchannel + * @init_data: initialization data + * + * This function first allocates queues via qdio_allocate() and on success + * establishes them via qdio_establish(). + */ +int qdio_initialize(struct qdio_initialize *init_data) +{ + int rc; + char dbf_text[15]; + + sprintf(dbf_text, "qini%4x", init_data->cdev->private->schid.sch_no); + QDIO_DBF_TEXT0(0, setup, dbf_text); + QDIO_DBF_TEXT0(0, trace, dbf_text); + + rc = qdio_allocate(init_data); + if (rc) + return rc; + + rc = qdio_establish(init_data); + if (rc) + qdio_free(init_data->cdev); + return rc; +} +EXPORT_SYMBOL_GPL(qdio_initialize); + +/** + * qdio_allocate - allocate qdio queues and associated data + * @init_data: initialization data + */ +int qdio_allocate(struct qdio_initialize *init_data) +{ + struct qdio_irq *irq_ptr; + char dbf_text[15]; + + sprintf(dbf_text, "qalc%4x", init_data->cdev->private->schid.sch_no); + QDIO_DBF_TEXT0(0, setup, dbf_text); + QDIO_DBF_TEXT0(0, trace, dbf_text); + + if ((init_data->no_input_qs && !init_data->input_handler) || + (init_data->no_output_qs && !init_data->output_handler)) + return -EINVAL; + + if ((init_data->no_input_qs > QDIO_MAX_QUEUES_PER_IRQ) || + (init_data->no_output_qs > QDIO_MAX_QUEUES_PER_IRQ)) + return -EINVAL; + + if ((!init_data->input_sbal_addr_array) || + (!init_data->output_sbal_addr_array)) + return -EINVAL; + + qdio_allocate_do_dbf(init_data); + + /* irq_ptr must be in GFP_DMA since it contains ccw1.cda */ + irq_ptr = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!irq_ptr) + goto out_err; + QDIO_DBF_TEXT0(0, setup, "irq_ptr:"); + QDIO_DBF_HEX0(0, setup, &irq_ptr, sizeof(void *)); + + mutex_init(&irq_ptr->setup_mutex); + + /* + * Allocate a page for the chsc calls in qdio_establish. + * Must be pre-allocated since a zfcp recovery will call + * qdio_establish. In case of low memory and swap on a zfcp disk + * we may not be able to allocate memory otherwise. + */ + irq_ptr->chsc_page = get_zeroed_page(GFP_KERNEL); + if (!irq_ptr->chsc_page) + goto out_rel; + + /* qdr is used in ccw1.cda which is u32 */ + irq_ptr->qdr = kzalloc(sizeof(struct qdr), GFP_KERNEL | GFP_DMA); + if (!irq_ptr->qdr) + goto out_rel; + WARN_ON((unsigned long)irq_ptr->qdr & 0xfff); + + QDIO_DBF_TEXT0(0, setup, "qdr:"); + QDIO_DBF_HEX0(0, setup, &irq_ptr->qdr, sizeof(void *)); + + if (qdio_allocate_qs(irq_ptr, init_data->no_input_qs, + init_data->no_output_qs)) + goto out_rel; + + init_data->cdev->private->qdio_data = irq_ptr; + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); + return 0; +out_rel: + qdio_release_memory(irq_ptr); +out_err: + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(qdio_allocate); + +/** + * qdio_establish - establish queues on a qdio subchannel + * @init_data: initialization data + */ +int qdio_establish(struct qdio_initialize *init_data) +{ + char dbf_text[20]; + struct qdio_irq *irq_ptr; + struct ccw_device *cdev = init_data->cdev; + unsigned long saveflags; + int rc; + + irq_ptr = cdev->private->qdio_data; + if (!irq_ptr) + return -ENODEV; + + if (cdev->private->state != DEV_STATE_ONLINE) + return -EINVAL; + + if (!try_module_get(THIS_MODULE)) + return -EINVAL; + + sprintf(dbf_text, "qest%4x", cdev->private->schid.sch_no); + QDIO_DBF_TEXT0(0, setup, dbf_text); + QDIO_DBF_TEXT0(0, trace, dbf_text); + + mutex_lock(&irq_ptr->setup_mutex); + qdio_setup_irq(init_data); + + rc = qdio_establish_thinint(irq_ptr); + if (rc) { + mutex_unlock(&irq_ptr->setup_mutex); + qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); + return rc; + } + + /* establish q */ + irq_ptr->ccw.cmd_code = irq_ptr->equeue.cmd; + irq_ptr->ccw.flags = CCW_FLAG_SLI; + irq_ptr->ccw.count = irq_ptr->equeue.count; + irq_ptr->ccw.cda = (u32)((addr_t)irq_ptr->qdr); + + spin_lock_irqsave(get_ccwdev_lock(cdev), saveflags); + ccw_device_set_options_mask(cdev, 0); + + rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ESTABLISH, 0, 0); + if (rc) { + sprintf(dbf_text, "eq:io%4x", irq_ptr->schid.sch_no); + QDIO_DBF_TEXT2(1, setup, dbf_text); + sprintf(dbf_text, "eq:rc%4x", rc); + QDIO_DBF_TEXT2(1, setup, dbf_text); + } + spin_unlock_irqrestore(get_ccwdev_lock(cdev), saveflags); + + if (rc) { + mutex_unlock(&irq_ptr->setup_mutex); + qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); + return rc; + } + + wait_event_interruptible_timeout(cdev->private->wait_q, + irq_ptr->state == QDIO_IRQ_STATE_ESTABLISHED || + irq_ptr->state == QDIO_IRQ_STATE_ERR, HZ); + + if (irq_ptr->state != QDIO_IRQ_STATE_ESTABLISHED) { + mutex_unlock(&irq_ptr->setup_mutex); + qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); + return -EIO; + } + + qdio_setup_ssqd_info(irq_ptr); + sprintf(dbf_text, "qib ac%2x", irq_ptr->qib.ac); + QDIO_DBF_TEXT2(0, setup, dbf_text); + + /* qebsm is now setup if available, initialize buffer states */ + qdio_init_buf_states(irq_ptr); + + mutex_unlock(&irq_ptr->setup_mutex); + qdio_print_subchannel_info(irq_ptr, cdev); + qdio_setup_debug_entries(irq_ptr, cdev); + return 0; +} +EXPORT_SYMBOL_GPL(qdio_establish); + +/** + * qdio_activate - activate queues on a qdio subchannel + * @cdev: associated cdev + */ +int qdio_activate(struct ccw_device *cdev) +{ + struct qdio_irq *irq_ptr; + int rc; + unsigned long saveflags; + char dbf_text[20]; + + irq_ptr = cdev->private->qdio_data; + if (!irq_ptr) + return -ENODEV; + + if (cdev->private->state != DEV_STATE_ONLINE) + return -EINVAL; + + mutex_lock(&irq_ptr->setup_mutex); + if (irq_ptr->state == QDIO_IRQ_STATE_INACTIVE) { + rc = -EBUSY; + goto out; + } + + sprintf(dbf_text, "qact%4x", irq_ptr->schid.sch_no); + QDIO_DBF_TEXT2(0, setup, dbf_text); + QDIO_DBF_TEXT2(0, trace, dbf_text); + + irq_ptr->ccw.cmd_code = irq_ptr->aqueue.cmd; + irq_ptr->ccw.flags = CCW_FLAG_SLI; + irq_ptr->ccw.count = irq_ptr->aqueue.count; + irq_ptr->ccw.cda = 0; + + spin_lock_irqsave(get_ccwdev_lock(cdev), saveflags); + ccw_device_set_options(cdev, CCWDEV_REPORT_ALL); + + rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ACTIVATE, + 0, DOIO_DENY_PREFETCH); + if (rc) { + sprintf(dbf_text, "aq:io%4x", irq_ptr->schid.sch_no); + QDIO_DBF_TEXT2(1, setup, dbf_text); + sprintf(dbf_text, "aq:rc%4x", rc); + QDIO_DBF_TEXT2(1, setup, dbf_text); + } + spin_unlock_irqrestore(get_ccwdev_lock(cdev), saveflags); + + if (rc) + goto out; + + if (is_thinint_irq(irq_ptr)) + tiqdio_add_input_queues(irq_ptr); + + /* wait for subchannel to become active */ + msleep(5); + + switch (irq_ptr->state) { + case QDIO_IRQ_STATE_STOPPED: + case QDIO_IRQ_STATE_ERR: + mutex_unlock(&irq_ptr->setup_mutex); + qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); + return -EIO; + default: + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ACTIVE); + rc = 0; + } +out: + mutex_unlock(&irq_ptr->setup_mutex); + return rc; +} +EXPORT_SYMBOL_GPL(qdio_activate); + +static inline int buf_in_between(int bufnr, int start, int count) +{ + int end = add_buf(start, count); + + if (end > start) { + if (bufnr >= start && bufnr < end) + return 1; + else + return 0; + } + + /* wrap-around case */ + if ((bufnr >= start && bufnr <= QDIO_MAX_BUFFERS_PER_Q) || + (bufnr < end)) + return 1; + else + return 0; +} + +/** + * handle_inbound - reset processed input buffers + * @q: queue containing the buffers + * @callflags: flags + * @bufnr: first buffer to process + * @count: how many buffers are emptied + */ +static void handle_inbound(struct qdio_q *q, unsigned int callflags, + int bufnr, int count) +{ + unsigned long flags; + int used, rc; + + /* + * do_QDIO could run in parallel with the queue tasklet so the + * upper-layer programm could empty the ACK'ed buffer here. + * If that happens we must clear the polling flag, otherwise + * qdio_stop_polling() could set the buffer to NOT_INIT after + * it was set to EMPTY which would kill us. + */ + spin_lock_irqsave(&q->u.in.lock, flags); + if (q->u.in.polling) + if (buf_in_between(q->last_move_ftc, bufnr, count)) + q->u.in.polling = 0; + + count = set_buf_states(q, bufnr, SLSB_CU_INPUT_EMPTY, count); + spin_unlock_irqrestore(&q->u.in.lock, flags); + + used = atomic_add_return(count, &q->nr_buf_used) - count; + BUG_ON(used + count > QDIO_MAX_BUFFERS_PER_Q); + + /* no need to signal as long as the adapter had free buffers */ + if (used) + return; + + if (need_siga_in(q)) { + rc = qdio_siga_input(q); + if (rc) + q->qdio_error = rc; + } +} + +/** + * handle_outbound - process filled outbound buffers + * @q: queue containing the buffers + * @callflags: flags + * @bufnr: first buffer to process + * @count: how many buffers are filled + */ +static void handle_outbound(struct qdio_q *q, unsigned int callflags, + int bufnr, int count) +{ + unsigned char state; + int used; + + qdio_perf_stat_inc(&perf_stats.outbound_handler); + + count = set_buf_states(q, bufnr, SLSB_CU_OUTPUT_PRIMED, count); + used = atomic_add_return(count, &q->nr_buf_used); + BUG_ON(used > QDIO_MAX_BUFFERS_PER_Q); + + if (callflags & QDIO_FLAG_PCI_OUT) + q->u.out.pci_out_enabled = 1; + else + q->u.out.pci_out_enabled = 0; + + if (queue_type(q) == QDIO_IQDIO_QFMT) { + if (multicast_outbound(q)) + qdio_kick_outbound_q(q); + else + /* + * One siga-w per buffer required for unicast + * HiperSockets. + */ + while (count--) + qdio_kick_outbound_q(q); + goto out; + } + + if (need_siga_sync(q)) { + qdio_siga_sync_q(q); + goto out; + } + + /* try to fast requeue buffers */ + get_buf_state(q, prev_buf(bufnr), &state); + if (state != SLSB_CU_OUTPUT_PRIMED) + qdio_kick_outbound_q(q); + else { + QDIO_DBF_TEXT5(0, trace, "fast-req"); + qdio_perf_stat_inc(&perf_stats.fast_requeue); + } +out: + /* Fixme: could wait forever if called from process context */ + tasklet_schedule(&q->tasklet); +} + +/** + * do_QDIO - process input or output buffers + * @cdev: associated ccw_device for the qdio subchannel + * @callflags: input or output and special flags from the program + * @q_nr: queue number + * @bufnr: buffer number + * @count: how many buffers to process + */ +int do_QDIO(struct ccw_device *cdev, unsigned int callflags, + int q_nr, int bufnr, int count) +{ + struct qdio_irq *irq_ptr; +#ifdef CONFIG_QDIO_DEBUG + char dbf_text[20]; + + sprintf(dbf_text, "doQD%04x", cdev->private->schid.sch_no); + QDIO_DBF_TEXT3(0, trace, dbf_text); +#endif /* CONFIG_QDIO_DEBUG */ + + if ((bufnr > QDIO_MAX_BUFFERS_PER_Q) || + (count > QDIO_MAX_BUFFERS_PER_Q) || + (q_nr > QDIO_MAX_QUEUES_PER_IRQ)) + return -EINVAL; + + if (!count) + return 0; + + irq_ptr = cdev->private->qdio_data; + if (!irq_ptr) + return -ENODEV; + +#ifdef CONFIG_QDIO_DEBUG + if (callflags & QDIO_FLAG_SYNC_INPUT) + QDIO_DBF_HEX3(0, trace, &irq_ptr->input_qs[q_nr], + sizeof(void *)); + else + QDIO_DBF_HEX3(0, trace, &irq_ptr->output_qs[q_nr], + sizeof(void *)); + + sprintf(dbf_text, "flag%04x", callflags); + QDIO_DBF_TEXT3(0, trace, dbf_text); + sprintf(dbf_text, "qi%02xct%02x", bufnr, count); + QDIO_DBF_TEXT3(0, trace, dbf_text); +#endif /* CONFIG_QDIO_DEBUG */ + + if (irq_ptr->state != QDIO_IRQ_STATE_ACTIVE) + return -EBUSY; + + if (callflags & QDIO_FLAG_SYNC_INPUT) + handle_inbound(irq_ptr->input_qs[q_nr], + callflags, bufnr, count); + else if (callflags & QDIO_FLAG_SYNC_OUTPUT) + handle_outbound(irq_ptr->output_qs[q_nr], + callflags, bufnr, count); + else { + QDIO_DBF_TEXT3(1, trace, "doQD:inv"); + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL_GPL(do_QDIO); + +static int __init init_QDIO(void) +{ + int rc; + + rc = qdio_setup_init(); + if (rc) + return rc; + rc = tiqdio_allocate_memory(); + if (rc) + goto out_cache; + rc = qdio_debug_init(); + if (rc) + goto out_ti; + rc = qdio_setup_perf_stats(); + if (rc) + goto out_debug; + rc = tiqdio_register_thinints(); + if (rc) + goto out_perf; + return 0; + +out_perf: + qdio_remove_perf_stats(); +out_debug: + qdio_debug_exit(); +out_ti: + tiqdio_free_memory(); +out_cache: + qdio_setup_exit(); + return rc; +} + +static void __exit exit_QDIO(void) +{ + tiqdio_unregister_thinints(); + tiqdio_free_memory(); + qdio_remove_perf_stats(); + qdio_debug_exit(); + qdio_setup_exit(); +} + +module_init(init_QDIO); +module_exit(exit_QDIO); diff --git a/drivers/s390/cio/qdio_perf.c b/drivers/s390/cio/qdio_perf.c new file mode 100644 index 0000000..ea01b85 --- /dev/null +++ b/drivers/s390/cio/qdio_perf.c @@ -0,0 +1,151 @@ +/* + * drivers/s390/cio/qdio_perf.c + * + * Copyright IBM Corp. 2008 + * + * Author: Jan Glauber (jang@linux.vnet.ibm.com) + */ +#include +#include +#include +#include + +#include "cio.h" +#include "css.h" +#include "device.h" +#include "ioasm.h" +#include "chsc.h" +#include "qdio_debug.h" +#include "qdio_perf.h" + +int qdio_performance_stats; +struct qdio_perf_stats perf_stats; + +#ifdef CONFIG_PROC_FS +static struct proc_dir_entry *qdio_perf_pde; +#endif + +inline void qdio_perf_stat_inc(atomic_long_t *count) +{ + if (qdio_performance_stats) + atomic_long_inc(count); +} + +inline void qdio_perf_stat_dec(atomic_long_t *count) +{ + if (qdio_performance_stats) + atomic_long_dec(count); +} + +/* + * procfs functions + */ +static int qdio_perf_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "Number of qdio interrupts\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.qdio_int)); + seq_printf(m, "Number of PCI interrupts\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.pci_int)); + seq_printf(m, "Number of adapter interrupts\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.thin_int)); + seq_printf(m, "\n"); + seq_printf(m, "Inbound tasklet runs\t\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.tasklet_inbound)); + seq_printf(m, "Outbound tasklet runs\t\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.tasklet_outbound)); + seq_printf(m, "Adapter interrupt tasklet runs/loops\t\t: %li/%li\n", + (long)atomic_long_read(&perf_stats.tasklet_thinint), + (long)atomic_long_read(&perf_stats.tasklet_thinint_loop)); + seq_printf(m, "Adapter interrupt inbound tasklet runs/loops\t: %li/%li\n", + (long)atomic_long_read(&perf_stats.thinint_inbound), + (long)atomic_long_read(&perf_stats.thinint_inbound_loop)); + seq_printf(m, "\n"); + seq_printf(m, "Number of SIGA In issued\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.siga_in)); + seq_printf(m, "Number of SIGA Out issued\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.siga_out)); + seq_printf(m, "Number of SIGA Sync issued\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.siga_sync)); + seq_printf(m, "\n"); + seq_printf(m, "Number of inbound transfers\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.inbound_handler)); + seq_printf(m, "Number of outbound transfers\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.outbound_handler)); + seq_printf(m, "\n"); + seq_printf(m, "Number of fast requeues (outg. SBAL w/o SIGA)\t: %li\n", + (long)atomic_long_read(&perf_stats.fast_requeue)); + seq_printf(m, "Number of outbound tasklet mod_timer calls\t: %li\n", + (long)atomic_long_read(&perf_stats.debug_tl_out_timer)); + seq_printf(m, "Number of stop polling calls\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.debug_stop_polling)); + seq_printf(m, "AI inbound tasklet loops after stop polling\t: %li\n", + (long)atomic_long_read(&perf_stats.thinint_inbound_loop2)); + seq_printf(m, "\n"); + return 0; +} +static int qdio_perf_seq_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, qdio_perf_proc_show, NULL); +} + +static struct file_operations qdio_perf_proc_fops = { + .owner = THIS_MODULE, + .open = qdio_perf_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * sysfs functions + */ +static ssize_t qdio_perf_stats_show(struct bus_type *bus, char *buf) +{ + return sprintf(buf, "%i\n", qdio_performance_stats ? 1 : 0); +} + +static ssize_t qdio_perf_stats_store(struct bus_type *bus, + const char *buf, size_t count) +{ + unsigned long i; + + if (strict_strtoul(buf, 16, &i) != 0) + return -EINVAL; + if ((i != 0) && (i != 1)) + return -EINVAL; + if (i == qdio_performance_stats) + return count; + + qdio_performance_stats = i; + /* reset performance statistics */ + if (i == 0) + memset(&perf_stats, 0, sizeof(struct qdio_perf_stats)); + return count; +} + +static BUS_ATTR(qdio_performance_stats, 0644, qdio_perf_stats_show, + qdio_perf_stats_store); + +int __init qdio_setup_perf_stats(void) +{ + int rc; + + rc = bus_create_file(&ccw_bus_type, &bus_attr_qdio_performance_stats); + if (rc) + return rc; + +#ifdef CONFIG_PROC_FS + memset(&perf_stats, 0, sizeof(struct qdio_perf_stats)); + qdio_perf_pde = proc_create("qdio_perf", S_IFREG | S_IRUGO, + NULL, &qdio_perf_proc_fops); +#endif + return 0; +} + +void __exit qdio_remove_perf_stats(void) +{ +#ifdef CONFIG_PROC_FS + remove_proc_entry("qdio_perf", NULL); +#endif + bus_remove_file(&ccw_bus_type, &bus_attr_qdio_performance_stats); +} diff --git a/drivers/s390/cio/qdio_perf.h b/drivers/s390/cio/qdio_perf.h new file mode 100644 index 0000000..5c406a8 --- /dev/null +++ b/drivers/s390/cio/qdio_perf.h @@ -0,0 +1,54 @@ +/* + * drivers/s390/cio/qdio_perf.h + * + * Copyright IBM Corp. 2008 + * + * Author: Jan Glauber (jang@linux.vnet.ibm.com) + */ +#ifndef QDIO_PERF_H +#define QDIO_PERF_H + +#include +#include +#include + +struct qdio_perf_stats { + /* interrupt handler calls */ + atomic_long_t qdio_int; + atomic_long_t pci_int; + atomic_long_t thin_int; + + /* tasklet runs */ + atomic_long_t tasklet_inbound; + atomic_long_t tasklet_outbound; + atomic_long_t tasklet_thinint; + atomic_long_t tasklet_thinint_loop; + atomic_long_t thinint_inbound; + atomic_long_t thinint_inbound_loop; + atomic_long_t thinint_inbound_loop2; + + /* signal adapter calls */ + atomic_long_t siga_out; + atomic_long_t siga_in; + atomic_long_t siga_sync; + + /* misc */ + atomic_long_t inbound_handler; + atomic_long_t outbound_handler; + atomic_long_t fast_requeue; + + /* for debugging */ + atomic_long_t debug_tl_out_timer; + atomic_long_t debug_stop_polling; +}; + +extern struct qdio_perf_stats perf_stats; +extern int qdio_performance_stats; + +int qdio_setup_perf_stats(void); +void qdio_remove_perf_stats(void); + +extern void qdio_perf_stat_inc(atomic_long_t *count); +extern void qdio_perf_stat_dec(atomic_long_t *count); + +#endif diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c new file mode 100644 index 0000000..f0923a8 --- /dev/null +++ b/drivers/s390/cio/qdio_setup.c @@ -0,0 +1,521 @@ +/* + * driver/s390/cio/qdio_setup.c + * + * qdio queue initialization + * + * Copyright (C) IBM Corp. 2008 + * Author(s): Jan Glauber + */ +#include +#include +#include + +#include "cio.h" +#include "css.h" +#include "device.h" +#include "ioasm.h" +#include "chsc.h" +#include "qdio.h" +#include "qdio_debug.h" + +static struct kmem_cache *qdio_q_cache; + +/* + * qebsm is only available under 64bit but the adapter sets the feature + * flag anyway, so we manually override it. + */ +static inline int qebsm_possible(void) +{ +#ifdef CONFIG_64BIT + return css_general_characteristics.qebsm; +#endif + return 0; +} + +/* + * qib_param_field: pointer to 128 bytes or NULL, if no param field + * nr_input_qs: pointer to nr_queues*128 words of data or NULL + */ +static void set_impl_params(struct qdio_irq *irq_ptr, + unsigned int qib_param_field_format, + unsigned char *qib_param_field, + unsigned long *input_slib_elements, + unsigned long *output_slib_elements) +{ + struct qdio_q *q; + int i, j; + + if (!irq_ptr) + return; + + WARN_ON((unsigned long)&irq_ptr->qib & 0xff); + irq_ptr->qib.pfmt = qib_param_field_format; + if (qib_param_field) + memcpy(irq_ptr->qib.parm, qib_param_field, + QDIO_MAX_BUFFERS_PER_Q); + + if (!input_slib_elements) + goto output; + + for_each_input_queue(irq_ptr, q, i) { + for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++) + q->slib->slibe[j].parms = + input_slib_elements[i * QDIO_MAX_BUFFERS_PER_Q + j]; + } +output: + if (!output_slib_elements) + return; + + for_each_output_queue(irq_ptr, q, i) { + for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++) + q->slib->slibe[j].parms = + output_slib_elements[i * QDIO_MAX_BUFFERS_PER_Q + j]; + } +} + +static int __qdio_allocate_qs(struct qdio_q **irq_ptr_qs, int nr_queues) +{ + struct qdio_q *q; + int i; + + for (i = 0; i < nr_queues; i++) { + q = kmem_cache_alloc(qdio_q_cache, GFP_KERNEL); + if (!q) + return -ENOMEM; + WARN_ON((unsigned long)q & 0xff); + + q->slib = (struct slib *) __get_free_page(GFP_KERNEL); + if (!q->slib) { + kmem_cache_free(qdio_q_cache, q); + return -ENOMEM; + } + WARN_ON((unsigned long)q->slib & 0x7ff); + irq_ptr_qs[i] = q; + } + return 0; +} + +int qdio_allocate_qs(struct qdio_irq *irq_ptr, int nr_input_qs, int nr_output_qs) +{ + int rc; + + rc = __qdio_allocate_qs(irq_ptr->input_qs, nr_input_qs); + if (rc) + return rc; + rc = __qdio_allocate_qs(irq_ptr->output_qs, nr_output_qs); + return rc; +} + +static void setup_queues_misc(struct qdio_q *q, struct qdio_irq *irq_ptr, + qdio_handler_t *handler, int i) +{ + /* must be cleared by every qdio_establish */ + memset(q, 0, ((char *)&q->slib) - ((char *)q)); + memset(q->slib, 0, PAGE_SIZE); + + q->irq_ptr = irq_ptr; + q->mask = 1 << (31 - i); + q->nr = i; + q->handler = handler; +} + +static void setup_storage_lists(struct qdio_q *q, struct qdio_irq *irq_ptr, + void **sbals_array, char *dbf_text, int i) +{ + struct qdio_q *prev; + int j; + + QDIO_DBF_TEXT0(0, setup, dbf_text); + QDIO_DBF_HEX0(0, setup, &q, sizeof(void *)); + + q->sl = (struct sl *)((char *)q->slib + PAGE_SIZE / 2); + + /* fill in sbal */ + for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++) { + q->sbal[j] = *sbals_array++; + WARN_ON((unsigned long)q->sbal[j] & 0xff); + } + + /* fill in slib */ + if (i > 0) { + prev = (q->is_input_q) ? irq_ptr->input_qs[i - 1] + : irq_ptr->output_qs[i - 1]; + prev->slib->nsliba = (unsigned long)q->slib; + } + + q->slib->sla = (unsigned long)q->sl; + q->slib->slsba = (unsigned long)&q->slsb.val[0]; + + /* fill in sl */ + for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++) + q->sl->element[j].sbal = (unsigned long)q->sbal[j]; + + QDIO_DBF_TEXT2(0, setup, "sl-sb-b0"); + QDIO_DBF_HEX2(0, setup, q->sl, sizeof(void *)); + QDIO_DBF_HEX2(0, setup, &q->slsb, sizeof(void *)); + QDIO_DBF_HEX2(0, setup, q->sbal, sizeof(void *)); +} + +static void setup_queues(struct qdio_irq *irq_ptr, + struct qdio_initialize *qdio_init) +{ + char dbf_text[20]; + struct qdio_q *q; + void **input_sbal_array = qdio_init->input_sbal_addr_array; + void **output_sbal_array = qdio_init->output_sbal_addr_array; + int i; + + sprintf(dbf_text, "qfqs%4x", qdio_init->cdev->private->schid.sch_no); + QDIO_DBF_TEXT0(0, setup, dbf_text); + + for_each_input_queue(irq_ptr, q, i) { + sprintf(dbf_text, "in-q%4x", i); + setup_queues_misc(q, irq_ptr, qdio_init->input_handler, i); + + q->is_input_q = 1; + spin_lock_init(&q->u.in.lock); + setup_storage_lists(q, irq_ptr, input_sbal_array, dbf_text, i); + input_sbal_array += QDIO_MAX_BUFFERS_PER_Q; + + if (is_thinint_irq(irq_ptr)) + tasklet_init(&q->tasklet, tiqdio_inbound_processing, + (unsigned long) q); + else + tasklet_init(&q->tasklet, qdio_inbound_processing, + (unsigned long) q); + } + + for_each_output_queue(irq_ptr, q, i) { + sprintf(dbf_text, "outq%4x", i); + setup_queues_misc(q, irq_ptr, qdio_init->output_handler, i); + + q->is_input_q = 0; + setup_storage_lists(q, irq_ptr, output_sbal_array, + dbf_text, i); + output_sbal_array += QDIO_MAX_BUFFERS_PER_Q; + + tasklet_init(&q->tasklet, qdio_outbound_processing, + (unsigned long) q); + setup_timer(&q->u.out.timer, (void(*)(unsigned long)) + &qdio_outbound_timer, (unsigned long)q); + } +} + +static void process_ac_flags(struct qdio_irq *irq_ptr, unsigned char qdioac) +{ + if (qdioac & AC1_SIGA_INPUT_NEEDED) + irq_ptr->siga_flag.input = 1; + if (qdioac & AC1_SIGA_OUTPUT_NEEDED) + irq_ptr->siga_flag.output = 1; + if (qdioac & AC1_SIGA_SYNC_NEEDED) + irq_ptr->siga_flag.sync = 1; + if (qdioac & AC1_AUTOMATIC_SYNC_ON_THININT) + irq_ptr->siga_flag.no_sync_ti = 1; + if (qdioac & AC1_AUTOMATIC_SYNC_ON_OUT_PCI) + irq_ptr->siga_flag.no_sync_out_pci = 1; + + if (irq_ptr->siga_flag.no_sync_out_pci && + irq_ptr->siga_flag.no_sync_ti) + irq_ptr->siga_flag.no_sync_out_ti = 1; +} + +static void check_and_setup_qebsm(struct qdio_irq *irq_ptr, + unsigned char qdioac, unsigned long token) +{ + char dbf_text[15]; + + if (!(irq_ptr->qib.rflags & QIB_RFLAGS_ENABLE_QEBSM)) + goto no_qebsm; + if (!(qdioac & AC1_SC_QEBSM_AVAILABLE) || + (!(qdioac & AC1_SC_QEBSM_ENABLED))) + goto no_qebsm; + + irq_ptr->sch_token = token; + + QDIO_DBF_TEXT0(0, setup, "V=V:1"); + sprintf(dbf_text, "%8lx", irq_ptr->sch_token); + QDIO_DBF_TEXT0(0, setup, dbf_text); + return; + +no_qebsm: + irq_ptr->sch_token = 0; + irq_ptr->qib.rflags &= ~QIB_RFLAGS_ENABLE_QEBSM; + QDIO_DBF_TEXT0(0, setup, "noV=V"); +} + +static int __get_ssqd_info(struct qdio_irq *irq_ptr) +{ + struct chsc_ssqd_area *ssqd; + int rc; + + QDIO_DBF_TEXT0(0, setup, "getssqd"); + ssqd = (struct chsc_ssqd_area *)irq_ptr->chsc_page; + memset(ssqd, 0, PAGE_SIZE); + + ssqd->request = (struct chsc_header) { + .length = 0x0010, + .code = 0x0024, + }; + ssqd->first_sch = irq_ptr->schid.sch_no; + ssqd->last_sch = irq_ptr->schid.sch_no; + ssqd->ssid = irq_ptr->schid.ssid; + + if (chsc(ssqd)) + return -EIO; + rc = chsc_error_from_response(ssqd->response.code); + if (rc) + return rc; + + if (!(ssqd->qdio_ssqd.flags & CHSC_FLAG_QDIO_CAPABILITY) || + !(ssqd->qdio_ssqd.flags & CHSC_FLAG_VALIDITY) || + (ssqd->qdio_ssqd.sch != irq_ptr->schid.sch_no)) + return -EINVAL; + + memcpy(&irq_ptr->ssqd_desc, &ssqd->qdio_ssqd, + sizeof(struct qdio_ssqd_desc)); + return 0; +} + +void qdio_setup_ssqd_info(struct qdio_irq *irq_ptr) +{ + unsigned char qdioac; + char dbf_text[15]; + int rc; + + rc = __get_ssqd_info(irq_ptr); + if (rc) { + QDIO_DBF_TEXT2(0, setup, "ssqdasig"); + sprintf(dbf_text, "schno%x", irq_ptr->schid.sch_no); + QDIO_DBF_TEXT2(0, setup, dbf_text); + sprintf(dbf_text, "rc:%d", rc); + QDIO_DBF_TEXT2(0, setup, dbf_text); + /* all flags set, worst case */ + qdioac = AC1_SIGA_INPUT_NEEDED | AC1_SIGA_OUTPUT_NEEDED | + AC1_SIGA_SYNC_NEEDED; + } else + qdioac = irq_ptr->ssqd_desc.qdioac1; + + check_and_setup_qebsm(irq_ptr, qdioac, irq_ptr->ssqd_desc.sch_token); + process_ac_flags(irq_ptr, qdioac); + + sprintf(dbf_text, "qdioac%2x", qdioac); + QDIO_DBF_TEXT2(0, setup, dbf_text); +} + +void qdio_release_memory(struct qdio_irq *irq_ptr) +{ + struct qdio_q *q; + int i; + + /* + * Must check queue array manually since irq_ptr->nr_input_queues / + * irq_ptr->nr_input_queues may not yet be set. + */ + for (i = 0; i < QDIO_MAX_QUEUES_PER_IRQ; i++) { + q = irq_ptr->input_qs[i]; + if (q) { + free_page((unsigned long) q->slib); + kmem_cache_free(qdio_q_cache, q); + } + } + for (i = 0; i < QDIO_MAX_QUEUES_PER_IRQ; i++) { + q = irq_ptr->output_qs[i]; + if (q) { + free_page((unsigned long) q->slib); + kmem_cache_free(qdio_q_cache, q); + } + } + kfree(irq_ptr->qdr); + free_page(irq_ptr->chsc_page); + free_page((unsigned long) irq_ptr); +} + +static void __qdio_allocate_fill_qdr(struct qdio_irq *irq_ptr, + struct qdio_q **irq_ptr_qs, + int i, int nr) +{ + irq_ptr->qdr->qdf0[i + nr].sliba = + (unsigned long)irq_ptr_qs[i]->slib; + + irq_ptr->qdr->qdf0[i + nr].sla = + (unsigned long)irq_ptr_qs[i]->sl; + + irq_ptr->qdr->qdf0[i + nr].slsba = + (unsigned long)&irq_ptr_qs[i]->slsb.val[0]; + + irq_ptr->qdr->qdf0[i + nr].akey = PAGE_DEFAULT_KEY; + irq_ptr->qdr->qdf0[i + nr].bkey = PAGE_DEFAULT_KEY; + irq_ptr->qdr->qdf0[i + nr].ckey = PAGE_DEFAULT_KEY; + irq_ptr->qdr->qdf0[i + nr].dkey = PAGE_DEFAULT_KEY; +} + +static void setup_qdr(struct qdio_irq *irq_ptr, + struct qdio_initialize *qdio_init) +{ + int i; + + irq_ptr->qdr->qfmt = qdio_init->q_format; + irq_ptr->qdr->iqdcnt = qdio_init->no_input_qs; + irq_ptr->qdr->oqdcnt = qdio_init->no_output_qs; + irq_ptr->qdr->iqdsz = sizeof(struct qdesfmt0) / 4; /* size in words */ + irq_ptr->qdr->oqdsz = sizeof(struct qdesfmt0) / 4; + irq_ptr->qdr->qiba = (unsigned long)&irq_ptr->qib; + irq_ptr->qdr->qkey = PAGE_DEFAULT_KEY; + + for (i = 0; i < qdio_init->no_input_qs; i++) + __qdio_allocate_fill_qdr(irq_ptr, irq_ptr->input_qs, i, 0); + + for (i = 0; i < qdio_init->no_output_qs; i++) + __qdio_allocate_fill_qdr(irq_ptr, irq_ptr->output_qs, i, + qdio_init->no_input_qs); +} + +static void setup_qib(struct qdio_irq *irq_ptr, + struct qdio_initialize *init_data) +{ + if (qebsm_possible()) + irq_ptr->qib.rflags |= QIB_RFLAGS_ENABLE_QEBSM; + + irq_ptr->qib.qfmt = init_data->q_format; + if (init_data->no_input_qs) + irq_ptr->qib.isliba = + (unsigned long)(irq_ptr->input_qs[0]->slib); + if (init_data->no_output_qs) + irq_ptr->qib.osliba = + (unsigned long)(irq_ptr->output_qs[0]->slib); + memcpy(irq_ptr->qib.ebcnam, init_data->adapter_name, 8); +} + +int qdio_setup_irq(struct qdio_initialize *init_data) +{ + struct ciw *ciw; + struct qdio_irq *irq_ptr = init_data->cdev->private->qdio_data; + int rc; + + memset(irq_ptr, 0, ((char *)&irq_ptr->qdr) - ((char *)irq_ptr)); + /* wipes qib.ac, required by ar7063 */ + memset(irq_ptr->qdr, 0, sizeof(struct qdr)); + + irq_ptr->int_parm = init_data->int_parm; + irq_ptr->nr_input_qs = init_data->no_input_qs; + irq_ptr->nr_output_qs = init_data->no_output_qs; + + irq_ptr->schid = ccw_device_get_subchannel_id(init_data->cdev); + irq_ptr->cdev = init_data->cdev; + setup_queues(irq_ptr, init_data); + + setup_qib(irq_ptr, init_data); + qdio_setup_thinint(irq_ptr); + set_impl_params(irq_ptr, init_data->qib_param_field_format, + init_data->qib_param_field, + init_data->input_slib_elements, + init_data->output_slib_elements); + + /* fill input and output descriptors */ + setup_qdr(irq_ptr, init_data); + + /* qdr, qib, sls, slsbs, slibs, sbales are filled now */ + + /* get qdio commands */ + ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_EQUEUE); + if (!ciw) { + QDIO_DBF_TEXT2(1, setup, "no eq"); + rc = -EINVAL; + goto out_err; + } + irq_ptr->equeue = *ciw; + + ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_AQUEUE); + if (!ciw) { + QDIO_DBF_TEXT2(1, setup, "no aq"); + rc = -EINVAL; + goto out_err; + } + irq_ptr->aqueue = *ciw; + + /* set new interrupt handler */ + irq_ptr->orig_handler = init_data->cdev->handler; + init_data->cdev->handler = qdio_int_handler; + return 0; +out_err: + qdio_release_memory(irq_ptr); + return rc; +} + +void qdio_print_subchannel_info(struct qdio_irq *irq_ptr, + struct ccw_device *cdev) +{ + char s[80]; + + sprintf(s, "%s ", cdev->dev.bus_id); + + switch (irq_ptr->qib.qfmt) { + case QDIO_QETH_QFMT: + sprintf(s + strlen(s), "OSADE "); + break; + case QDIO_ZFCP_QFMT: + sprintf(s + strlen(s), "ZFCP "); + break; + case QDIO_IQDIO_QFMT: + sprintf(s + strlen(s), "HiperSockets "); + break; + } + sprintf(s + strlen(s), "using: "); + + if (!is_thinint_irq(irq_ptr)) + sprintf(s + strlen(s), "no"); + sprintf(s + strlen(s), "AdapterInterrupts "); + if (!(irq_ptr->sch_token != 0)) + sprintf(s + strlen(s), "no"); + sprintf(s + strlen(s), "QEBSM "); + if (!(irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED)) + sprintf(s + strlen(s), "no"); + sprintf(s + strlen(s), "OutboundPCI "); + if (!css_general_characteristics.aif_tdd) + sprintf(s + strlen(s), "no"); + sprintf(s + strlen(s), "TDD\n"); + printk(KERN_INFO "qdio: %s", s); + + memset(s, 0, sizeof(s)); + sprintf(s, "%s SIGA required: ", cdev->dev.bus_id); + if (irq_ptr->siga_flag.input) + sprintf(s + strlen(s), "Read "); + if (irq_ptr->siga_flag.output) + sprintf(s + strlen(s), "Write "); + if (irq_ptr->siga_flag.sync) + sprintf(s + strlen(s), "Sync "); + if (!irq_ptr->siga_flag.no_sync_ti) + sprintf(s + strlen(s), "SyncAI "); + if (!irq_ptr->siga_flag.no_sync_out_ti) + sprintf(s + strlen(s), "SyncOutAI "); + if (!irq_ptr->siga_flag.no_sync_out_pci) + sprintf(s + strlen(s), "SyncOutPCI"); + sprintf(s + strlen(s), "\n"); + printk(KERN_INFO "qdio: %s", s); +} + +int __init qdio_setup_init(void) +{ + char dbf_text[15]; + + qdio_q_cache = kmem_cache_create("qdio_q", sizeof(struct qdio_q), + 256, 0, NULL); + if (!qdio_q_cache) + return -ENOMEM; + + /* Check for OSA/FCP thin interrupts (bit 67). */ + sprintf(dbf_text, "thini%1x", + (css_general_characteristics.aif_osa) ? 1 : 0); + QDIO_DBF_TEXT0(0, setup, dbf_text); + + /* Check for QEBSM support in general (bit 58). */ + sprintf(dbf_text, "cssQBS:%1x", + (qebsm_possible()) ? 1 : 0); + QDIO_DBF_TEXT0(0, setup, dbf_text); + return 0; +} + +void __exit qdio_setup_exit(void) +{ + kmem_cache_destroy(qdio_q_cache); +} diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c new file mode 100644 index 0000000..9291a77 --- /dev/null +++ b/drivers/s390/cio/qdio_thinint.c @@ -0,0 +1,380 @@ +/* + * linux/drivers/s390/cio/thinint_qdio.c + * + * thin interrupt support for qdio + * + * Copyright 2000-2008 IBM Corp. + * Author(s): Utz Bacher + * Cornelia Huck + * Jan Glauber + */ +#include +#include +#include +#include +#include +#include + +#include "cio.h" +#include "ioasm.h" +#include "qdio.h" +#include "qdio_debug.h" +#include "qdio_perf.h" + +/* + * Restriction: only 63 iqdio subchannels would have its own indicator, + * after that, subsequent subchannels share one indicator + */ +#define TIQDIO_NR_NONSHARED_IND 63 +#define TIQDIO_NR_INDICATORS (TIQDIO_NR_NONSHARED_IND + 1) +#define TIQDIO_SHARED_IND 63 + +/* list of thin interrupt input queues */ +static LIST_HEAD(tiq_list); + +/* adapter local summary indicator */ +static unsigned char *tiqdio_alsi; + +/* device state change indicators */ +struct indicator_t { + u32 ind; /* u32 because of compare-and-swap performance */ + atomic_t count; /* use count, 0 or 1 for non-shared indicators */ +}; +static struct indicator_t *q_indicators; + +static void tiqdio_tasklet_fn(unsigned long data); +static DECLARE_TASKLET(tiqdio_tasklet, tiqdio_tasklet_fn, 0); + +static int css_qdio_omit_svs; + +static inline unsigned long do_clear_global_summary(void) +{ + register unsigned long __fn asm("1") = 3; + register unsigned long __tmp asm("2"); + register unsigned long __time asm("3"); + + asm volatile( + " .insn rre,0xb2650000,2,0" + : "+d" (__fn), "=d" (__tmp), "=d" (__time)); + return __time; +} + +/* returns addr for the device state change indicator */ +static u32 *get_indicator(void) +{ + int i; + + for (i = 0; i < TIQDIO_NR_NONSHARED_IND; i++) + if (!atomic_read(&q_indicators[i].count)) { + atomic_set(&q_indicators[i].count, 1); + return &q_indicators[i].ind; + } + + /* use the shared indicator */ + atomic_inc(&q_indicators[TIQDIO_SHARED_IND].count); + return &q_indicators[TIQDIO_SHARED_IND].ind; +} + +static void put_indicator(u32 *addr) +{ + int i; + + if (!addr) + return; + i = ((unsigned long)addr - (unsigned long)q_indicators) / + sizeof(struct indicator_t); + atomic_dec(&q_indicators[i].count); +} + +void tiqdio_add_input_queues(struct qdio_irq *irq_ptr) +{ + struct qdio_q *q; + int i; + + /* No TDD facility? If we must use SIGA-s we can also omit SVS. */ + if (!css_qdio_omit_svs && irq_ptr->siga_flag.sync) + css_qdio_omit_svs = 1; + + for_each_input_queue(irq_ptr, q, i) { + list_add_rcu(&q->entry, &tiq_list); + synchronize_rcu(); + } + xchg(irq_ptr->dsci, 1); + tasklet_schedule(&tiqdio_tasklet); +} + +/* + * we cannot stop the tiqdio tasklet here since it is for all + * thinint qdio devices and it must run as long as there is a + * thinint device left + */ +void tiqdio_remove_input_queues(struct qdio_irq *irq_ptr) +{ + struct qdio_q *q; + int i; + + for_each_input_queue(irq_ptr, q, i) { + list_del_rcu(&q->entry); + synchronize_rcu(); + } +} + +static inline int tiqdio_inbound_q_done(struct qdio_q *q) +{ + unsigned char state; + + if (!atomic_read(&q->nr_buf_used)) + return 1; + + qdio_siga_sync_q(q); + get_buf_state(q, q->first_to_check, &state); + + if (state == SLSB_P_INPUT_PRIMED) + /* more work coming */ + return 0; + return 1; +} + +static inline int shared_ind(struct qdio_irq *irq_ptr) +{ + return irq_ptr->dsci == &q_indicators[TIQDIO_SHARED_IND].ind; +} + +static void __tiqdio_inbound_processing(struct qdio_q *q) +{ + qdio_perf_stat_inc(&perf_stats.thinint_inbound); + qdio_sync_after_thinint(q); + + /* + * Maybe we have work on our outbound queues... at least + * we have to check the PCI capable queues. + */ + qdio_check_outbound_after_thinint(q); + +again: + if (!qdio_inbound_q_moved(q)) + return; + + qdio_kick_inbound_handler(q); + + if (!tiqdio_inbound_q_done(q)) { + qdio_perf_stat_inc(&perf_stats.thinint_inbound_loop); + goto again; + } + + qdio_stop_polling(q); + /* + * We need to check again to not lose initiative after + * resetting the ACK state. + */ + if (!tiqdio_inbound_q_done(q)) { + qdio_perf_stat_inc(&perf_stats.thinint_inbound_loop2); + goto again; + } +} + +void tiqdio_inbound_processing(unsigned long data) +{ + struct qdio_q *q = (struct qdio_q *)data; + + __tiqdio_inbound_processing(q); +} + +/* check for work on all inbound thinint queues */ +static void tiqdio_tasklet_fn(unsigned long data) +{ + struct qdio_q *q; + + qdio_perf_stat_inc(&perf_stats.tasklet_thinint); +again: + + /* protect tiq_list entries, only changed in activate or shutdown */ + rcu_read_lock(); + + list_for_each_entry_rcu(q, &tiq_list, entry) + /* only process queues from changed sets */ + if (*q->irq_ptr->dsci) { + + /* only clear it if the indicator is non-shared */ + if (!shared_ind(q->irq_ptr)) + xchg(q->irq_ptr->dsci, 0); + /* + * don't call inbound processing directly since + * that could starve other thinint queues + */ + tasklet_schedule(&q->tasklet); + } + + rcu_read_unlock(); + + /* + * if we used the shared indicator clear it now after all queues + * were processed + */ + if (atomic_read(&q_indicators[TIQDIO_SHARED_IND].count)) { + xchg(&q_indicators[TIQDIO_SHARED_IND].ind, 0); + + /* prevent racing */ + if (*tiqdio_alsi) + xchg(&q_indicators[TIQDIO_SHARED_IND].ind, 1); + } + + /* check for more work */ + if (*tiqdio_alsi) { + xchg(tiqdio_alsi, 0); + qdio_perf_stat_inc(&perf_stats.tasklet_thinint_loop); + goto again; + } +} + +/** + * tiqdio_thinint_handler - thin interrupt handler for qdio + * @ind: pointer to adapter local summary indicator + * @drv_data: NULL + */ +static void tiqdio_thinint_handler(void *ind, void *drv_data) +{ + qdio_perf_stat_inc(&perf_stats.thin_int); + + /* + * SVS only when needed: issue SVS to benefit from iqdio interrupt + * avoidance (SVS clears adapter interrupt suppression overwrite) + */ + if (!css_qdio_omit_svs) + do_clear_global_summary(); + + /* + * reset local summary indicator (tiqdio_alsi) to stop adapter + * interrupts for now, the tasklet will clean all dsci's + */ + xchg((u8 *)ind, 0); + tasklet_hi_schedule(&tiqdio_tasklet); +} + +static int set_subchannel_ind(struct qdio_irq *irq_ptr, int reset) +{ + struct scssc_area *scssc_area; + char dbf_text[15]; + void *ptr; + int rc; + + scssc_area = (struct scssc_area *)irq_ptr->chsc_page; + memset(scssc_area, 0, PAGE_SIZE); + + if (reset) { + scssc_area->summary_indicator_addr = 0; + scssc_area->subchannel_indicator_addr = 0; + } else { + scssc_area->summary_indicator_addr = virt_to_phys(tiqdio_alsi); + scssc_area->subchannel_indicator_addr = + virt_to_phys(irq_ptr->dsci); + } + + scssc_area->request = (struct chsc_header) { + .length = 0x0fe0, + .code = 0x0021, + }; + scssc_area->operation_code = 0; + scssc_area->ks = PAGE_DEFAULT_KEY; + scssc_area->kc = PAGE_DEFAULT_KEY; + scssc_area->isc = QDIO_AIRQ_ISC; + scssc_area->schid = irq_ptr->schid; + + /* enable the time delay disablement facility */ + if (css_general_characteristics.aif_tdd) + scssc_area->word_with_d_bit = 0x10000000; + + rc = chsc(scssc_area); + if (rc) + return -EIO; + + rc = chsc_error_from_response(scssc_area->response.code); + if (rc) { + sprintf(dbf_text, "sidR%4x", scssc_area->response.code); + QDIO_DBF_TEXT1(0, trace, dbf_text); + QDIO_DBF_TEXT1(0, setup, dbf_text); + ptr = &scssc_area->response; + QDIO_DBF_HEX2(1, setup, &ptr, QDIO_DBF_SETUP_LEN); + return rc; + } + + QDIO_DBF_TEXT2(0, setup, "setscind"); + QDIO_DBF_HEX2(0, setup, &scssc_area->summary_indicator_addr, + sizeof(unsigned long)); + QDIO_DBF_HEX2(0, setup, &scssc_area->subchannel_indicator_addr, + sizeof(unsigned long)); + return 0; +} + +/* allocate non-shared indicators and shared indicator */ +int __init tiqdio_allocate_memory(void) +{ + q_indicators = kzalloc(sizeof(struct indicator_t) * TIQDIO_NR_INDICATORS, + GFP_KERNEL); + if (!q_indicators) + return -ENOMEM; + return 0; +} + +void tiqdio_free_memory(void) +{ + kfree(q_indicators); +} + +int __init tiqdio_register_thinints(void) +{ + char dbf_text[20]; + + isc_register(QDIO_AIRQ_ISC); + tiqdio_alsi = s390_register_adapter_interrupt(&tiqdio_thinint_handler, + NULL, QDIO_AIRQ_ISC); + if (IS_ERR(tiqdio_alsi)) { + sprintf(dbf_text, "regthn%lx", PTR_ERR(tiqdio_alsi)); + QDIO_DBF_TEXT0(0, setup, dbf_text); + tiqdio_alsi = NULL; + isc_unregister(QDIO_AIRQ_ISC); + return -ENOMEM; + } + return 0; +} + +int qdio_establish_thinint(struct qdio_irq *irq_ptr) +{ + if (!is_thinint_irq(irq_ptr)) + return 0; + + /* Check for aif time delay disablement. If installed, + * omit SVS even under LPAR + */ + if (css_general_characteristics.aif_tdd) + css_qdio_omit_svs = 1; + return set_subchannel_ind(irq_ptr, 0); +} + +void qdio_setup_thinint(struct qdio_irq *irq_ptr) +{ + if (!is_thinint_irq(irq_ptr)) + return; + irq_ptr->dsci = get_indicator(); + QDIO_DBF_HEX1(0, setup, &irq_ptr->dsci, sizeof(void *)); +} + +void qdio_shutdown_thinint(struct qdio_irq *irq_ptr) +{ + if (!is_thinint_irq(irq_ptr)) + return; + + /* reset adapter interrupt indicators */ + put_indicator(irq_ptr->dsci); + set_subchannel_ind(irq_ptr, 1); +} + +void __exit tiqdio_unregister_thinints(void) +{ + tasklet_disable(&tiqdio_tasklet); + + if (tiqdio_alsi) { + s390_unregister_adapter_interrupt(tiqdio_alsi, QDIO_AIRQ_ISC); + isc_unregister(QDIO_AIRQ_ISC); + } +} diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 699ac11..1895dbb 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -239,11 +239,6 @@ static inline int qeth_is_ipa_enabled(struct qeth_ipa_info *ipa, /*not used unless the microcode gets patched*/ #define QETH_PCI_TIMER_VALUE(card) 3 -#define QETH_MIN_INPUT_THRESHOLD 1 -#define QETH_MAX_INPUT_THRESHOLD 500 -#define QETH_MIN_OUTPUT_THRESHOLD 1 -#define QETH_MAX_OUTPUT_THRESHOLD 300 - /* priority queing */ #define QETH_PRIOQ_DEFAULT QETH_NO_PRIO_QUEUEING #define QETH_DEFAULT_QUEUE 2 @@ -811,17 +806,14 @@ int qeth_send_ipa_cmd(struct qeth_card *, struct qeth_cmd_buffer *, struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *, enum qeth_ipa_cmds, enum qeth_prot_versions); int qeth_query_setadapterparms(struct qeth_card *); -int qeth_check_qdio_errors(struct qdio_buffer *, unsigned int, - unsigned int, const char *); +int qeth_check_qdio_errors(struct qdio_buffer *, unsigned int, const char *); void qeth_queue_input_buffer(struct qeth_card *, int); struct sk_buff *qeth_core_get_next_skb(struct qeth_card *, struct qdio_buffer *, struct qdio_buffer_element **, int *, struct qeth_hdr **); void qeth_schedule_recovery(struct qeth_card *); void qeth_qdio_output_handler(struct ccw_device *, unsigned int, - unsigned int, unsigned int, - unsigned int, int, int, - unsigned long); + int, int, int, unsigned long); void qeth_clear_ipacmd_list(struct qeth_card *); int qeth_qdio_clear_card(struct qeth_card *, int); void qeth_clear_working_pool_list(struct qeth_card *); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 0ac54dc..c3ad89e 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -2073,7 +2073,7 @@ static void qeth_create_qib_param_field_blkt(struct qeth_card *card, static int qeth_qdio_activate(struct qeth_card *card) { QETH_DBF_TEXT(SETUP, 3, "qdioact"); - return qdio_activate(CARD_DDEV(card), 0); + return qdio_activate(CARD_DDEV(card)); } static int qeth_dm_act(struct qeth_card *card) @@ -2349,16 +2349,11 @@ int qeth_init_qdio_queues(struct qeth_card *card) card->qdio.in_q->next_buf_to_init = card->qdio.in_buf_pool.buf_count - 1; rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, 0, - card->qdio.in_buf_pool.buf_count - 1, NULL); + card->qdio.in_buf_pool.buf_count - 1); if (rc) { QETH_DBF_TEXT_(SETUP, 2, "1err%d", rc); return rc; } - rc = qdio_synchronize(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0); - if (rc) { - QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc); - return rc; - } /* outbound queue */ for (i = 0; i < card->qdio.no_out_queues; ++i) { memset(card->qdio.out_qs[i]->qdio_bufs, 0, @@ -2559,9 +2554,9 @@ int qeth_query_setadapterparms(struct qeth_card *card) EXPORT_SYMBOL_GPL(qeth_query_setadapterparms); int qeth_check_qdio_errors(struct qdio_buffer *buf, unsigned int qdio_error, - unsigned int siga_error, const char *dbftext) + const char *dbftext) { - if (qdio_error || siga_error) { + if (qdio_error) { QETH_DBF_TEXT(TRACE, 2, dbftext); QETH_DBF_TEXT(QERR, 2, dbftext); QETH_DBF_TEXT_(QERR, 2, " F15=%02X", @@ -2569,7 +2564,6 @@ int qeth_check_qdio_errors(struct qdio_buffer *buf, unsigned int qdio_error, QETH_DBF_TEXT_(QERR, 2, " F14=%02X", buf->element[14].flags & 0xff); QETH_DBF_TEXT_(QERR, 2, " qerr=%X", qdio_error); - QETH_DBF_TEXT_(QERR, 2, " serr=%X", siga_error); return 1; } return 0; @@ -2622,9 +2616,8 @@ void qeth_queue_input_buffer(struct qeth_card *card, int index) card->perf_stats.inbound_do_qdio_start_time = qeth_get_micros(); } - rc = do_QDIO(CARD_DDEV(card), - QDIO_FLAG_SYNC_INPUT | QDIO_FLAG_UNDER_INTERRUPT, - 0, queue->next_buf_to_init, count, NULL); + rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, + queue->next_buf_to_init, count); if (card->options.performance_stats) card->perf_stats.inbound_do_qdio_time += qeth_get_micros() - @@ -2643,14 +2636,13 @@ void qeth_queue_input_buffer(struct qeth_card *card, int index) EXPORT_SYMBOL_GPL(qeth_queue_input_buffer); static int qeth_handle_send_error(struct qeth_card *card, - struct qeth_qdio_out_buffer *buffer, unsigned int qdio_err, - unsigned int siga_err) + struct qeth_qdio_out_buffer *buffer, unsigned int qdio_err) { int sbalf15 = buffer->buffer->element[15].flags & 0xff; - int cc = siga_err & 3; + int cc = qdio_err & 3; QETH_DBF_TEXT(TRACE, 6, "hdsnderr"); - qeth_check_qdio_errors(buffer->buffer, qdio_err, siga_err, "qouterr"); + qeth_check_qdio_errors(buffer->buffer, qdio_err, "qouterr"); switch (cc) { case 0: if (qdio_err) { @@ -2662,7 +2654,7 @@ static int qeth_handle_send_error(struct qeth_card *card, } return QETH_SEND_ERROR_NONE; case 2: - if (siga_err & QDIO_SIGA_ERROR_B_BIT_SET) { + if (qdio_err & QDIO_ERROR_SIGA_BUSY) { QETH_DBF_TEXT(TRACE, 1, "SIGAcc2B"); QETH_DBF_TEXT_(TRACE, 1, "%s", CARD_BUS_ID(card)); return QETH_SEND_ERROR_KICK_IT; @@ -2758,8 +2750,8 @@ static int qeth_flush_buffers_on_no_pci(struct qeth_qdio_out_q *queue) return 0; } -static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int under_int, - int index, int count) +static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index, + int count) { struct qeth_qdio_out_buffer *buf; int rc; @@ -2807,12 +2799,10 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int under_int, qeth_get_micros(); } qdio_flags = QDIO_FLAG_SYNC_OUTPUT; - if (under_int) - qdio_flags |= QDIO_FLAG_UNDER_INTERRUPT; if (atomic_read(&queue->set_pci_flags_count)) qdio_flags |= QDIO_FLAG_PCI_OUT; rc = do_QDIO(CARD_DDEV(queue->card), qdio_flags, - queue->queue_no, index, count, NULL); + queue->queue_no, index, count); if (queue->card->options.performance_stats) queue->card->perf_stats.outbound_do_qdio_time += qeth_get_micros() - @@ -2866,16 +2856,15 @@ static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue) queue->card->perf_stats.bufs_sent_pack += flush_cnt; if (flush_cnt) - qeth_flush_buffers(queue, 1, index, flush_cnt); + qeth_flush_buffers(queue, index, flush_cnt); atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED); } } } -void qeth_qdio_output_handler(struct ccw_device *ccwdev, unsigned int status, - unsigned int qdio_error, unsigned int siga_error, - unsigned int __queue, int first_element, int count, - unsigned long card_ptr) +void qeth_qdio_output_handler(struct ccw_device *ccwdev, + unsigned int qdio_error, int __queue, int first_element, + int count, unsigned long card_ptr) { struct qeth_card *card = (struct qeth_card *) card_ptr; struct qeth_qdio_out_q *queue = card->qdio.out_qs[__queue]; @@ -2883,15 +2872,12 @@ void qeth_qdio_output_handler(struct ccw_device *ccwdev, unsigned int status, int i; QETH_DBF_TEXT(TRACE, 6, "qdouhdl"); - if (status & QDIO_STATUS_LOOK_FOR_ERROR) { - if (status & QDIO_STATUS_ACTIVATE_CHECK_CONDITION) { - QETH_DBF_TEXT(TRACE, 2, "achkcond"); - QETH_DBF_TEXT_(TRACE, 2, "%s", CARD_BUS_ID(card)); - QETH_DBF_TEXT_(TRACE, 2, "%08x", status); - netif_stop_queue(card->dev); - qeth_schedule_recovery(card); - return; - } + if (qdio_error & QDIO_ERROR_ACTIVATE_CHECK_CONDITION) { + QETH_DBF_TEXT(TRACE, 2, "achkcond"); + QETH_DBF_TEXT_(TRACE, 2, "%s", CARD_BUS_ID(card)); + netif_stop_queue(card->dev); + qeth_schedule_recovery(card); + return; } if (card->options.performance_stats) { card->perf_stats.outbound_handler_cnt++; @@ -2901,8 +2887,7 @@ void qeth_qdio_output_handler(struct ccw_device *ccwdev, unsigned int status, for (i = first_element; i < (first_element + count); ++i) { buffer = &queue->bufs[i % QDIO_MAX_BUFFERS_PER_Q]; /*we only handle the KICK_IT error by doing a recovery */ - if (qeth_handle_send_error(card, buffer, - qdio_error, siga_error) + if (qeth_handle_send_error(card, buffer, qdio_error) == QETH_SEND_ERROR_KICK_IT){ netif_stop_queue(card->dev); qeth_schedule_recovery(card); @@ -3164,11 +3149,11 @@ int qeth_do_send_packet_fast(struct qeth_card *card, atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED); if (ctx == NULL) { qeth_fill_buffer(queue, buffer, skb); - qeth_flush_buffers(queue, 0, index, 1); + qeth_flush_buffers(queue, index, 1); } else { flush_cnt = qeth_eddp_fill_buffer(queue, ctx, index); WARN_ON(buffers_needed != flush_cnt); - qeth_flush_buffers(queue, 0, index, flush_cnt); + qeth_flush_buffers(queue, index, flush_cnt); } return 0; out: @@ -3221,8 +3206,8 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, * again */ if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY){ - qeth_flush_buffers(queue, 0, - start_index, flush_count); + qeth_flush_buffers(queue, start_index, + flush_count); atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED); return -EBUSY; @@ -3253,7 +3238,7 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, flush_count += tmp; out: if (flush_count) - qeth_flush_buffers(queue, 0, start_index, flush_count); + qeth_flush_buffers(queue, start_index, flush_count); else if (!atomic_read(&queue->set_pci_flags_count)) atomic_xchg(&queue->state, QETH_OUT_Q_LOCKED_FLUSH); /* @@ -3274,7 +3259,7 @@ out: if (!flush_count && !atomic_read(&queue->set_pci_flags_count)) flush_count += qeth_flush_buffers_on_no_pci(queue); if (flush_count) - qeth_flush_buffers(queue, 0, start_index, flush_count); + qeth_flush_buffers(queue, start_index, flush_count); } /* at this point the queue is UNLOCKED again */ if (queue->card->options.performance_stats && do_pack) @@ -3686,10 +3671,6 @@ static int qeth_qdio_establish(struct qeth_card *card) init_data.q_format = qeth_get_qdio_q_format(card); init_data.qib_param_field_format = 0; init_data.qib_param_field = qib_param_field; - init_data.min_input_threshold = QETH_MIN_INPUT_THRESHOLD; - init_data.max_input_threshold = QETH_MAX_INPUT_THRESHOLD; - init_data.min_output_threshold = QETH_MIN_OUTPUT_THRESHOLD; - init_data.max_output_threshold = QETH_MAX_OUTPUT_THRESHOLD; init_data.no_input_qs = 1; init_data.no_output_qs = card->qdio.no_out_queues; init_data.input_handler = card->discipline.input_handler; @@ -3751,8 +3732,9 @@ static int qeth_core_driver_group(const char *buf, struct device *root_dev, int qeth_core_hardsetup_card(struct qeth_card *card) { + struct qdio_ssqd_desc *qdio_ssqd; int retries = 3; - int mpno; + int mpno = 0; int rc; QETH_DBF_TEXT(SETUP, 2, "hrdsetup"); @@ -3784,7 +3766,10 @@ retry: QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc); return rc; } - mpno = qdio_get_ssqd_pct(CARD_DDEV(card)); + + qdio_ssqd = qdio_get_ssqd_desc(CARD_DDEV(card)); + if (qdio_ssqd) + mpno = qdio_ssqd->pcnt; if (mpno) mpno = min(mpno - 1, QETH_MAX_PORTNO); if (card->info.portno > mpno) { diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index f682f7b..3fbc3bd 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -726,8 +726,7 @@ tx_drop: } static void qeth_l2_qdio_input_handler(struct ccw_device *ccwdev, - unsigned int status, unsigned int qdio_err, - unsigned int siga_err, unsigned int queue, + unsigned int qdio_err, unsigned int queue, int first_element, int count, unsigned long card_ptr) { struct net_device *net_dev; @@ -742,23 +741,20 @@ static void qeth_l2_qdio_input_handler(struct ccw_device *ccwdev, card->perf_stats.inbound_cnt++; card->perf_stats.inbound_start_time = qeth_get_micros(); } - if (status & QDIO_STATUS_LOOK_FOR_ERROR) { - if (status & QDIO_STATUS_ACTIVATE_CHECK_CONDITION) { - QETH_DBF_TEXT(TRACE, 1, "qdinchk"); - QETH_DBF_TEXT_(TRACE, 1, "%s", CARD_BUS_ID(card)); - QETH_DBF_TEXT_(TRACE, 1, "%04X%04X", first_element, - count); - QETH_DBF_TEXT_(TRACE, 1, "%04X%04X", queue, status); - qeth_schedule_recovery(card); - return; - } + if (qdio_err & QDIO_ERROR_ACTIVATE_CHECK_CONDITION) { + QETH_DBF_TEXT(TRACE, 1, "qdinchk"); + QETH_DBF_TEXT_(TRACE, 1, "%s", CARD_BUS_ID(card)); + QETH_DBF_TEXT_(TRACE, 1, "%04X%04X", first_element, + count); + QETH_DBF_TEXT_(TRACE, 1, "%04X", queue); + qeth_schedule_recovery(card); + return; } for (i = first_element; i < (first_element + count); ++i) { index = i % QDIO_MAX_BUFFERS_PER_Q; buffer = &card->qdio.in_q->bufs[index]; - if (!((status & QDIO_STATUS_LOOK_FOR_ERROR) && - qeth_check_qdio_errors(buffer->buffer, - qdio_err, siga_err, "qinerr"))) + if (!(qdio_err && + qeth_check_qdio_errors(buffer->buffer, qdio_err, "qinerr"))) qeth_l2_process_inbound_buffer(card, buffer, index); /* clear buffer and give back to hardware */ qeth_put_buffer_pool_entry(card, buffer->pool_entry); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 06deaee..22f64aa 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2939,8 +2939,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) } static void qeth_l3_qdio_input_handler(struct ccw_device *ccwdev, - unsigned int status, unsigned int qdio_err, - unsigned int siga_err, unsigned int queue, int first_element, + unsigned int qdio_err, unsigned int queue, int first_element, int count, unsigned long card_ptr) { struct net_device *net_dev; @@ -2955,23 +2954,21 @@ static void qeth_l3_qdio_input_handler(struct ccw_device *ccwdev, card->perf_stats.inbound_cnt++; card->perf_stats.inbound_start_time = qeth_get_micros(); } - if (status & QDIO_STATUS_LOOK_FOR_ERROR) { - if (status & QDIO_STATUS_ACTIVATE_CHECK_CONDITION) { - QETH_DBF_TEXT(TRACE, 1, "qdinchk"); - QETH_DBF_TEXT_(TRACE, 1, "%s", CARD_BUS_ID(card)); - QETH_DBF_TEXT_(TRACE, 1, "%04X%04X", - first_element, count); - QETH_DBF_TEXT_(TRACE, 1, "%04X%04X", queue, status); - qeth_schedule_recovery(card); - return; - } + if (qdio_err & QDIO_ERROR_ACTIVATE_CHECK_CONDITION) { + QETH_DBF_TEXT(TRACE, 1, "qdinchk"); + QETH_DBF_TEXT_(TRACE, 1, "%s", CARD_BUS_ID(card)); + QETH_DBF_TEXT_(TRACE, 1, "%04X%04X", + first_element, count); + QETH_DBF_TEXT_(TRACE, 1, "%04X", queue); + qeth_schedule_recovery(card); + return; } for (i = first_element; i < (first_element + count); ++i) { index = i % QDIO_MAX_BUFFERS_PER_Q; buffer = &card->qdio.in_q->bufs[index]; - if (!((status & QDIO_STATUS_LOOK_FOR_ERROR) && + if (!(qdio_err && qeth_check_qdio_errors(buffer->buffer, - qdio_err, siga_err, "qinerr"))) + qdio_err, "qinerr"))) qeth_l3_process_inbound_buffer(card, buffer, index); /* clear buffer and give back to hardware */ qeth_put_buffer_pool_entry(card, buffer->pool_entry); diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index 36169c6..fca48b8 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -297,15 +297,13 @@ void zfcp_hba_dbf_event_fsf_unsol(const char *tag, struct zfcp_adapter *adapter, /** * zfcp_hba_dbf_event_qdio - trace event for QDIO related failure * @adapter: adapter affected by this QDIO related event - * @status: as passed by qdio module * @qdio_error: as passed by qdio module - * @siga_error: as passed by qdio module * @sbal_index: first buffer with error condition, as passed by qdio module * @sbal_count: number of buffers affected, as passed by qdio module */ -void zfcp_hba_dbf_event_qdio(struct zfcp_adapter *adapter, unsigned int status, - unsigned int qdio_error, unsigned int siga_error, - int sbal_index, int sbal_count) +void zfcp_hba_dbf_event_qdio(struct zfcp_adapter *adapter, + unsigned int qdio_error, int sbal_index, + int sbal_count) { struct zfcp_hba_dbf_record *r = &adapter->hba_dbf_buf; unsigned long flags; @@ -313,9 +311,7 @@ void zfcp_hba_dbf_event_qdio(struct zfcp_adapter *adapter, unsigned int status, spin_lock_irqsave(&adapter->hba_dbf_lock, flags); memset(r, 0, sizeof(*r)); strncpy(r->tag, "qdio", ZFCP_DBF_TAG_SIZE); - r->u.qdio.status = status; r->u.qdio.qdio_error = qdio_error; - r->u.qdio.siga_error = siga_error; r->u.qdio.sbal_index = sbal_index; r->u.qdio.sbal_count = sbal_count; debug_event(adapter->hba_dbf, 0, r, sizeof(*r)); @@ -398,9 +394,7 @@ static void zfcp_hba_dbf_view_status(char **p, static void zfcp_hba_dbf_view_qdio(char **p, struct zfcp_hba_dbf_record_qdio *r) { - zfcp_dbf_out(p, "status", "0x%08x", r->status); zfcp_dbf_out(p, "qdio_error", "0x%08x", r->qdio_error); - zfcp_dbf_out(p, "siga_error", "0x%08x", r->siga_error); zfcp_dbf_out(p, "sbal_index", "0x%02x", r->sbal_index); zfcp_dbf_out(p, "sbal_count", "0x%02x", r->sbal_count); } diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h index d04aea6..0ddb184 100644 --- a/drivers/s390/scsi/zfcp_dbf.h +++ b/drivers/s390/scsi/zfcp_dbf.h @@ -139,9 +139,7 @@ struct zfcp_hba_dbf_record_status { } __attribute__ ((packed)); struct zfcp_hba_dbf_record_qdio { - u32 status; u32 qdio_error; - u32 siga_error; u8 sbal_index; u8 sbal_count; } __attribute__ ((packed)); diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 8065b2b..edfdb21 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -48,9 +48,8 @@ extern void zfcp_rec_dbf_event_action(u8, struct zfcp_erp_action *); extern void zfcp_hba_dbf_event_fsf_response(struct zfcp_fsf_req *); extern void zfcp_hba_dbf_event_fsf_unsol(const char *, struct zfcp_adapter *, struct fsf_status_read_buffer *); -extern void zfcp_hba_dbf_event_qdio(struct zfcp_adapter *, - unsigned int, unsigned int, unsigned int, - int, int); +extern void zfcp_hba_dbf_event_qdio(struct zfcp_adapter *, unsigned int, int, + int); extern void zfcp_san_dbf_event_ct_request(struct zfcp_fsf_req *); extern void zfcp_san_dbf_event_ct_response(struct zfcp_fsf_req *); extern void zfcp_san_dbf_event_els_request(struct zfcp_fsf_req *); diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index 72e3094..d6dbd65 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -74,17 +74,15 @@ static void zfcp_qdio_zero_sbals(struct qdio_buffer *sbal[], int first, int cnt) } } -static void zfcp_qdio_int_req(struct ccw_device *cdev, unsigned int status, - unsigned int qdio_err, unsigned int siga_err, - unsigned int queue_no, int first, int count, +static void zfcp_qdio_int_req(struct ccw_device *cdev, unsigned int qdio_err, + int queue_no, int first, int count, unsigned long parm) { struct zfcp_adapter *adapter = (struct zfcp_adapter *) parm; struct zfcp_qdio_queue *queue = &adapter->req_q; - if (unlikely(status & QDIO_STATUS_LOOK_FOR_ERROR)) { - zfcp_hba_dbf_event_qdio(adapter, status, qdio_err, siga_err, - first, count); + if (unlikely(qdio_err)) { + zfcp_hba_dbf_event_qdio(adapter, qdio_err, first, count); zfcp_qdio_handler_error(adapter, 140); return; } @@ -129,8 +127,7 @@ static void zfcp_qdio_resp_put_back(struct zfcp_adapter *adapter, int processed) count = atomic_read(&queue->count) + processed; - retval = do_QDIO(cdev, QDIO_FLAG_SYNC_INPUT | QDIO_FLAG_UNDER_INTERRUPT, - 0, start, count, NULL); + retval = do_QDIO(cdev, QDIO_FLAG_SYNC_INPUT, 0, start, count); if (unlikely(retval)) { atomic_set(&queue->count, count); @@ -142,9 +139,8 @@ static void zfcp_qdio_resp_put_back(struct zfcp_adapter *adapter, int processed) } } -static void zfcp_qdio_int_resp(struct ccw_device *cdev, unsigned int status, - unsigned int qdio_err, unsigned int siga_err, - unsigned int queue_no, int first, int count, +static void zfcp_qdio_int_resp(struct ccw_device *cdev, unsigned int qdio_err, + int queue_no, int first, int count, unsigned long parm) { struct zfcp_adapter *adapter = (struct zfcp_adapter *) parm; @@ -152,9 +148,8 @@ static void zfcp_qdio_int_resp(struct ccw_device *cdev, unsigned int status, volatile struct qdio_buffer_element *sbale; int sbal_idx, sbale_idx, sbal_no; - if (unlikely(status & QDIO_STATUS_LOOK_FOR_ERROR)) { - zfcp_hba_dbf_event_qdio(adapter, status, qdio_err, siga_err, - first, count); + if (unlikely(qdio_err)) { + zfcp_hba_dbf_event_qdio(adapter, qdio_err, first, count); zfcp_qdio_handler_error(adapter, 147); return; } @@ -362,7 +357,7 @@ int zfcp_qdio_send(struct zfcp_fsf_req *fsf_req) } retval = do_QDIO(adapter->ccw_device, QDIO_FLAG_SYNC_OUTPUT, 0, first, - count, NULL); + count); if (unlikely(retval)) { zfcp_qdio_zero_sbals(req_q->sbal, first, count); return retval; @@ -400,10 +395,6 @@ int zfcp_qdio_allocate(struct zfcp_adapter *adapter) init_data->qib_param_field = NULL; init_data->input_slib_elements = NULL; init_data->output_slib_elements = NULL; - init_data->min_input_threshold = 1; - init_data->max_input_threshold = 5000; - init_data->min_output_threshold = 1; - init_data->max_output_threshold = 1000; init_data->no_input_qs = 1; init_data->no_output_qs = 1; init_data->input_handler = zfcp_qdio_int_resp; @@ -436,9 +427,7 @@ void zfcp_qdio_close(struct zfcp_adapter *adapter) atomic_clear_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status); spin_unlock(&req_q->lock); - while (qdio_shutdown(adapter->ccw_device, QDIO_FLAG_CLEANUP_USING_CLEAR) - == -EINPROGRESS) - ssleep(1); + qdio_shutdown(adapter->ccw_device, QDIO_FLAG_CLEANUP_USING_CLEAR); /* cleanup used outbound sbals */ count = atomic_read(&req_q->count); @@ -473,7 +462,7 @@ int zfcp_qdio_open(struct zfcp_adapter *adapter) return -EIO; } - if (qdio_activate(adapter->ccw_device, 0)) { + if (qdio_activate(adapter->ccw_device)) { dev_err(&adapter->ccw_device->dev, "Activate of QDIO queues failed.\n"); goto failed_qdio; @@ -487,7 +476,7 @@ int zfcp_qdio_open(struct zfcp_adapter *adapter) } if (do_QDIO(adapter->ccw_device, QDIO_FLAG_SYNC_INPUT, 0, 0, - QDIO_MAX_BUFFERS_PER_Q, NULL)) { + QDIO_MAX_BUFFERS_PER_Q)) { dev_err(&adapter->ccw_device->dev, "Init of QDIO response queue failed.\n"); goto failed_qdio; @@ -501,9 +490,6 @@ int zfcp_qdio_open(struct zfcp_adapter *adapter) return 0; failed_qdio: - while (qdio_shutdown(adapter->ccw_device, QDIO_FLAG_CLEANUP_USING_CLEAR) - == -EINPROGRESS) - ssleep(1); - + qdio_shutdown(adapter->ccw_device, QDIO_FLAG_CLEANUP_USING_CLEAR); return -EIO; } diff --git a/include/asm-s390/qdio.h b/include/asm-s390/qdio.h index 1124034..6813772 100644 --- a/include/asm-s390/qdio.h +++ b/include/asm-s390/qdio.h @@ -1,404 +1,382 @@ /* * linux/include/asm-s390/qdio.h * - * Linux for S/390 QDIO base support, Hipersocket base support - * version 2 - * - * Copyright 2000,2002 IBM Corporation + * Copyright 2000,2008 IBM Corp. * Author(s): Utz Bacher + * Jan Glauber * */ #ifndef __QDIO_H__ #define __QDIO_H__ -/* note, that most of the typedef's are from ingo. */ - #include #include #include -#define QDIO_NAME "qdio " - -#ifndef __s390x__ -#define QDIO_32_BIT -#endif /* __s390x__ */ - -/**** CONSTANTS, that are relied on without using these symbols *****/ -#define QDIO_MAX_QUEUES_PER_IRQ 32 /* used in width of unsigned int */ -/************************ END of CONSTANTS **************************/ -#define QDIO_MAX_BUFFERS_PER_Q 128 /* must be a power of 2 (%x=&(x-1)*/ -#define QDIO_BUF_ORDER 7 /* 2**this == number of pages used for sbals in 1 q */ -#define QDIO_MAX_ELEMENTS_PER_BUFFER 16 -#define SBAL_SIZE 256 - -#define QDIO_QETH_QFMT 0 -#define QDIO_ZFCP_QFMT 1 -#define QDIO_IQDIO_QFMT 2 -#define QDIO_IQDIO_QFMT_ASYNCH 3 - -struct qdio_buffer_element{ - unsigned int flags; - unsigned int length; -#ifdef QDIO_32_BIT - void *reserved; -#endif /* QDIO_32_BIT */ - void *addr; -} __attribute__ ((packed,aligned(16))); - -struct qdio_buffer{ - volatile struct qdio_buffer_element element[16]; -} __attribute__ ((packed,aligned(256))); - - -/* params are: ccw_device, status, qdio_error, siga_error, - queue_number, first element processed, number of elements processed, - int_parm */ -typedef void qdio_handler_t(struct ccw_device *,unsigned int,unsigned int, - unsigned int,unsigned int,int,int,unsigned long); - - -#define QDIO_STATUS_INBOUND_INT 0x01 -#define QDIO_STATUS_OUTBOUND_INT 0x02 -#define QDIO_STATUS_LOOK_FOR_ERROR 0x04 -#define QDIO_STATUS_MORE_THAN_ONE_QDIO_ERROR 0x08 -#define QDIO_STATUS_MORE_THAN_ONE_SIGA_ERROR 0x10 -#define QDIO_STATUS_ACTIVATE_CHECK_CONDITION 0x20 - -#define QDIO_SIGA_ERROR_ACCESS_EXCEPTION 0x10 -#define QDIO_SIGA_ERROR_B_BIT_SET 0x20 - -/* for qdio_initialize */ -#define QDIO_INBOUND_0COPY_SBALS 0x01 -#define QDIO_OUTBOUND_0COPY_SBALS 0x02 -#define QDIO_USE_OUTBOUND_PCIS 0x04 - -/* for qdio_cleanup */ -#define QDIO_FLAG_CLEANUP_USING_CLEAR 0x01 -#define QDIO_FLAG_CLEANUP_USING_HALT 0x02 - -struct qdio_initialize { - struct ccw_device *cdev; - unsigned char q_format; - unsigned char adapter_name[8]; - unsigned int qib_param_field_format; /*adapter dependent*/ - /* pointer to 128 bytes or NULL, if no param field */ - unsigned char *qib_param_field; /* adapter dependent */ - /* pointer to no_queues*128 words of data or NULL */ - unsigned long *input_slib_elements; - unsigned long *output_slib_elements; - unsigned int min_input_threshold; - unsigned int max_input_threshold; - unsigned int min_output_threshold; - unsigned int max_output_threshold; - unsigned int no_input_qs; - unsigned int no_output_qs; - qdio_handler_t *input_handler; - qdio_handler_t *output_handler; - unsigned long int_parm; - unsigned long flags; - void **input_sbal_addr_array; /* addr of n*128 void ptrs */ - void **output_sbal_addr_array; /* addr of n*128 void ptrs */ -}; - -extern int qdio_initialize(struct qdio_initialize *init_data); -extern int qdio_allocate(struct qdio_initialize *init_data); -extern int qdio_establish(struct qdio_initialize *init_data); - -extern int qdio_activate(struct ccw_device *,int flags); - -#define QDIO_STATE_MUST_USE_OUTB_PCI 0x00000001 -#define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */ -#define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_initialize */ -#define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */ -#define QDIO_STATE_STOPPED 0x00000010 /* after queues went down */ -extern unsigned long qdio_get_status(int irq); - - -#define QDIO_FLAG_SYNC_INPUT 0x01 -#define QDIO_FLAG_SYNC_OUTPUT 0x02 -#define QDIO_FLAG_UNDER_INTERRUPT 0x04 -#define QDIO_FLAG_NO_INPUT_INTERRUPT_CONTEXT 0x08 /* no effect on - adapter interrupts */ -#define QDIO_FLAG_DONT_SIGA 0x10 -#define QDIO_FLAG_PCI_OUT 0x20 - -extern int do_QDIO(struct ccw_device*, unsigned int flags, - unsigned int queue_number, - unsigned int qidx,unsigned int count, - struct qdio_buffer *buffers); - -extern int qdio_get_ssqd_pct(struct ccw_device*); -extern int qdio_synchronize(struct ccw_device*, unsigned int flags, - unsigned int queue_number); - -extern int qdio_cleanup(struct ccw_device*, int how); -extern int qdio_shutdown(struct ccw_device*, int how); -extern int qdio_free(struct ccw_device*); - -unsigned char qdio_get_slsb_state(struct ccw_device*, unsigned int flag, - unsigned int queue_number, - unsigned int qidx); - -extern void qdio_init_scrubber(void); - +#define QDIO_MAX_QUEUES_PER_IRQ 32 +#define QDIO_MAX_BUFFERS_PER_Q 128 +#define QDIO_MAX_BUFFERS_MASK (QDIO_MAX_BUFFERS_PER_Q - 1) +#define QDIO_MAX_ELEMENTS_PER_BUFFER 16 +#define QDIO_SBAL_SIZE 256 + +#define QDIO_QETH_QFMT 0 +#define QDIO_ZFCP_QFMT 1 +#define QDIO_IQDIO_QFMT 2 + +/** + * struct qdesfmt0 - queue descriptor, format 0 + * @sliba: storage list information block address + * @sla: storage list address + * @slsba: storage list state block address + * @akey: access key for DLIB + * @bkey: access key for SL + * @ckey: access key for SBALs + * @dkey: access key for SLSB + */ struct qdesfmt0 { -#ifdef QDIO_32_BIT - unsigned long res1; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long sliba; /* storage-list-information-block - address */ -#ifdef QDIO_32_BIT - unsigned long res2; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long sla; /* storage-list address */ -#ifdef QDIO_32_BIT - unsigned long res3; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long slsba; /* storage-list-state-block address */ - unsigned int res4; /* reserved */ - unsigned int akey : 4; /* access key for DLIB */ - unsigned int bkey : 4; /* access key for SL */ - unsigned int ckey : 4; /* access key for SBALs */ - unsigned int dkey : 4; /* access key for SLSB */ - unsigned int res5 : 16; /* reserved */ + u64 sliba; + u64 sla; + u64 slsba; + u32 : 32; + u32 akey : 4; + u32 bkey : 4; + u32 ckey : 4; + u32 dkey : 4; + u32 : 16; } __attribute__ ((packed)); -/* - * Queue-Description record (QDR) +/** + * struct qdr - queue description record (QDR) + * @qfmt: queue format + * @pfmt: implementation dependent parameter format + * @ac: adapter characteristics + * @iqdcnt: input queue descriptor count + * @oqdcnt: output queue descriptor count + * @iqdsz: inpout queue descriptor size + * @oqdsz: output queue descriptor size + * @qiba: queue information block address + * @qkey: queue information block key + * @qdf0: queue descriptions */ struct qdr { - unsigned int qfmt : 8; /* queue format */ - unsigned int pfmt : 8; /* impl. dep. parameter format */ - unsigned int res1 : 8; /* reserved */ - unsigned int ac : 8; /* adapter characteristics */ - unsigned int res2 : 8; /* reserved */ - unsigned int iqdcnt : 8; /* input-queue-descriptor count */ - unsigned int res3 : 8; /* reserved */ - unsigned int oqdcnt : 8; /* output-queue-descriptor count */ - unsigned int res4 : 8; /* reserved */ - unsigned int iqdsz : 8; /* input-queue-descriptor size */ - unsigned int res5 : 8; /* reserved */ - unsigned int oqdsz : 8; /* output-queue-descriptor size */ - unsigned int res6[9]; /* reserved */ -#ifdef QDIO_32_BIT - unsigned long res7; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long qiba; /* queue-information-block address */ - unsigned int res8; /* reserved */ - unsigned int qkey : 4; /* queue-information-block key */ - unsigned int res9 : 28; /* reserved */ -/* union _qd {*/ /* why this? */ - struct qdesfmt0 qdf0[126]; -/* } qd;*/ -} __attribute__ ((packed,aligned(4096))); - - -/* - * queue information block (QIB) - */ -#define QIB_AC_INBOUND_PCI_SUPPORTED 0x80 -#define QIB_AC_OUTBOUND_PCI_SUPPORTED 0x40 + u32 qfmt : 8; + u32 pfmt : 8; + u32 : 8; + u32 ac : 8; + u32 : 8; + u32 iqdcnt : 8; + u32 : 8; + u32 oqdcnt : 8; + u32 : 8; + u32 iqdsz : 8; + u32 : 8; + u32 oqdsz : 8; + /* private: */ + u32 res[9]; + /* public: */ + u64 qiba; + u32 : 32; + u32 qkey : 4; + u32 : 28; + struct qdesfmt0 qdf0[126]; +} __attribute__ ((packed, aligned(4096))); + +#define QIB_AC_OUTBOUND_PCI_SUPPORTED 0x40 #define QIB_RFLAGS_ENABLE_QEBSM 0x80 +/** + * struct qib - queue information block (QIB) + * @qfmt: queue format + * @pfmt: implementation dependent parameter format + * @rflags: QEBSM + * @ac: adapter characteristics + * @isliba: absolute address of first input SLIB + * @osliba: absolute address of first output SLIB + * @ebcnam: adapter identifier in EBCDIC + * @parm: implementation dependent parameters + */ struct qib { - unsigned int qfmt : 8; /* queue format */ - unsigned int pfmt : 8; /* impl. dep. parameter format */ - unsigned int rflags : 8; /* QEBSM */ - unsigned int ac : 8; /* adapter characteristics */ - unsigned int res2; /* reserved */ -#ifdef QDIO_32_BIT - unsigned long res3; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long isliba; /* absolute address of 1st - input SLIB */ -#ifdef QDIO_32_BIT - unsigned long res4; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long osliba; /* absolute address of 1st - output SLIB */ - unsigned int res5; /* reserved */ - unsigned int res6; /* reserved */ - unsigned char ebcnam[8]; /* adapter identifier in EBCDIC */ - unsigned char res7[88]; /* reserved */ - unsigned char parm[QDIO_MAX_BUFFERS_PER_Q]; - /* implementation dependent - parameters */ -} __attribute__ ((packed,aligned(256))); - - -/* - * storage-list-information block element (SLIBE) + u32 qfmt : 8; + u32 pfmt : 8; + u32 rflags : 8; + u32 ac : 8; + u32 : 32; + u64 isliba; + u64 osliba; + u32 : 32; + u32 : 32; + u8 ebcnam[8]; + /* private: */ + u8 res[88]; + /* public: */ + u8 parm[QDIO_MAX_BUFFERS_PER_Q]; +} __attribute__ ((packed, aligned(256))); + +/** + * struct slibe - storage list information block element (SLIBE) + * @parms: implementation dependent parameters */ struct slibe { -#ifdef QDIO_32_BIT - unsigned long res; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long parms; /* implementation dependent - parameters */ + u64 parms; }; -/* - * storage-list-information block (SLIB) +/** + * struct slib - storage list information block (SLIB) + * @nsliba: next SLIB address (if any) + * @sla: SL address + * @slsba: SLSB address + * @slibe: SLIB elements */ struct slib { -#ifdef QDIO_32_BIT - unsigned long res1; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long nsliba; /* next SLIB address (if any) */ -#ifdef QDIO_32_BIT - unsigned long res2; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long sla; /* SL address */ -#ifdef QDIO_32_BIT - unsigned long res3; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long slsba; /* SLSB address */ - unsigned char res4[1000]; /* reserved */ - struct slibe slibe[QDIO_MAX_BUFFERS_PER_Q]; /* SLIB elements */ -} __attribute__ ((packed,aligned(2048))); - + u64 nsliba; + u64 sla; + u64 slsba; + /* private: */ + u8 res[1000]; + /* public: */ + struct slibe slibe[QDIO_MAX_BUFFERS_PER_Q]; +} __attribute__ ((packed, aligned(2048))); + +/** + * struct sbal_flags - storage block address list flags + * @last: last entry + * @cont: contiguous storage + * @frag: fragmentation + */ struct sbal_flags { - unsigned char res1 : 1; /* reserved */ - unsigned char last : 1; /* last entry */ - unsigned char cont : 1; /* contiguous storage */ - unsigned char res2 : 1; /* reserved */ - unsigned char frag : 2; /* fragmentation (s.below) */ - unsigned char res3 : 2; /* reserved */ + u8 : 1; + u8 last : 1; + u8 cont : 1; + u8 : 1; + u8 frag : 2; + u8 : 2; } __attribute__ ((packed)); -#define SBAL_FLAGS_FIRST_FRAG 0x04000000UL -#define SBAL_FLAGS_MIDDLE_FRAG 0x08000000UL -#define SBAL_FLAGS_LAST_FRAG 0x0c000000UL -#define SBAL_FLAGS_LAST_ENTRY 0x40000000UL -#define SBAL_FLAGS_CONTIGUOUS 0x20000000UL +#define SBAL_FLAGS_FIRST_FRAG 0x04000000UL +#define SBAL_FLAGS_MIDDLE_FRAG 0x08000000UL +#define SBAL_FLAGS_LAST_FRAG 0x0c000000UL +#define SBAL_FLAGS_LAST_ENTRY 0x40000000UL +#define SBAL_FLAGS_CONTIGUOUS 0x20000000UL -#define SBAL_FLAGS0_DATA_CONTINUATION 0x20UL +#define SBAL_FLAGS0_DATA_CONTINUATION 0x20UL /* Awesome OpenFCP extensions */ -#define SBAL_FLAGS0_TYPE_STATUS 0x00UL -#define SBAL_FLAGS0_TYPE_WRITE 0x08UL -#define SBAL_FLAGS0_TYPE_READ 0x10UL -#define SBAL_FLAGS0_TYPE_WRITE_READ 0x18UL -#define SBAL_FLAGS0_MORE_SBALS 0x04UL -#define SBAL_FLAGS0_COMMAND 0x02UL -#define SBAL_FLAGS0_LAST_SBAL 0x00UL -#define SBAL_FLAGS0_ONLY_SBAL SBAL_FLAGS0_COMMAND -#define SBAL_FLAGS0_MIDDLE_SBAL SBAL_FLAGS0_MORE_SBALS -#define SBAL_FLAGS0_FIRST_SBAL SBAL_FLAGS0_MORE_SBALS | SBAL_FLAGS0_COMMAND -/* Naught of interest beyond this point */ - -#define SBAL_FLAGS0_PCI 0x40 +#define SBAL_FLAGS0_TYPE_STATUS 0x00UL +#define SBAL_FLAGS0_TYPE_WRITE 0x08UL +#define SBAL_FLAGS0_TYPE_READ 0x10UL +#define SBAL_FLAGS0_TYPE_WRITE_READ 0x18UL +#define SBAL_FLAGS0_MORE_SBALS 0x04UL +#define SBAL_FLAGS0_COMMAND 0x02UL +#define SBAL_FLAGS0_LAST_SBAL 0x00UL +#define SBAL_FLAGS0_ONLY_SBAL SBAL_FLAGS0_COMMAND +#define SBAL_FLAGS0_MIDDLE_SBAL SBAL_FLAGS0_MORE_SBALS +#define SBAL_FLAGS0_FIRST_SBAL SBAL_FLAGS0_MORE_SBALS | SBAL_FLAGS0_COMMAND +#define SBAL_FLAGS0_PCI 0x40 + +/** + * struct sbal_sbalf_0 - sbal flags for sbale 0 + * @pci: PCI indicator + * @cont: data continuation + * @sbtype: storage-block type (FCP) + */ struct sbal_sbalf_0 { - unsigned char res1 : 1; /* reserved */ - unsigned char pci : 1; /* PCI indicator */ - unsigned char cont : 1; /* data continuation */ - unsigned char sbtype: 2; /* storage-block type (OpenFCP) */ - unsigned char res2 : 3; /* reserved */ + u8 : 1; + u8 pci : 1; + u8 cont : 1; + u8 sbtype : 2; + u8 : 3; } __attribute__ ((packed)); +/** + * struct sbal_sbalf_1 - sbal flags for sbale 1 + * @key: storage key + */ struct sbal_sbalf_1 { - unsigned char res1 : 4; /* reserved */ - unsigned char key : 4; /* storage key */ + u8 : 4; + u8 key : 4; } __attribute__ ((packed)); +/** + * struct sbal_sbalf_14 - sbal flags for sbale 14 + * @erridx: error index + */ struct sbal_sbalf_14 { - unsigned char res1 : 4; /* reserved */ - unsigned char erridx : 4; /* error index */ + u8 : 4; + u8 erridx : 4; } __attribute__ ((packed)); +/** + * struct sbal_sbalf_15 - sbal flags for sbale 15 + * @reason: reason for error state + */ struct sbal_sbalf_15 { - unsigned char reason; /* reserved */ + u8 reason; } __attribute__ ((packed)); +/** + * union sbal_sbalf - storage block address list flags + * @i0: sbalf0 + * @i1: sbalf1 + * @i14: sbalf14 + * @i15: sblaf15 + * @value: raw value + */ union sbal_sbalf { struct sbal_sbalf_0 i0; struct sbal_sbalf_1 i1; struct sbal_sbalf_14 i14; struct sbal_sbalf_15 i15; - unsigned char value; + u8 value; }; -struct sbal_element { - union { - struct sbal_flags bits; /* flags */ - unsigned char value; - } flags; - unsigned int res1 : 16; /* reserved */ - union sbal_sbalf sbalf; /* SBAL flags */ - unsigned int res2 : 16; /* reserved */ - unsigned int count : 16; /* data count */ -#ifdef QDIO_32_BIT - unsigned long res3; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long addr; /* absolute data address */ -} __attribute__ ((packed,aligned(16))); +/** + * struct qdio_buffer_element - SBAL entry + * @flags: flags + * @length: length + * @addr: address +*/ +struct qdio_buffer_element { + u32 flags; + u32 length; +#ifdef CONFIG_32BIT + /* private: */ + void *reserved; + /* public: */ +#endif + void *addr; +} __attribute__ ((packed, aligned(16))); -/* - * strorage-block access-list (SBAL) +/** + * struct qdio_buffer - storage block address list (SBAL) + * @element: SBAL entries */ -struct sbal { - struct sbal_element element[QDIO_MAX_ELEMENTS_PER_BUFFER]; -} __attribute__ ((packed,aligned(256))); +struct qdio_buffer { + struct qdio_buffer_element element[QDIO_MAX_ELEMENTS_PER_BUFFER]; +} __attribute__ ((packed, aligned(256))); -/* - * storage-list (SL) +/** + * struct sl_element - storage list entry + * @sbal: absolute SBAL address */ struct sl_element { -#ifdef QDIO_32_BIT - unsigned long res; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long sbal; /* absolute SBAL address */ +#ifdef CONFIG_32BIT + /* private: */ + unsigned long reserved; + /* public: */ +#endif + unsigned long sbal; } __attribute__ ((packed)); +/** + * struct sl - storage list (SL) + * @element: SL entries + */ struct sl { struct sl_element element[QDIO_MAX_BUFFERS_PER_Q]; -} __attribute__ ((packed,aligned(1024))); +} __attribute__ ((packed, aligned(1024))); -/* - * storage-list-state block (SLSB) +/** + * struct slsb - storage list state block (SLSB) + * @val: state per buffer */ -struct slsb_flags { - unsigned char owner : 2; /* SBAL owner */ - unsigned char type : 1; /* buffer type */ - unsigned char state : 5; /* processing state */ +struct slsb { + u8 val[QDIO_MAX_BUFFERS_PER_Q]; +} __attribute__ ((packed, aligned(256))); + +struct qdio_ssqd_desc { + u8 flags; + u8:8; + u16 sch; + u8 qfmt; + u8 parm; + u8 qdioac1; + u8 sch_class; + u8 pcnt; + u8 icnt; + u8:8; + u8 ocnt; + u8:8; + u8 mbccnt; + u16 qdioac2; + u64 sch_token; + u64:64; } __attribute__ ((packed)); +/* params are: ccw_device, qdio_error, queue_number, + first element processed, number of elements processed, int_parm */ +typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, + int, int, unsigned long); -struct slsb { - union { - unsigned char val[QDIO_MAX_BUFFERS_PER_Q]; - struct slsb_flags flags[QDIO_MAX_BUFFERS_PER_Q]; - } acc; -} __attribute__ ((packed,aligned(256))); +/* qdio errors reported to the upper-layer program */ +#define QDIO_ERROR_SIGA_ACCESS_EXCEPTION 0x10 +#define QDIO_ERROR_SIGA_BUSY 0x20 +#define QDIO_ERROR_ACTIVATE_CHECK_CONDITION 0x40 +#define QDIO_ERROR_SLSB_STATE 0x80 -/* - * SLSB values +/* for qdio_initialize */ +#define QDIO_INBOUND_0COPY_SBALS 0x01 +#define QDIO_OUTBOUND_0COPY_SBALS 0x02 +#define QDIO_USE_OUTBOUND_PCIS 0x04 + +/* for qdio_cleanup */ +#define QDIO_FLAG_CLEANUP_USING_CLEAR 0x01 +#define QDIO_FLAG_CLEANUP_USING_HALT 0x02 + +/** + * struct qdio_initialize - qdio initalization data + * @cdev: associated ccw device + * @q_format: queue format + * @adapter_name: name for the adapter + * @qib_param_field_format: format for qib_parm_field + * @qib_param_field: pointer to 128 bytes or NULL, if no param field + * @input_slib_elements: pointer to no_input_qs * 128 words of data or NULL + * @output_slib_elements: pointer to no_output_qs * 128 words of data or NULL + * @no_input_qs: number of input queues + * @no_output_qs: number of output queues + * @input_handler: handler to be called for input queues + * @output_handler: handler to be called for output queues + * @int_parm: interruption parameter + * @flags: initialization flags + * @input_sbal_addr_array: address of no_input_qs * 128 pointers + * @output_sbal_addr_array: address of no_output_qs * 128 pointers */ -#define SLSB_OWNER_PROG 1 -#define SLSB_OWNER_CU 2 - -#define SLSB_TYPE_INPUT 0 -#define SLSB_TYPE_OUTPUT 1 - -#define SLSB_STATE_NOT_INIT 0 -#define SLSB_STATE_EMPTY 1 -#define SLSB_STATE_PRIMED 2 -#define SLSB_STATE_HALTED 0xe -#define SLSB_STATE_ERROR 0xf - -#define SLSB_P_INPUT_NOT_INIT 0x80 -#define SLSB_P_INPUT_PROCESSING 0x81 -#define SLSB_CU_INPUT_EMPTY 0x41 -#define SLSB_P_INPUT_PRIMED 0x82 -#define SLSB_P_INPUT_HALTED 0x8E -#define SLSB_P_INPUT_ERROR 0x8F - -#define SLSB_P_OUTPUT_NOT_INIT 0xA0 -#define SLSB_P_OUTPUT_EMPTY 0xA1 -#define SLSB_CU_OUTPUT_PRIMED 0x62 -#define SLSB_P_OUTPUT_HALTED 0xAE -#define SLSB_P_OUTPUT_ERROR 0xAF - -#define SLSB_ERROR_DURING_LOOKUP 0xFF +struct qdio_initialize { + struct ccw_device *cdev; + unsigned char q_format; + unsigned char adapter_name[8]; + unsigned int qib_param_field_format; + unsigned char *qib_param_field; + unsigned long *input_slib_elements; + unsigned long *output_slib_elements; + unsigned int no_input_qs; + unsigned int no_output_qs; + qdio_handler_t *input_handler; + qdio_handler_t *output_handler; + unsigned long int_parm; + unsigned long flags; + void **input_sbal_addr_array; + void **output_sbal_addr_array; +}; + +#define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */ +#define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_establish */ +#define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */ +#define QDIO_STATE_STOPPED 0x00000010 /* after queues went down */ + +#define QDIO_FLAG_SYNC_INPUT 0x01 +#define QDIO_FLAG_SYNC_OUTPUT 0x02 +#define QDIO_FLAG_PCI_OUT 0x10 + +extern int qdio_initialize(struct qdio_initialize *init_data); +extern int qdio_allocate(struct qdio_initialize *init_data); +extern int qdio_establish(struct qdio_initialize *init_data); +extern int qdio_activate(struct ccw_device *); + +extern int do_QDIO(struct ccw_device*, unsigned int flags, + int q_nr, int qidx, int count); +extern int qdio_cleanup(struct ccw_device*, int how); +extern int qdio_shutdown(struct ccw_device*, int how); +extern int qdio_free(struct ccw_device *); +extern struct qdio_ssqd_desc *qdio_get_ssqd_desc(struct ccw_device *cdev); #endif /* __QDIO_H__ */ diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h index f09ee3f..4ba14e4 100644 --- a/include/asm-s390/setup.h +++ b/include/asm-s390/setup.h @@ -65,6 +65,7 @@ extern unsigned long machine_flags; #define MACHINE_FLAG_VM (1UL << 0) #define MACHINE_FLAG_IEEE (1UL << 1) +#define MACHINE_FLAG_P390 (1UL << 2) #define MACHINE_FLAG_CSP (1UL << 3) #define MACHINE_FLAG_MVPG (1UL << 4) #define MACHINE_FLAG_DIAG44 (1UL << 5) @@ -77,7 +78,6 @@ extern unsigned long machine_flags; #define MACHINE_IS_VM (machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (machine_flags & MACHINE_FLAG_KVM) -#define MACHINE_IS_P390 (machine_flags & MACHINE_FLAG_P390) #define MACHINE_HAS_DIAG9C (machine_flags & MACHINE_FLAG_DIAG9C) #ifndef __s390x__ -- cgit v0.10.2 From 8586cb60ce85f40431cf06fe97512269d3992f03 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 17 Jul 2008 17:16:49 +0200 Subject: [S390] dasd: use -EOPNOTSUPP instead of -ENOTSUPP return value -ENOTSUPP is not valid in userspace context, use -EOPNOTSUPP instead Signed-off-by: Stefan Haberland Cc: Martin Schwidefsky Signed-off-by: Heiko Carstens diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index ab5c5b4..85fcb43 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -349,7 +349,7 @@ dasd_diag_check_device(struct dasd_device *device) if (rc) { DEV_MESSAGE(KERN_WARNING, device, "failed to retrieve device " "information (rc=%d)", rc); - rc = -ENOTSUPP; + rc = -EOPNOTSUPP; goto out; } @@ -364,7 +364,7 @@ dasd_diag_check_device(struct dasd_device *device) default: DEV_MESSAGE(KERN_WARNING, device, "unsupported device class " "(class=%d)", private->rdc_data.vdev_class); - rc = -ENOTSUPP; + rc = -EOPNOTSUPP; goto out; } -- cgit v0.10.2 From 9fa111372a54f695f65e0de2f2a2108fe6cf3584 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 17 Jul 2008 17:38:17 +0200 Subject: ftrace: fix lockup with MAXSMP MAXSMP brings in lots of use of various bitops in smp_processor_id() and friends - causing ftrace to lock up during bootup: calling anon_inode_init+0x0/0x130 initcall anon_inode_init+0x0/0x130 returned 0 after 0 msecs calling acpi_event_init+0x0/0x57 [ hard hang ] So exclude the bitops facilities from tracing. Signed-off-by: Ingo Molnar diff --git a/lib/Makefile b/lib/Makefile index 2c62a9c..d90d311 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -15,6 +15,9 @@ CFLAGS_REMOVE_string.o = -pg CFLAGS_REMOVE_spinlock_debug.o = -pg CFLAGS_REMOVE_list_debug.o = -pg CFLAGS_REMOVE_debugobjects.o = -pg +CFLAGS_REMOVE_find_next_bit.o = -pg +CFLAGS_REMOVE_cpumask.o = -pg +CFLAGS_REMOVE_bitmap.o = -pg endif lib-$(CONFIG_MMU) += ioremap.o -- cgit v0.10.2 From c349e0a01c3e0f70913db6a5bb61ab204e0602de Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 15 Apr 2008 22:39:31 +0200 Subject: ftrace: do not trace scheduler functions do not trace scheduler functions - it's still a bit fragile and can lock up with: http://redhat.com/~mingo/misc/config-Thu_Jul_17_13_34_52_CEST_2008 Signed-off-by: Ingo Molnar diff --git a/kernel/Makefile b/kernel/Makefile index 0a7ed83..985ddb7 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -11,8 +11,6 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o -CFLAGS_REMOVE_sched.o = -mno-spe - ifdef CONFIG_FTRACE # Do not trace debug files and internal ftrace files CFLAGS_REMOVE_lockdep.o = -pg @@ -21,6 +19,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg CFLAGS_REMOVE_rtmutex-debug.o = -pg CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_sched_clock.o = -pg +CFLAGS_REMOVE_sched.o = -mno-spe -pg endif obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o -- cgit v0.10.2 From 2464a609ded094204a3aed24823745ec58e3c879 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 17 Jul 2008 17:40:48 +0200 Subject: ftrace: do not trace library functions make function tracing more robust: do not trace library functions. We've already got a sizable list of exceptions: ifdef CONFIG_FTRACE # Do not profile string.o, since it may be used in early boot or vdso CFLAGS_REMOVE_string.o = -pg # Also do not profile any debug utilities CFLAGS_REMOVE_spinlock_debug.o = -pg CFLAGS_REMOVE_list_debug.o = -pg CFLAGS_REMOVE_debugobjects.o = -pg CFLAGS_REMOVE_find_next_bit.o = -pg CFLAGS_REMOVE_cpumask.o = -pg CFLAGS_REMOVE_bitmap.o = -pg endif ... and the pattern has been that random library functionality showed up in ftrace's critical path (outside of its recursion check), causing hard to debug lockups. So be a bit defensive about it and exclude all lib/*.o functions by default. It's not that they are overly interesting for tracing purposes anyway. Specific ones can still be traced, in an opt-in manner. Signed-off-by: Ingo Molnar diff --git a/lib/Makefile b/lib/Makefile index d90d311..818c4d4 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -2,24 +2,17 @@ # Makefile for some libs needed in the kernel. # +ifdef CONFIG_FTRACE +ORIG_CFLAGS := $(KBUILD_CFLAGS) +KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) +endif + lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o prio_heap.o ratelimit.o -ifdef CONFIG_FTRACE -# Do not profile string.o, since it may be used in early boot or vdso -CFLAGS_REMOVE_string.o = -pg -# Also do not profile any debug utilities -CFLAGS_REMOVE_spinlock_debug.o = -pg -CFLAGS_REMOVE_list_debug.o = -pg -CFLAGS_REMOVE_debugobjects.o = -pg -CFLAGS_REMOVE_find_next_bit.o = -pg -CFLAGS_REMOVE_cpumask.o = -pg -CFLAGS_REMOVE_bitmap.o = -pg -endif - lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o -- cgit v0.10.2 From 7023cc61292f9cd61d99521206480f6e387132ff Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 17 Jul 2008 01:06:55 -0600 Subject: Fix collateral damage to top level Makefile The patch named "powerpc/mpc5121: Add clock driver", also contained an unrelated and bogus change to the top-level makefile. This patch backs out the bad bit. SHA1 of offending patch: 137e95906e294913fab02162e8a1948ade49acb5) Signed-off-by: Grant Likely Acked-by: Benjamin Herrenschmidt Repented-by: John Rigby [ Heh. Normally I pick these out from the diffstats, but I guess I've grown to trust the ppc tree too much ;) - Linus ] Signed-off-by: Linus Torvalds diff --git a/Makefile b/Makefile index 1564577..6192922 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,3 @@ -FRED=42 VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 26 -- cgit v0.10.2 From 7259d936c6af216198ae6af3a25ac6c9dbdbe779 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 16 Jul 2008 23:44:32 -0700 Subject: Update scripts/Makefile.fwinst to cope with older make Also fix unwanted rebuilds of the firmware/ihex2fw tool by including the .ihex2fw.cmd file when present. Signed-off-by: David Woodhouse Reported-and-tested-by: Wang Chen Signed-off-by: Linus Torvalds diff --git a/scripts/Makefile.fwinst b/scripts/Makefile.fwinst index 3d2f460..c972c0f 100644 --- a/scripts/Makefile.fwinst +++ b/scripts/Makefile.fwinst @@ -28,18 +28,39 @@ endif installed-fw := $(addprefix $(INSTALL_FW_PATH)/,$(fw-shipped-all)) installed-fw-dirs := $(sort $(dir $(installed-fw))) $(INSTALL_FW_PATH)/. +# Workaround for make < 3.81, where .SECONDEXPANSION doesn't work. +PHONY += $(INSTALL_FW_PATH)/$$(%) install-all-dirs +$(INSTALL_FW_PATH)/$$(%): install-all-dirs + @true +install-all-dirs: $(installed-fw-dirs) + @true + quiet_cmd_install = INSTALL $(subst $(srctree)/,,$@) cmd_install = $(INSTALL) -m0644 $< $@ $(installed-fw-dirs): $(call cmd,mkdir) -$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% | $(INSTALL_FW_PATH)/$$(dir %)/ +$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% | $(INSTALL_FW_PATH)/$$(dir %) $(call cmd,install) -.PHONY: __fw_install __fw_modinst FORCE +PHONY += __fw_install __fw_modinst FORCE + +.PHONY: $(PHONY) __fw_install: $(installed-fw) __fw_modinst: $(mod-fw) FORCE: + +# Read all saved command lines and dependencies for the $(targets) we +# may be building using $(if_changed{,_dep}). As an optimization, we +# don't need to read them if the target does not exist; we will rebuild +# anyway in that case. + +targets := $(wildcard $(sort $(targets))) +cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) + +ifneq ($(cmd_files),) + include $(cmd_files) +endif -- cgit v0.10.2 From 2f73ccab5628b4f8e8f4b93fea8082dd31a87a10 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 17 Jul 2008 18:09:12 +0200 Subject: fix build error of arch/ia64/kvm/* Fix calls of smp_call_function*() in arch/ia64/kvm for recent API changes. CC [M] arch/ia64/kvm/kvm-ia64.o arch/ia64/kvm/kvm-ia64.c: In function 'handle_global_purge': arch/ia64/kvm/kvm-ia64.c:398: error: too many arguments to function 'smp_call_function_single' arch/ia64/kvm/kvm-ia64.c: In function 'kvm_vcpu_kick': arch/ia64/kvm/kvm-ia64.c:1696: error: too many arguments to function 'smp_call_function_single' Signed-off-by: Takashi Iwai Acked-by Xiantao Zhang Signed-off-by: Linus Torvalds diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 318b811..68c978b 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -395,7 +395,7 @@ static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (kvm->vcpus[i]->cpu != -1) { call_data.vcpu = kvm->vcpus[i]; smp_call_function_single(kvm->vcpus[i]->cpu, - vcpu_global_purge, &call_data, 0, 1); + vcpu_global_purge, &call_data, 1); } else printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n"); @@ -1693,7 +1693,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) wake_up_interruptible(&vcpu->wq); if (vcpu->guest_mode) - smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); + smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0); } int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig) diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c index 091f936..0c69d9e 100644 --- a/arch/ia64/kvm/kvm_fw.c +++ b/arch/ia64/kvm/kvm_fw.c @@ -130,7 +130,7 @@ static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu) args.cache_type = gr29; args.operation = gr30; smp_call_function(remote_pal_cache_flush, - (void *)&args, 1, 1); + (void *)&args, 1); if (args.status != 0) printk(KERN_ERR"pal_cache_flush error!," "status:0x%lx\n", args.status); -- cgit v0.10.2 From 9354094a95aed456a46b353b1051a7e2fab29045 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 14 Jul 2008 23:29:01 -0700 Subject: x86: fix numaq_tsc_disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: arch/x86/kernel/numaq_32.c: In function ‘numaq_tsc_disable’: arch/x86/kernel/numaq_32.c:99: warning: ‘return’ with a value, in function returning void Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 5b20a5e..a23e823 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -96,7 +96,7 @@ int __init get_memcfg_numaq(void) void __init numaq_tsc_disable(void) { if (!found_numaq) - return -1; + return; if (num_online_nodes() > 1) { printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); -- cgit v0.10.2 From 2567d71cc7acd99f0a0dd02e17fe17fd7df7b30c Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 15 Jul 2008 15:02:27 +1000 Subject: x86: fix asm/e820.h for userspace inclusion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit asm-x86/e820.h is included from userspace. 'x86: make e820.c to have common functions' (b79cd8f1268bab57ff85b19d131f7f23deab2dee) broke it: make -C Documentation/lguest cc -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include lguest.c -lz -o lguest In file included from ../../include/asm-x86/bootparam.h:8, from lguest.c:45: ../../include/asm/e820.h:66: error: expected ‘)’ before ‘start’ ../../include/asm/e820.h:67: error: expected ‘)’ before ‘start’ ../../include/asm/e820.h:68: error: expected ‘)’ before ‘start’ ../../include/asm/e820.h:72: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘e820_update_range’ ... Signed-off-by: Rusty Russell Cc: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index 33e793e..06633b0 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -59,6 +59,7 @@ struct e820map { struct e820entry map[E820_X_MAX]; }; +#ifdef __KERNEL__ /* see comment in arch/x86/kernel/e820.c */ extern struct e820map e820; extern struct e820map e820_saved; @@ -115,7 +116,7 @@ extern void setup_memory_map(void); extern char *default_machine_specific_memory_setup(void); extern char *machine_specific_memory_setup(void); extern char *memory_setup(void); - +#endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ #define ISA_START_ADDRESS 0xa0000 -- cgit v0.10.2