From d694ad62bf539dbb20a0899ac2a954555f9e4a83 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Mon, 25 Jul 2011 17:11:47 -0700 Subject: ipc/sem.c: fix race with concurrent semtimedop() timeouts and IPC_RMID If a semaphore array is removed and in parallel a sleeping task is woken up (signal or timeout, does not matter), then the woken up task does not wait until wake_up_sem_queue_do() is completed. This will cause crashes, because wake_up_sem_queue_do() will read from a stale pointer. The fix is simple: Regardless of anything, always call get_queue_result(). This function waits until wake_up_sem_queue_do() has finished it's task. Addresses https://bugzilla.kernel.org/show_bug.cgi?id=27142 Reported-by: Yuriy Yevtukhov Reported-by: Harald Laabs Signed-off-by: Manfred Spraul Acked-by: Eric Dumazet Cc: [2.6.35+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/ipc/sem.c b/ipc/sem.c index 8b929e6..c8e00f8 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -1450,15 +1450,24 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, } sma = sem_lock(ns, semid); + + /* + * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing. + */ + error = get_queue_result(&queue); + + /* + * Array removed? If yes, leave without sem_unlock(). + */ if (IS_ERR(sma)) { error = -EIDRM; goto out_free; } - error = get_queue_result(&queue); /* - * If queue.status != -EINTR we are woken up by another process + * If queue.status != -EINTR we are woken up by another process. + * Leave without unlink_queue(), but with sem_unlock(). */ if (error != -EINTR) { -- cgit v0.10.2 From 2b37c35e6552b0d04d5db9728bc7af22d53f731a Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Mon, 25 Jul 2011 17:11:49 -0700 Subject: fs/hugetlbfs/inode.c: fix pgoff alignment checking on 32-bit This: vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT) is incorrect on 32-bit. It causes us to & the pgoff with something that looks like this (for a 4m hugepage): 0xfff003ff. The mask should be flipped and *then* shifted, to give you 0x0000_03fff. Signed-off-by: Becky Bruce Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 7aafeb8..537a209 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -94,7 +94,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_flags |= VM_HUGETLB | VM_RESERVED; vma->vm_ops = &hugetlb_vm_ops; - if (vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT)) + if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) return -EINVAL; vma_len = (loff_t)(vma->vm_end - vma->vm_start); -- cgit v0.10.2 From ee8f248d266ec6966c0ce6b7dec24de43dcc1b58 Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Mon, 25 Jul 2011 17:11:50 -0700 Subject: hugetlb: add phys addr to struct huge_bootmem_page This is needed on HIGHMEM systems - we don't always have a virtual address so store the physical address and map it in as needed. [akpm@linux-foundation.org: cleanup] Signed-off-by: Becky Bruce Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 59225ef..19644e0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -231,6 +231,9 @@ struct hstate { struct huge_bootmem_page { struct list_head list; struct hstate *hstate; +#ifdef CONFIG_HIGHMEM + phys_addr_t phys; +#endif }; struct page *alloc_huge_page_node(struct hstate *h, int nid); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index bfcf153..c6d342d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1105,8 +1105,16 @@ static void __init gather_bootmem_prealloc(void) struct huge_bootmem_page *m; list_for_each_entry(m, &huge_boot_pages, list) { - struct page *page = virt_to_page(m); struct hstate *h = m->hstate; + struct page *page; + +#ifdef CONFIG_HIGHMEM + page = pfn_to_page(m->phys >> PAGE_SHIFT); + free_bootmem_late((unsigned long)m, + sizeof(struct huge_bootmem_page)); +#else + page = virt_to_page(m); +#endif __ClearPageReserved(page); WARN_ON(page_count(page) != 1); prep_compound_huge_page(page, h->order); -- cgit v0.10.2 From 33dd4e0ec91138c3d80e790c08a3db47426c81f2 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 25 Jul 2011 17:11:51 -0700 Subject: mm: make some struct page's const These uses are read-only and in a subsequent patch I have a const struct page in my hand... [akpm@linux-foundation.org: fix warnings in lowmem_page_address()] Signed-off-by: Ian Campbell Cc: Rik van Riel Cc: Andrea Arcangeli Cc: Mel Gorman Cc: Michel Lespinasse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mm.h b/include/linux/mm.h index 8a45ad2..6321e84 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -637,7 +637,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) #define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) -static inline enum zone_type page_zonenum(struct page *page) +static inline enum zone_type page_zonenum(const struct page *page) { return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } @@ -665,15 +665,15 @@ static inline int zone_to_nid(struct zone *zone) } #ifdef NODE_NOT_IN_PAGE_FLAGS -extern int page_to_nid(struct page *page); +extern int page_to_nid(const struct page *page); #else -static inline int page_to_nid(struct page *page) +static inline int page_to_nid(const struct page *page) { return (page->flags >> NODES_PGSHIFT) & NODES_MASK; } #endif -static inline struct zone *page_zone(struct page *page) +static inline struct zone *page_zone(const struct page *page) { return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)]; } @@ -718,9 +718,9 @@ static inline void set_page_links(struct page *page, enum zone_type zone, */ #include -static __always_inline void *lowmem_page_address(struct page *page) +static __always_inline void *lowmem_page_address(const struct page *page) { - return __va(PFN_PHYS(page_to_pfn(page))); + return __va(PFN_PHYS(page_to_pfn((struct page *)page))); } #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) diff --git a/mm/sparse.c b/mm/sparse.c index aa64b12..858e1df 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -40,7 +40,7 @@ static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; #endif -int page_to_nid(struct page *page) +int page_to_nid(const struct page *page) { return section_to_node_table[page_to_section(page)]; } -- cgit v0.10.2 From 67db392d1124e14684e23deb572de2a63b9b3b69 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 25 Jul 2011 17:11:52 -0700 Subject: mm: use const struct page for r/o page-flag accessor methods In a subsquent patch I have a const struct page in my hand... [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Ian Campbell Cc: Andrea Arcangeli Cc: Rik van Riel Cc: Martin Schwidefsky Cc: Michel Lespinasse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 6081493..3e5a1b1 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -135,7 +135,7 @@ enum pageflags { * Macros to create function definitions for page flags */ #define TESTPAGEFLAG(uname, lname) \ -static inline int Page##uname(struct page *page) \ +static inline int Page##uname(const struct page *page) \ { return test_bit(PG_##lname, &page->flags); } #define SETPAGEFLAG(uname, lname) \ @@ -173,7 +173,7 @@ static inline int __TestClearPage##uname(struct page *page) \ __SETPAGEFLAG(uname, lname) __CLEARPAGEFLAG(uname, lname) #define PAGEFLAG_FALSE(uname) \ -static inline int Page##uname(struct page *page) \ +static inline int Page##uname(const struct page *page) \ { return 0; } #define TESTSCFLAG(uname, lname) \ -- cgit v0.10.2 From 0d0138ebe24b94065580bd2601f8bb7eb6152f56 Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Mon, 25 Jul 2011 17:11:53 -0700 Subject: xtensa: prevent arbitrary read in ptrace Prevent an arbitrary kernel read. Check the user pointer with access_ok() before copying data in. [akpm@linux-foundation.org: s/EIO/EFAULT/] Signed-off-by: Dan Rosenberg Cc: Christian Zankel Cc: Oleg Nesterov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index c72c947..a0d042a 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -147,6 +147,9 @@ int ptrace_setxregs(struct task_struct *child, void __user *uregs) elf_xtregs_t *xtregs = uregs; int ret = 0; + if (!access_ok(VERIFY_READ, uregs, sizeof(elf_xtregs_t))) + return -EFAULT; + #if XTENSA_HAVE_COPROCESSORS /* Flush all coprocessors before we overwrite them. */ coprocessor_flush_all(ti); -- cgit v0.10.2 From 90b03f5052be92ab0ba0aa36abcf33a207706866 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 25 Jul 2011 17:11:54 -0700 Subject: xtensa: fix a build error in arch/xtensa/include/asm/uaccess.h Fix the following build error: arch/xtensa/include/asm/uaccess.h:403: error: implicit declaration of function 'prefetch' arch/xtensa/include/asm/uaccess.h:412: error: implicit declaration of function 'prefetchw' Signed-off-by: WANG Cong Cc: Chris Zankel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index 5b0c18c..c141b11 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -17,6 +17,7 @@ #define _XTENSA_UACCESS_H #include +#include #define VERIFY_READ 0 #define VERIFY_WRITE 1 -- cgit v0.10.2 From 2b934c6236983392d01bef22e43af3051cac16f5 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 25 Jul 2011 17:11:54 -0700 Subject: drivers/misc/pch_phub.c: don't oops if dmi_get_system_info returns NULL If dmi_get_system_info() returns NULL, pch_phub_probe() will dereferencea a zero pointer. This oops was observed on an Atom based board which has no BIOS, but a bootloder which doesn't privde DMI data. Signed-off-by: Alexander Stein Cc: Tomoya MORINAGA Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/misc/pch_phub.c b/drivers/misc/pch_phub.c index 5fe79df..01eb67b 100644 --- a/drivers/misc/pch_phub.c +++ b/drivers/misc/pch_phub.c @@ -686,6 +686,8 @@ static int __devinit pch_phub_probe(struct pci_dev *pdev, } if (id->driver_data == 1) { /* EG20T PCH */ + const char *board_name; + retval = sysfs_create_file(&pdev->dev.kobj, &dev_attr_pch_mac.attr); if (retval) @@ -701,7 +703,8 @@ static int __devinit pch_phub_probe(struct pci_dev *pdev, CLKCFG_CANCLK_MASK); /* quirk for CM-iTC board */ - if (strstr(dmi_get_system_info(DMI_BOARD_NAME), "CM-iTC")) + board_name = dmi_get_system_info(DMI_BOARD_NAME); + if (board_name && strstr(board_name, "CM-iTC")) pch_phub_read_modify_write_reg(chip, (unsigned int)CLKCFG_REG_OFFSET, CLKCFG_UART_48MHZ | CLKCFG_BAUDDIV | -- cgit v0.10.2 From ccb6108f5b0b541d3eb332c3a73e645c0f84278e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Jul 2011 17:11:57 -0700 Subject: mm/backing-dev.c: reset bdi min_ratio in bdi_unregister() Vito said: : The system has many usb disks coming and going day to day, with their : respective bdi's having min_ratio set to 1 when inserted. It works for : some time until eventually min_ratio can no longer be set, even when the : active set of bdi's seen in /sys/class/bdi/*/min_ratio doesn't add up to : anywhere near 100. : : This then leads to an unrelated starvation problem caused by write-heavy : fuse mounts being used atop the usb disks, a problem the min_ratio setting : at the underlying devices bdi effectively prevents. Fix this leakage by resetting the bdi min_ratio when unregistering the BDI. Signed-off-by: Peter Zijlstra Reported-by: Vito Caputo Cc: Wu Fengguang Cc: Miklos Szeredi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/backing-dev.c b/mm/backing-dev.c index f032e6e..e56fe35 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -606,6 +606,7 @@ static void bdi_prune_sb(struct backing_dev_info *bdi) void bdi_unregister(struct backing_dev_info *bdi) { if (bdi->dev) { + bdi_set_min_ratio(bdi, 0); trace_writeback_bdi_unregister(bdi); bdi_prune_sb(bdi); del_timer_sync(&bdi->wb.wakeup_timer); -- cgit v0.10.2 From e2e7da9bccd77ef6793b865c1b78c7e519ccc562 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 25 Jul 2011 17:11:58 -0700 Subject: drivers/video/backlight/ld9040.c: small fixes - Fix checking of wrong return value for backlight_device_register() - Properly free allocated resources in ld9040_probe() error path and ld9040_remove(). Signed-off-by: Axel Lin Cc: Donghwa Lee Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/video/backlight/ld9040.c b/drivers/video/backlight/ld9040.c index 7281b25..62bcde3 100644 --- a/drivers/video/backlight/ld9040.c +++ b/drivers/video/backlight/ld9040.c @@ -701,9 +701,9 @@ static int ld9040_probe(struct spi_device *spi) bd = backlight_device_register("ld9040-bl", &spi->dev, lcd, &ld9040_backlight_ops, NULL); - if (IS_ERR(ld)) { - ret = PTR_ERR(ld); - goto out_free_lcd; + if (IS_ERR(bd)) { + ret = PTR_ERR(bd); + goto out_unregister_lcd; } bd->props.max_brightness = MAX_BRIGHTNESS; @@ -731,6 +731,8 @@ static int ld9040_probe(struct spi_device *spi) dev_info(&spi->dev, "ld9040 panel driver has been probed.\n"); return 0; +out_unregister_lcd: + lcd_device_unregister(lcd->ld); out_free_lcd: kfree(lcd); return ret; @@ -741,6 +743,7 @@ static int __devexit ld9040_remove(struct spi_device *spi) struct ld9040 *lcd = dev_get_drvdata(&spi->dev); ld9040_power(lcd, FB_BLANK_POWERDOWN); + backlight_device_unregister(lcd->bd); lcd_device_unregister(lcd->ld); kfree(lcd); -- cgit v0.10.2 From 6b19bad8b2d4cf7150b58ce6619b30364bdab1db Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 25 Jul 2011 17:11:59 -0700 Subject: drivers/video/backlight/adp8860_bl.c: remove a redundant assignment for max_brightness We have set props.max_brightness before registering backlight device. Signed-off-by: Axel Lin Acked-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/video/backlight/adp8860_bl.c b/drivers/video/backlight/adp8860_bl.c index d2a96a4..183b6f6 100644 --- a/drivers/video/backlight/adp8860_bl.c +++ b/drivers/video/backlight/adp8860_bl.c @@ -722,8 +722,7 @@ static int __devinit adp8860_probe(struct i2c_client *client, goto out2; } - bl->props.max_brightness = - bl->props.brightness = ADP8860_MAX_BRIGHTNESS; + bl->props.brightness = ADP8860_MAX_BRIGHTNESS; data->bl = bl; -- cgit v0.10.2 From a4c8aaa559733d03cb6bb4fa62c25ae756c53e94 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Mon, 25 Jul 2011 17:12:00 -0700 Subject: backlight: add ams369fg06 amoled driver Add the ams369fg06 amoled panel driver. The ams369fg06 amoled panel (480 x 800) driver uses 3-wired SPI inteface. The brightness can be controlled by gamma setting of amoled panel. [sfr@canb.auug.org.au: fix build error] [axel.lin@gmail.com: unregister backlight device when unloading the module] [axel.lin@gmail.com: staticize ams369fg06_shutdown] Signed-off-by: Jingoo Han Cc: Richard Purdie Cc: Inki Dae Cc: anish singh Signed-off-by: Stephen Rothwell Signed-off-by: Axel Lin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index 2d93c8d..1e54b8b 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -117,6 +117,14 @@ config LCD_LD9040 If you have an LD9040 Panel, say Y to enable its control driver. +config LCD_AMS369FG06 + tristate "AMS369FG06 AMOLED LCD Driver" + depends on SPI && BACKLIGHT_CLASS_DEVICE + default n + help + If you have an AMS369FG06 AMOLED Panel, say Y to enable its + LCD control driver. + endif # LCD_CLASS_DEVICE # diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile index ee72adb..bf1dd92 100644 --- a/drivers/video/backlight/Makefile +++ b/drivers/video/backlight/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_LCD_TDO24M) += tdo24m.o obj-$(CONFIG_LCD_TOSA) += tosa_lcd.o obj-$(CONFIG_LCD_S6E63M0) += s6e63m0.o obj-$(CONFIG_LCD_LD9040) += ld9040.o +obj-$(CONFIG_LCD_AMS369FG06) += ams369fg06.o obj-$(CONFIG_BACKLIGHT_CLASS_DEVICE) += backlight.o obj-$(CONFIG_BACKLIGHT_ATMEL_PWM) += atmel-pwm-bl.o diff --git a/drivers/video/backlight/ams369fg06.c b/drivers/video/backlight/ams369fg06.c new file mode 100644 index 0000000..2c31430 --- /dev/null +++ b/drivers/video/backlight/ams369fg06.c @@ -0,0 +1,643 @@ +/* + * ams369fg06 AMOLED LCD panel driver. + * + * Copyright (c) 2011 Samsung Electronics Co., Ltd. + * Author: Jingoo Han + * + * Derived from drivers/video/s6e63m0.c + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define SLEEPMSEC 0x1000 +#define ENDDEF 0x2000 +#define DEFMASK 0xFF00 +#define COMMAND_ONLY 0xFE +#define DATA_ONLY 0xFF + +#define MAX_GAMMA_LEVEL 5 +#define GAMMA_TABLE_COUNT 21 + +#define MIN_BRIGHTNESS 0 +#define MAX_BRIGHTNESS 255 +#define DEFAULT_BRIGHTNESS 150 + +struct ams369fg06 { + struct device *dev; + struct spi_device *spi; + unsigned int power; + struct lcd_device *ld; + struct backlight_device *bd; + struct lcd_platform_data *lcd_pd; +}; + +static const unsigned short seq_display_on[] = { + 0x14, 0x03, + ENDDEF, 0x0000 +}; + +static const unsigned short seq_display_off[] = { + 0x14, 0x00, + ENDDEF, 0x0000 +}; + +static const unsigned short seq_stand_by_on[] = { + 0x1D, 0xA1, + SLEEPMSEC, 200, + ENDDEF, 0x0000 +}; + +static const unsigned short seq_stand_by_off[] = { + 0x1D, 0xA0, + SLEEPMSEC, 250, + ENDDEF, 0x0000 +}; + +static const unsigned short seq_setting[] = { + 0x31, 0x08, + 0x32, 0x14, + 0x30, 0x02, + 0x27, 0x01, + 0x12, 0x08, + 0x13, 0x08, + 0x15, 0x00, + 0x16, 0x00, + + 0xef, 0xd0, + DATA_ONLY, 0xe8, + + 0x39, 0x44, + 0x40, 0x00, + 0x41, 0x3f, + 0x42, 0x2a, + 0x43, 0x27, + 0x44, 0x27, + 0x45, 0x1f, + 0x46, 0x44, + 0x50, 0x00, + 0x51, 0x00, + 0x52, 0x17, + 0x53, 0x24, + 0x54, 0x26, + 0x55, 0x1f, + 0x56, 0x43, + 0x60, 0x00, + 0x61, 0x3f, + 0x62, 0x2a, + 0x63, 0x25, + 0x64, 0x24, + 0x65, 0x1b, + 0x66, 0x5c, + + 0x17, 0x22, + 0x18, 0x33, + 0x19, 0x03, + 0x1a, 0x01, + 0x22, 0xa4, + 0x23, 0x00, + 0x26, 0xa0, + + 0x1d, 0xa0, + SLEEPMSEC, 300, + + 0x14, 0x03, + + ENDDEF, 0x0000 +}; + +/* gamma value: 2.2 */ +static const unsigned int ams369fg06_22_250[] = { + 0x00, 0x3f, 0x2a, 0x27, 0x27, 0x1f, 0x44, + 0x00, 0x00, 0x17, 0x24, 0x26, 0x1f, 0x43, + 0x00, 0x3f, 0x2a, 0x25, 0x24, 0x1b, 0x5c, +}; + +static const unsigned int ams369fg06_22_200[] = { + 0x00, 0x3f, 0x28, 0x29, 0x27, 0x21, 0x3e, + 0x00, 0x00, 0x10, 0x25, 0x27, 0x20, 0x3d, + 0x00, 0x3f, 0x28, 0x27, 0x25, 0x1d, 0x53, +}; + +static const unsigned int ams369fg06_22_150[] = { + 0x00, 0x3f, 0x2d, 0x29, 0x28, 0x23, 0x37, + 0x00, 0x00, 0x0b, 0x25, 0x28, 0x22, 0x36, + 0x00, 0x3f, 0x2b, 0x28, 0x26, 0x1f, 0x4a, +}; + +static const unsigned int ams369fg06_22_100[] = { + 0x00, 0x3f, 0x30, 0x2a, 0x2b, 0x24, 0x2f, + 0x00, 0x00, 0x00, 0x25, 0x29, 0x24, 0x2e, + 0x00, 0x3f, 0x2f, 0x29, 0x29, 0x21, 0x3f, +}; + +static const unsigned int ams369fg06_22_50[] = { + 0x00, 0x3f, 0x3c, 0x2c, 0x2d, 0x27, 0x24, + 0x00, 0x00, 0x00, 0x22, 0x2a, 0x27, 0x23, + 0x00, 0x3f, 0x3b, 0x2c, 0x2b, 0x24, 0x31, +}; + +struct ams369fg06_gamma { + unsigned int *gamma_22_table[MAX_GAMMA_LEVEL]; +}; + +static struct ams369fg06_gamma gamma_table = { + .gamma_22_table[0] = (unsigned int *)&ams369fg06_22_50, + .gamma_22_table[1] = (unsigned int *)&ams369fg06_22_100, + .gamma_22_table[2] = (unsigned int *)&ams369fg06_22_150, + .gamma_22_table[3] = (unsigned int *)&ams369fg06_22_200, + .gamma_22_table[4] = (unsigned int *)&ams369fg06_22_250, +}; + +static int ams369fg06_spi_write_byte(struct ams369fg06 *lcd, int addr, int data) +{ + u16 buf[1]; + struct spi_message msg; + + struct spi_transfer xfer = { + .len = 2, + .tx_buf = buf, + }; + + buf[0] = (addr << 8) | data; + + spi_message_init(&msg); + spi_message_add_tail(&xfer, &msg); + + return spi_sync(lcd->spi, &msg); +} + +static int ams369fg06_spi_write(struct ams369fg06 *lcd, unsigned char address, + unsigned char command) +{ + int ret = 0; + + if (address != DATA_ONLY) + ret = ams369fg06_spi_write_byte(lcd, 0x70, address); + if (command != COMMAND_ONLY) + ret = ams369fg06_spi_write_byte(lcd, 0x72, command); + + return ret; +} + +static int ams369fg06_panel_send_sequence(struct ams369fg06 *lcd, + const unsigned short *wbuf) +{ + int ret = 0, i = 0; + + while ((wbuf[i] & DEFMASK) != ENDDEF) { + if ((wbuf[i] & DEFMASK) != SLEEPMSEC) { + ret = ams369fg06_spi_write(lcd, wbuf[i], wbuf[i+1]); + if (ret) + break; + } else + mdelay(wbuf[i+1]); + i += 2; + } + + return ret; +} + +static int _ams369fg06_gamma_ctl(struct ams369fg06 *lcd, + const unsigned int *gamma) +{ + unsigned int i = 0; + int ret = 0; + + for (i = 0 ; i < GAMMA_TABLE_COUNT / 3; i++) { + ret = ams369fg06_spi_write(lcd, 0x40 + i, gamma[i]); + ret = ams369fg06_spi_write(lcd, 0x50 + i, gamma[i+7*1]); + ret = ams369fg06_spi_write(lcd, 0x60 + i, gamma[i+7*2]); + if (ret) { + dev_err(lcd->dev, "failed to set gamma table.\n"); + goto gamma_err; + } + } + +gamma_err: + return ret; +} + +static int ams369fg06_gamma_ctl(struct ams369fg06 *lcd, int brightness) +{ + int ret = 0; + int gamma = 0; + + if ((brightness >= 0) && (brightness <= 50)) + gamma = 0; + else if ((brightness > 50) && (brightness <= 100)) + gamma = 1; + else if ((brightness > 100) && (brightness <= 150)) + gamma = 2; + else if ((brightness > 150) && (brightness <= 200)) + gamma = 3; + else if ((brightness > 200) && (brightness <= 255)) + gamma = 4; + + ret = _ams369fg06_gamma_ctl(lcd, gamma_table.gamma_22_table[gamma]); + + return ret; +} + +static int ams369fg06_ldi_init(struct ams369fg06 *lcd) +{ + int ret, i; + static const unsigned short *init_seq[] = { + seq_setting, + seq_stand_by_off, + }; + + for (i = 0; i < ARRAY_SIZE(init_seq); i++) { + ret = ams369fg06_panel_send_sequence(lcd, init_seq[i]); + if (ret) + break; + } + + return ret; +} + +static int ams369fg06_ldi_enable(struct ams369fg06 *lcd) +{ + int ret, i; + static const unsigned short *init_seq[] = { + seq_stand_by_off, + seq_display_on, + }; + + for (i = 0; i < ARRAY_SIZE(init_seq); i++) { + ret = ams369fg06_panel_send_sequence(lcd, init_seq[i]); + if (ret) + break; + } + + return ret; +} + +static int ams369fg06_ldi_disable(struct ams369fg06 *lcd) +{ + int ret, i; + + static const unsigned short *init_seq[] = { + seq_display_off, + seq_stand_by_on, + }; + + for (i = 0; i < ARRAY_SIZE(init_seq); i++) { + ret = ams369fg06_panel_send_sequence(lcd, init_seq[i]); + if (ret) + break; + } + + return ret; +} + +static int ams369fg06_power_is_on(int power) +{ + return ((power) <= FB_BLANK_NORMAL); +} + +static int ams369fg06_power_on(struct ams369fg06 *lcd) +{ + int ret = 0; + struct lcd_platform_data *pd = NULL; + struct backlight_device *bd = NULL; + + pd = lcd->lcd_pd; + if (!pd) { + dev_err(lcd->dev, "platform data is NULL.\n"); + return -EFAULT; + } + + bd = lcd->bd; + if (!bd) { + dev_err(lcd->dev, "backlight device is NULL.\n"); + return -EFAULT; + } + + if (!pd->power_on) { + dev_err(lcd->dev, "power_on is NULL.\n"); + return -EFAULT; + } else { + pd->power_on(lcd->ld, 1); + mdelay(pd->power_on_delay); + } + + if (!pd->reset) { + dev_err(lcd->dev, "reset is NULL.\n"); + return -EFAULT; + } else { + pd->reset(lcd->ld); + mdelay(pd->reset_delay); + } + + ret = ams369fg06_ldi_init(lcd); + if (ret) { + dev_err(lcd->dev, "failed to initialize ldi.\n"); + return ret; + } + + ret = ams369fg06_ldi_enable(lcd); + if (ret) { + dev_err(lcd->dev, "failed to enable ldi.\n"); + return ret; + } + + /* set brightness to current value after power on or resume. */ + ret = ams369fg06_gamma_ctl(lcd, bd->props.brightness); + if (ret) { + dev_err(lcd->dev, "lcd gamma setting failed.\n"); + return ret; + } + + return 0; +} + +static int ams369fg06_power_off(struct ams369fg06 *lcd) +{ + int ret = 0; + struct lcd_platform_data *pd = NULL; + + pd = lcd->lcd_pd; + if (!pd) { + dev_err(lcd->dev, "platform data is NULL\n"); + return -EFAULT; + } + + ret = ams369fg06_ldi_disable(lcd); + if (ret) { + dev_err(lcd->dev, "lcd setting failed.\n"); + return -EIO; + } + + mdelay(pd->power_off_delay); + + if (!pd->power_on) { + dev_err(lcd->dev, "power_on is NULL.\n"); + return -EFAULT; + } else + pd->power_on(lcd->ld, 0); + + return 0; +} + +static int ams369fg06_power(struct ams369fg06 *lcd, int power) +{ + int ret = 0; + + if (ams369fg06_power_is_on(power) && + !ams369fg06_power_is_on(lcd->power)) + ret = ams369fg06_power_on(lcd); + else if (!ams369fg06_power_is_on(power) && + ams369fg06_power_is_on(lcd->power)) + ret = ams369fg06_power_off(lcd); + + if (!ret) + lcd->power = power; + + return ret; +} + +static int ams369fg06_get_power(struct lcd_device *ld) +{ + struct ams369fg06 *lcd = lcd_get_data(ld); + + return lcd->power; +} + +static int ams369fg06_set_power(struct lcd_device *ld, int power) +{ + struct ams369fg06 *lcd = lcd_get_data(ld); + + if (power != FB_BLANK_UNBLANK && power != FB_BLANK_POWERDOWN && + power != FB_BLANK_NORMAL) { + dev_err(lcd->dev, "power value should be 0, 1 or 4.\n"); + return -EINVAL; + } + + return ams369fg06_power(lcd, power); +} + +static int ams369fg06_get_brightness(struct backlight_device *bd) +{ + return bd->props.brightness; +} + +static int ams369fg06_set_brightness(struct backlight_device *bd) +{ + int ret = 0; + int brightness = bd->props.brightness; + struct ams369fg06 *lcd = dev_get_drvdata(&bd->dev); + + if (brightness < MIN_BRIGHTNESS || + brightness > bd->props.max_brightness) { + dev_err(&bd->dev, "lcd brightness should be %d to %d.\n", + MIN_BRIGHTNESS, MAX_BRIGHTNESS); + return -EINVAL; + } + + ret = ams369fg06_gamma_ctl(lcd, bd->props.brightness); + if (ret) { + dev_err(&bd->dev, "lcd brightness setting failed.\n"); + return -EIO; + } + + return ret; +} + +static struct lcd_ops ams369fg06_lcd_ops = { + .get_power = ams369fg06_get_power, + .set_power = ams369fg06_set_power, +}; + +static const struct backlight_ops ams369fg06_backlight_ops = { + .get_brightness = ams369fg06_get_brightness, + .update_status = ams369fg06_set_brightness, +}; + +static int __devinit ams369fg06_probe(struct spi_device *spi) +{ + int ret = 0; + struct ams369fg06 *lcd = NULL; + struct lcd_device *ld = NULL; + struct backlight_device *bd = NULL; + + lcd = kzalloc(sizeof(struct ams369fg06), GFP_KERNEL); + if (!lcd) + return -ENOMEM; + + /* ams369fg06 lcd panel uses 3-wire 16bits SPI Mode. */ + spi->bits_per_word = 16; + + ret = spi_setup(spi); + if (ret < 0) { + dev_err(&spi->dev, "spi setup failed.\n"); + goto out_free_lcd; + } + + lcd->spi = spi; + lcd->dev = &spi->dev; + + lcd->lcd_pd = spi->dev.platform_data; + if (!lcd->lcd_pd) { + dev_err(&spi->dev, "platform data is NULL\n"); + goto out_free_lcd; + } + + ld = lcd_device_register("ams369fg06", &spi->dev, lcd, + &ams369fg06_lcd_ops); + if (IS_ERR(ld)) { + ret = PTR_ERR(ld); + goto out_free_lcd; + } + + lcd->ld = ld; + + bd = backlight_device_register("ams369fg06-bl", &spi->dev, lcd, + &ams369fg06_backlight_ops, NULL); + if (IS_ERR(bd)) { + ret = PTR_ERR(bd); + goto out_lcd_unregister; + } + + bd->props.max_brightness = MAX_BRIGHTNESS; + bd->props.brightness = DEFAULT_BRIGHTNESS; + bd->props.type = BACKLIGHT_RAW; + lcd->bd = bd; + + if (!lcd->lcd_pd->lcd_enabled) { + /* + * if lcd panel was off from bootloader then + * current lcd status is powerdown and then + * it enables lcd panel. + */ + lcd->power = FB_BLANK_POWERDOWN; + + ams369fg06_power(lcd, FB_BLANK_UNBLANK); + } else + lcd->power = FB_BLANK_UNBLANK; + + dev_set_drvdata(&spi->dev, lcd); + + dev_info(&spi->dev, "ams369fg06 panel driver has been probed.\n"); + + return 0; + +out_lcd_unregister: + lcd_device_unregister(ld); +out_free_lcd: + kfree(lcd); + return ret; +} + +static int __devexit ams369fg06_remove(struct spi_device *spi) +{ + struct ams369fg06 *lcd = dev_get_drvdata(&spi->dev); + + ams369fg06_power(lcd, FB_BLANK_POWERDOWN); + backlight_device_unregister(lcd->bd); + lcd_device_unregister(lcd->ld); + kfree(lcd); + + return 0; +} + +#if defined(CONFIG_PM) +static unsigned int before_power; + +static int ams369fg06_suspend(struct spi_device *spi, pm_message_t mesg) +{ + int ret = 0; + struct ams369fg06 *lcd = dev_get_drvdata(&spi->dev); + + dev_dbg(&spi->dev, "lcd->power = %d\n", lcd->power); + + before_power = lcd->power; + + /* + * when lcd panel is suspend, lcd panel becomes off + * regardless of status. + */ + ret = ams369fg06_power(lcd, FB_BLANK_POWERDOWN); + + return ret; +} + +static int ams369fg06_resume(struct spi_device *spi) +{ + int ret = 0; + struct ams369fg06 *lcd = dev_get_drvdata(&spi->dev); + + /* + * after suspended, if lcd panel status is FB_BLANK_UNBLANK + * (at that time, before_power is FB_BLANK_UNBLANK) then + * it changes that status to FB_BLANK_POWERDOWN to get lcd on. + */ + if (before_power == FB_BLANK_UNBLANK) + lcd->power = FB_BLANK_POWERDOWN; + + dev_dbg(&spi->dev, "before_power = %d\n", before_power); + + ret = ams369fg06_power(lcd, before_power); + + return ret; +} +#else +#define ams369fg06_suspend NULL +#define ams369fg06_resume NULL +#endif + +static void ams369fg06_shutdown(struct spi_device *spi) +{ + struct ams369fg06 *lcd = dev_get_drvdata(&spi->dev); + + ams369fg06_power(lcd, FB_BLANK_POWERDOWN); +} + +static struct spi_driver ams369fg06_driver = { + .driver = { + .name = "ams369fg06", + .bus = &spi_bus_type, + .owner = THIS_MODULE, + }, + .probe = ams369fg06_probe, + .remove = __devexit_p(ams369fg06_remove), + .shutdown = ams369fg06_shutdown, + .suspend = ams369fg06_suspend, + .resume = ams369fg06_resume, +}; + +static int __init ams369fg06_init(void) +{ + return spi_register_driver(&ams369fg06_driver); +} + +static void __exit ams369fg06_exit(void) +{ + spi_unregister_driver(&ams369fg06_driver); +} + +module_init(ams369fg06_init); +module_exit(ams369fg06_exit); + +MODULE_AUTHOR("Jingoo Han "); +MODULE_DESCRIPTION("ams369fg06 LCD Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/video/backlight/s6e63m0.c b/drivers/video/backlight/s6e63m0.c index 322040f..e1803ba 100644 --- a/drivers/video/backlight/s6e63m0.c +++ b/drivers/video/backlight/s6e63m0.c @@ -840,7 +840,7 @@ static int __devexit s6e63m0_remove(struct spi_device *spi) } #if defined(CONFIG_PM) -unsigned int before_power; +static unsigned int before_power; static int s6e63m0_suspend(struct spi_device *spi, pm_message_t mesg) { -- cgit v0.10.2 From ef22f6a70c9186c8e25f757b0e8f7374b37f69bf Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 25 Jul 2011 17:12:01 -0700 Subject: backlight: set backlight type and max_brightness before backlights are registered Since commit a19a6ee "backlight: Allow properties to be passed at registration" and commit bb7ca74 "backlight: add backlight type", we can set backlight type and max_brightness before backlights are registered. Some newly added drivers did not set it properly, let's fix it. Signed-off-by: Axel Lin Cc: Matthew Garrett Cc: Jingoo Han Cc: Donghwa Lee Cc: InKi Dae Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/video/backlight/ams369fg06.c b/drivers/video/backlight/ams369fg06.c index 2c31430..9f0a491 100644 --- a/drivers/video/backlight/ams369fg06.c +++ b/drivers/video/backlight/ams369fg06.c @@ -479,6 +479,7 @@ static int __devinit ams369fg06_probe(struct spi_device *spi) struct ams369fg06 *lcd = NULL; struct lcd_device *ld = NULL; struct backlight_device *bd = NULL; + struct backlight_properties props; lcd = kzalloc(sizeof(struct ams369fg06), GFP_KERNEL); if (!lcd) @@ -511,16 +512,18 @@ static int __devinit ams369fg06_probe(struct spi_device *spi) lcd->ld = ld; + memset(&props, 0, sizeof(struct backlight_properties)); + props.type = BACKLIGHT_RAW; + props.max_brightness = MAX_BRIGHTNESS; + bd = backlight_device_register("ams369fg06-bl", &spi->dev, lcd, - &ams369fg06_backlight_ops, NULL); + &ams369fg06_backlight_ops, &props); if (IS_ERR(bd)) { ret = PTR_ERR(bd); goto out_lcd_unregister; } - bd->props.max_brightness = MAX_BRIGHTNESS; bd->props.brightness = DEFAULT_BRIGHTNESS; - bd->props.type = BACKLIGHT_RAW; lcd->bd = bd; if (!lcd->lcd_pd->lcd_enabled) { diff --git a/drivers/video/backlight/ld9040.c b/drivers/video/backlight/ld9040.c index 62bcde3..5934655 100644 --- a/drivers/video/backlight/ld9040.c +++ b/drivers/video/backlight/ld9040.c @@ -668,6 +668,7 @@ static int ld9040_probe(struct spi_device *spi) struct ld9040 *lcd = NULL; struct lcd_device *ld = NULL; struct backlight_device *bd = NULL; + struct backlight_properties props; lcd = kzalloc(sizeof(struct ld9040), GFP_KERNEL); if (!lcd) @@ -699,14 +700,17 @@ static int ld9040_probe(struct spi_device *spi) lcd->ld = ld; + memset(&props, 0, sizeof(struct backlight_properties)); + props.type = BACKLIGHT_RAW; + props.max_brightness = MAX_BRIGHTNESS; + bd = backlight_device_register("ld9040-bl", &spi->dev, - lcd, &ld9040_backlight_ops, NULL); + lcd, &ld9040_backlight_ops, &props); if (IS_ERR(bd)) { ret = PTR_ERR(bd); goto out_unregister_lcd; } - bd->props.max_brightness = MAX_BRIGHTNESS; bd->props.brightness = MAX_BRIGHTNESS; lcd->bd = bd; diff --git a/drivers/video/backlight/s6e63m0.c b/drivers/video/backlight/s6e63m0.c index e1803ba..694e5aa 100644 --- a/drivers/video/backlight/s6e63m0.c +++ b/drivers/video/backlight/s6e63m0.c @@ -738,6 +738,7 @@ static int __devinit s6e63m0_probe(struct spi_device *spi) struct s6e63m0 *lcd = NULL; struct lcd_device *ld = NULL; struct backlight_device *bd = NULL; + struct backlight_properties props; lcd = kzalloc(sizeof(struct s6e63m0), GFP_KERNEL); if (!lcd) @@ -769,16 +770,18 @@ static int __devinit s6e63m0_probe(struct spi_device *spi) lcd->ld = ld; + memset(&props, 0, sizeof(struct backlight_properties)); + props.type = BACKLIGHT_RAW; + props.max_brightness = MAX_BRIGHTNESS; + bd = backlight_device_register("s6e63m0bl-bl", &spi->dev, lcd, - &s6e63m0_backlight_ops, NULL); + &s6e63m0_backlight_ops, &props); if (IS_ERR(bd)) { ret = PTR_ERR(bd); goto out_lcd_unregister; } - bd->props.max_brightness = MAX_BRIGHTNESS; bd->props.brightness = MAX_BRIGHTNESS; - bd->props.type = BACKLIGHT_RAW; lcd->bd = bd; /* -- cgit v0.10.2 From 9d0ad8ca43ce8023bb834a409c2258bd7197fb05 Mon Sep 17 00:00:00 2001 From: Daniel Kiper Date: Mon, 25 Jul 2011 17:12:05 -0700 Subject: mm: extend memory hotplug API to allow memory hotplug in virtual machines This patch contains online_page_callback and apropriate functions for registering/unregistering online page callbacks. It allows to do some machine specific tasks during online page stage which is required to implement memory hotplug in virtual machines. Currently this patch is required by latest memory hotplug support for Xen balloon driver patch which will be posted soon. Additionally, originial online_page() function was splited into following functions doing "atomic" operations: - __online_page_set_limits() - set new limits for memory management code, - __online_page_increment_counters() - increment totalram_pages and totalhigh_pages, - __online_page_free() - free page to allocator. It was done to: - not duplicate existing code, - ease hotplug code devolpment by usage of well defined interface, - avoid stupid bugs which are unavoidable when the same code (by design) is developed in many places. [akpm@linux-foundation.org: use explicit indirect-call syntax] Signed-off-by: Daniel Kiper Reviewed-by: Konrad Rzeszutek Wilk Cc: Ian Campbell Cc: Jeremy Fitzhardinge Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 8122018d..0b8e2a7 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -68,12 +68,19 @@ static inline void zone_seqlock_init(struct zone *zone) extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages); extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages); extern int add_one_highpage(struct page *page, int pfn, int bad_ppro); -/* need some defines for these for archs that don't support it */ -extern void online_page(struct page *page); /* VM interface that may be used by firmware interface */ extern int online_pages(unsigned long, unsigned long); extern void __offline_isolated_pages(unsigned long, unsigned long); +typedef void (*online_page_callback_t)(struct page *page); + +extern int set_online_page_callback(online_page_callback_t callback); +extern int restore_online_page_callback(online_page_callback_t callback); + +extern void __online_page_set_limits(struct page *page); +extern void __online_page_increment_counters(struct page *page); +extern void __online_page_free(struct page *page); + #ifdef CONFIG_MEMORY_HOTREMOVE extern bool is_pageblock_removable_nolock(struct page *page); #endif /* CONFIG_MEMORY_HOTREMOVE */ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c46887b..6e7d8b2 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -34,6 +34,17 @@ #include "internal.h" +/* + * online_page_callback contains pointer to current page onlining function. + * Initially it is generic_online_page(). If it is required it could be + * changed by calling set_online_page_callback() for callback registration + * and restore_online_page_callback() for generic callback restore. + */ + +static void generic_online_page(struct page *page); + +static online_page_callback_t online_page_callback = generic_online_page; + DEFINE_MUTEX(mem_hotplug_mutex); void lock_memory_hotplug(void) @@ -361,23 +372,74 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, } EXPORT_SYMBOL_GPL(__remove_pages); -void online_page(struct page *page) +int set_online_page_callback(online_page_callback_t callback) +{ + int rc = -EINVAL; + + lock_memory_hotplug(); + + if (online_page_callback == generic_online_page) { + online_page_callback = callback; + rc = 0; + } + + unlock_memory_hotplug(); + + return rc; +} +EXPORT_SYMBOL_GPL(set_online_page_callback); + +int restore_online_page_callback(online_page_callback_t callback) +{ + int rc = -EINVAL; + + lock_memory_hotplug(); + + if (online_page_callback == callback) { + online_page_callback = generic_online_page; + rc = 0; + } + + unlock_memory_hotplug(); + + return rc; +} +EXPORT_SYMBOL_GPL(restore_online_page_callback); + +void __online_page_set_limits(struct page *page) { unsigned long pfn = page_to_pfn(page); - totalram_pages++; if (pfn >= num_physpages) num_physpages = pfn + 1; +} +EXPORT_SYMBOL_GPL(__online_page_set_limits); + +void __online_page_increment_counters(struct page *page) +{ + totalram_pages++; #ifdef CONFIG_HIGHMEM if (PageHighMem(page)) totalhigh_pages++; #endif +} +EXPORT_SYMBOL_GPL(__online_page_increment_counters); +void __online_page_free(struct page *page) +{ ClearPageReserved(page); init_page_count(page); __free_page(page); } +EXPORT_SYMBOL_GPL(__online_page_free); + +static void generic_online_page(struct page *page) +{ + __online_page_set_limits(page); + __online_page_increment_counters(page); + __online_page_free(page); +} static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages, void *arg) @@ -388,7 +450,7 @@ static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages, if (PageReserved(pfn_to_page(start_pfn))) for (i = 0; i < nr_pages; i++) { page = pfn_to_page(start_pfn + i); - online_page(page); + (*online_page_callback)(page); onlined_pages++; } *(unsigned long *)arg = onlined_pages; -- cgit v0.10.2 From 080e2be7884322daffe75a831e879fbe7de383ab Mon Sep 17 00:00:00 2001 From: Daniel Kiper Date: Mon, 25 Jul 2011 17:12:06 -0700 Subject: xen/balloon: memory hotplug support for Xen balloon driver Memory hotplug support for Xen balloon driver. It should be mentioned that hotplugged memory is not onlined automatically. It should be onlined by user through standard sysfs interface. Memory could be hotplugged in following steps: 1) dom0: xl mem-max where is >= requested memory size, 2) dom0: xl mem-set where is requested memory size; alternatively memory could be added by writing proper value to /sys/devices/system/xen_memory/xen_memory0/target or /sys/devices/system/xen_memory/xen_memory0/target_kb on dumU, 3) domU: for i in /sys/devices/system/memory/memory*/state; do \ [ "`cat "$i"`" = offline ] && echo online > "$i"; done Memory could be onlined automatically on domU by adding following line to udev rules: SUBSYSTEM=="memory", ACTION=="add", RUN+="/bin/sh -c '[ -f /sys$devpath/state ] && echo online > /sys$devpath/state'" In that case step 3 should be omitted. Signed-off-by: Daniel Kiper Acked-by: Konrad Rzeszutek Wilk Cc: Ian Campbell Cc: Jeremy Fitzhardinge Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 03bc471..f815283 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -26,6 +26,36 @@ config XEN_SELFBALLOONING kernel boot parameter. Note that systems without a sufficiently large swap device should not enable self-ballooning. +config XEN_BALLOON_MEMORY_HOTPLUG + bool "Memory hotplug support for Xen balloon driver" + default n + depends on XEN_BALLOON && MEMORY_HOTPLUG + help + Memory hotplug support for Xen balloon driver allows expanding memory + available for the system above limit declared at system startup. + It is very useful on critical systems which require long + run without rebooting. + + Memory could be hotplugged in following steps: + + 1) dom0: xl mem-max + where is >= requested memory size, + + 2) dom0: xl mem-set + where is requested memory size; alternatively memory + could be added by writing proper value to + /sys/devices/system/xen_memory/xen_memory0/target or + /sys/devices/system/xen_memory/xen_memory0/target_kb on dumU, + + 3) domU: for i in /sys/devices/system/memory/memory*/state; do \ + [ "`cat "$i"`" = offline ] && echo online > "$i"; done + + Memory could be onlined automatically on domU by adding following line to udev rules: + + SUBSYSTEM=="memory", ACTION=="add", RUN+="/bin/sh -c '[ -f /sys$devpath/state ] && echo online > /sys$devpath/state'" + + In that case step 3 should be omitted. + config XEN_SCRUB_PAGES bool "Scrub pages before returning them to system" depends on XEN_BALLOON diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index f54290b..5dfd8f8 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -4,6 +4,12 @@ * Copyright (c) 2003, B Dragovic * Copyright (c) 2003-2004, M Williamson, K Fraser * Copyright (c) 2005 Dan M. Smith, IBM Corporation + * Copyright (c) 2010 Daniel Kiper + * + * Memory hotplug support was written by Daniel Kiper. Work on + * it was sponsored by Google under Google Summer of Code 2010 + * program. Jeremy Fitzhardinge from Citrix was the mentor for + * this project. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 @@ -40,6 +46,9 @@ #include #include #include +#include +#include +#include #include #include @@ -194,6 +203,87 @@ static enum bp_state update_schedule(enum bp_state state) return BP_EAGAIN; } +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +static long current_credit(void) +{ + return balloon_stats.target_pages - balloon_stats.current_pages - + balloon_stats.hotplug_pages; +} + +static bool balloon_is_inflated(void) +{ + if (balloon_stats.balloon_low || balloon_stats.balloon_high || + balloon_stats.balloon_hotplug) + return true; + else + return false; +} + +/* + * reserve_additional_memory() adds memory region of size >= credit above + * max_pfn. New region is section aligned and size is modified to be multiple + * of section size. Those features allow optimal use of address space and + * establish proper alignment when this function is called first time after + * boot (last section not fully populated at boot time contains unused memory + * pages with PG_reserved bit not set; online_pages_range() does not allow page + * onlining in whole range if first onlined page does not have PG_reserved + * bit set). Real size of added memory is established at page onlining stage. + */ + +static enum bp_state reserve_additional_memory(long credit) +{ + int nid, rc; + u64 hotplug_start_paddr; + unsigned long balloon_hotplug = credit; + + hotplug_start_paddr = PFN_PHYS(SECTION_ALIGN_UP(max_pfn)); + balloon_hotplug = round_up(balloon_hotplug, PAGES_PER_SECTION); + nid = memory_add_physaddr_to_nid(hotplug_start_paddr); + + rc = add_memory(nid, hotplug_start_paddr, balloon_hotplug << PAGE_SHIFT); + + if (rc) { + pr_info("xen_balloon: %s: add_memory() failed: %i\n", __func__, rc); + return BP_EAGAIN; + } + + balloon_hotplug -= credit; + + balloon_stats.hotplug_pages += credit; + balloon_stats.balloon_hotplug = balloon_hotplug; + + return BP_DONE; +} + +static void xen_online_page(struct page *page) +{ + __online_page_set_limits(page); + + mutex_lock(&balloon_mutex); + + __balloon_append(page); + + if (balloon_stats.hotplug_pages) + --balloon_stats.hotplug_pages; + else + --balloon_stats.balloon_hotplug; + + mutex_unlock(&balloon_mutex); +} + +static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) +{ + if (val == MEM_ONLINE) + schedule_delayed_work(&balloon_worker, 0); + + return NOTIFY_OK; +} + +static struct notifier_block xen_memory_nb = { + .notifier_call = xen_memory_notifier, + .priority = 0 +}; +#else static long current_credit(void) { unsigned long target = balloon_stats.target_pages; @@ -206,6 +296,21 @@ static long current_credit(void) return target - balloon_stats.current_pages; } +static bool balloon_is_inflated(void) +{ + if (balloon_stats.balloon_low || balloon_stats.balloon_high) + return true; + else + return false; +} + +static enum bp_state reserve_additional_memory(long credit) +{ + balloon_stats.target_pages = balloon_stats.current_pages; + return BP_DONE; +} +#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ + static enum bp_state increase_reservation(unsigned long nr_pages) { int rc; @@ -217,6 +322,15 @@ static enum bp_state increase_reservation(unsigned long nr_pages) .domid = DOMID_SELF }; +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + if (!balloon_stats.balloon_low && !balloon_stats.balloon_high) { + nr_pages = min(nr_pages, balloon_stats.balloon_hotplug); + balloon_stats.hotplug_pages += nr_pages; + balloon_stats.balloon_hotplug -= nr_pages; + return BP_DONE; + } +#endif + if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); @@ -279,6 +393,15 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) .domid = DOMID_SELF }; +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + if (balloon_stats.hotplug_pages) { + nr_pages = min(nr_pages, balloon_stats.hotplug_pages); + balloon_stats.hotplug_pages -= nr_pages; + balloon_stats.balloon_hotplug += nr_pages; + return BP_DONE; + } +#endif + if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); @@ -340,8 +463,12 @@ static void balloon_process(struct work_struct *work) do { credit = current_credit(); - if (credit > 0) - state = increase_reservation(credit); + if (credit > 0) { + if (balloon_is_inflated()) + state = increase_reservation(credit); + else + state = reserve_additional_memory(credit); + } if (credit < 0) state = decrease_reservation(-credit, GFP_BALLOON); @@ -448,6 +575,14 @@ static int __init balloon_init(void) balloon_stats.retry_count = 1; balloon_stats.max_retry_count = RETRY_UNLIMITED; +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + balloon_stats.hotplug_pages = 0; + balloon_stats.balloon_hotplug = 0; + + set_online_page_callback(&xen_online_page); + register_memory_notifier(&xen_memory_nb); +#endif + /* * Initialise the balloon with excess memory space. We need * to make sure we don't add memory which doesn't exist or diff --git a/include/xen/balloon.h b/include/xen/balloon.h index 4076ed7..76f7538 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -15,6 +15,10 @@ struct balloon_stats { unsigned long max_schedule_delay; unsigned long retry_count; unsigned long max_retry_count; +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + unsigned long hotplug_pages; + unsigned long balloon_hotplug; +#endif }; extern struct balloon_stats balloon_stats; -- cgit v0.10.2 From e21c7ffd6f7493aa01bccd17ebc13dbdfecce880 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 25 Jul 2011 17:12:06 -0700 Subject: mm: swap-token: fix dead link http://www.cs.wm.edu/~sjiang/token.pdf is now dead. Replace it with an alive alternative. Signed-off-by: KOSAKI Motohiro Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/thrash.c b/mm/thrash.c index fabf2d0..d416d40 100644 --- a/mm/thrash.c +++ b/mm/thrash.c @@ -6,7 +6,7 @@ * Released under the GPL, see the file COPYING for details. * * Simple token based thrashing protection, using the algorithm - * described in: http://www.cs.wm.edu/~sjiang/token.pdf + * described in: http://www.cse.ohio-state.edu/hpcs/WWW/HTML/publications/abs05-1.html * * Sep 2006, Ashwin Chaugule * Improved algorithm to pass token: -- cgit v0.10.2 From 53bb01f593d50188c8d638f89db96f9b6b042bcd Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 25 Jul 2011 17:12:07 -0700 Subject: mm: swap-token: makes global variables to function local global_faults and last_aging are only used in grab_swap_token(). Move them into grab_swap_token(). Signed-off-by: KOSAKI Motohiro Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/thrash.c b/mm/thrash.c index d416d40..42ffb01 100644 --- a/mm/thrash.c +++ b/mm/thrash.c @@ -30,8 +30,6 @@ static DEFINE_SPINLOCK(swap_token_lock); struct mm_struct *swap_token_mm; struct mem_cgroup *swap_token_memcg; -static unsigned int global_faults; -static unsigned int last_aging; #ifdef CONFIG_CGROUP_MEM_RES_CTLR static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) @@ -55,6 +53,8 @@ void grab_swap_token(struct mm_struct *mm) { int current_interval; unsigned int old_prio = mm->token_priority; + static unsigned int global_faults; + static unsigned int last_aging; global_faults++; -- cgit v0.10.2 From 45ebb840257b060ec54416aebffd9747e210962c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 25 Jul 2011 17:12:08 -0700 Subject: mm: swap-token: add a comment for priority aging Document some swap token aging design decisions. Signed-off-by: KOSAKI Motohiro Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/thrash.c b/mm/thrash.c index 42ffb01..e53f7d0 100644 --- a/mm/thrash.c +++ b/mm/thrash.c @@ -67,6 +67,17 @@ void grab_swap_token(struct mm_struct *mm) if (!swap_token_mm) goto replace_token; + /* + * Usually, we don't need priority aging because long interval faults + * makes priority decrease quickly. But there is one exception. If the + * token owner task is sleeping, it never make long interval faults. + * Thus, we need a priority aging mechanism instead. The requirements + * of priority aging are + * 1) An aging interval is reasonable enough long. Too short aging + * interval makes quick swap token lost and decrease performance. + * 2) The swap token owner task have to get priority aging even if + * it's under sleep. + */ if ((global_faults - last_aging) > TOKEN_AGING_INTERVAL) { swap_token_mm->token_priority /= 2; last_aging = global_faults; -- cgit v0.10.2 From 4b6ddbf7ed4ef2f40e0a27418146eedaa68953c6 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 25 Jul 2011 17:12:09 -0700 Subject: pagewalk: fix walk_page_range() don't check find_vma() result properly The doc of find_vma() says, /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) { (snip) Thus, caller should confirm whether the returned vma matches a desired one. Signed-off-by: KOSAKI Motohiro Cc: Naoya Horiguchi Cc: Hiroyuki Kamezawa Cc: Andrea Arcangeli Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/pagewalk.c b/mm/pagewalk.c index c3450d5..606bbb4 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -176,7 +176,7 @@ int walk_page_range(unsigned long addr, unsigned long end, * we can't handled it in the same manner as non-huge pages. */ vma = find_vma(walk->mm, addr); - if (vma && is_vm_hugetlb_page(vma)) { + if (vma && vma->vm_start <= addr && is_vm_hugetlb_page(vma)) { if (vma->vm_end < next) next = vma->vm_end; /* -- cgit v0.10.2 From 6c6d5280431544e4036886ea74e3334a98bc5f96 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 25 Jul 2011 17:12:09 -0700 Subject: pagewalk: don't look up vma if walk->hugetlb_entry is unused Currently, walk_page_range() calls find_vma() every page table for walk iteration. but it's completely unnecessary if walk->hugetlb_entry is unused. And we don't have to assume find_vma() is a lightweight operation. So this patch checks the walk->hugetlb_entry and avoids the find_vma() call if possible. This patch also makes some cleanups. 1) remove ugly uninitialized_var() and 2) #ifdef in function body. Signed-off-by: KOSAKI Motohiro Cc: Naoya Horiguchi Cc: Hiroyuki Kamezawa Cc: Andrea Arcangeli Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 606bbb4..ee4ff87 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -126,7 +126,39 @@ static int walk_hugetlb_range(struct vm_area_struct *vma, return 0; } -#endif + +static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk) +{ + struct vm_area_struct *vma; + + /* We don't need vma lookup at all. */ + if (!walk->hugetlb_entry) + return NULL; + + VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem)); + vma = find_vma(walk->mm, addr); + if (vma && vma->vm_start <= addr && is_vm_hugetlb_page(vma)) + return vma; + + return NULL; +} + +#else /* CONFIG_HUGETLB_PAGE */ +static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk) +{ + return NULL; +} + +static int walk_hugetlb_range(struct vm_area_struct *vma, + unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + return 0; +} + +#endif /* CONFIG_HUGETLB_PAGE */ + + /** * walk_page_range - walk a memory map's page tables with a callback @@ -165,18 +197,17 @@ int walk_page_range(unsigned long addr, unsigned long end, pgd = pgd_offset(walk->mm, addr); do { - struct vm_area_struct *uninitialized_var(vma); + struct vm_area_struct *vma; next = pgd_addr_end(addr, end); -#ifdef CONFIG_HUGETLB_PAGE /* * handle hugetlb vma individually because pagetable walk for * the hugetlb page is dependent on the architecture and * we can't handled it in the same manner as non-huge pages. */ - vma = find_vma(walk->mm, addr); - if (vma && vma->vm_start <= addr && is_vm_hugetlb_page(vma)) { + vma = hugetlb_vma(addr, walk); + if (vma) { if (vma->vm_end < next) next = vma->vm_end; /* @@ -189,7 +220,7 @@ int walk_page_range(unsigned long addr, unsigned long end, pgd = pgd_offset(walk->mm, next); continue; } -#endif + if (pgd_none_or_clear_bad(pgd)) { if (walk->pte_hole) err = walk->pte_hole(addr, next, walk); -- cgit v0.10.2 From c27fe4c8942d3ca715986f79cc26f44608d7d9fb Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 25 Jul 2011 17:12:10 -0700 Subject: pagewalk: add locking-rule comments Originally, walk_hugetlb_range() didn't require a caller take any lock. But commit d33b9f45bd ("mm: hugetlb: fix hugepage memory leak in walk_page_range") changed its rule. Because it added find_vma() call in walk_hugetlb_range(). Any locking-rule change commit should write a doc too. [akpm@linux-foundation.org: clarify comment] Signed-off-by: KOSAKI Motohiro Cc: Naoya Horiguchi Cc: Hiroyuki Kamezawa Cc: Andrea Arcangeli Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mm.h b/include/linux/mm.h index 6321e84..3c5505a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -911,6 +911,8 @@ unsigned long unmap_vmas(struct mmu_gather *tlb, * @pte_entry: if set, called for each non-empty PTE (4th-level) entry * @pte_hole: if set, called for each hole at all levels * @hugetlb_entry: if set, called for each hugetlb entry + * *Caution*: The caller must hold mmap_sem() if @hugetlb_entry + * is used. * * (see walk_page_range for more details) */ diff --git a/mm/pagewalk.c b/mm/pagewalk.c index ee4ff87..f792940 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -181,6 +181,9 @@ static int walk_hugetlb_range(struct vm_area_struct *vma, * * If any callback returns a non-zero value, the walk is aborted and * the return value is propagated back to the caller. Otherwise 0 is returned. + * + * walk->mm->mmap_sem must be held for at least read if walk->hugetlb_entry + * is !NULL. */ int walk_page_range(unsigned long addr, unsigned long end, struct mm_walk *walk) -- cgit v0.10.2 From dd78553b5e7a0b34c0b60478d04ee16d8d8f4fa7 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 25 Jul 2011 17:12:11 -0700 Subject: pagewalk: fix code comment for THP Commit bae9c19bf1 ("thp: split_huge_page_mm/vma") changed locking behavior of walk_page_range(). Thus this patch changes the comment too. Signed-off-by: KOSAKI Motohiro Cc: Naoya Horiguchi Cc: Hiroyuki Kamezawa Cc: Andrea Arcangeli Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/pagewalk.c b/mm/pagewalk.c index f792940..2f5cf10 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -176,7 +176,8 @@ static int walk_hugetlb_range(struct vm_area_struct *vma, * associated range, and a copy of the original mm_walk for access to * the ->private or ->mm fields. * - * No locks are taken, but the bottom level iterator will map PTE + * Usually no locks are taken, but splitting transparent huge page may + * take page table lock. And the bottom level iterator will map PTE * directories from highmem if necessary. * * If any callback returns a non-zero value, the walk is aborted and -- cgit v0.10.2 From 00a66d2974485d7d95d61d5772142b2a2231ed2a Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 25 Jul 2011 17:12:12 -0700 Subject: mm: remove the leftovers of noswapaccount In commit a2c8990aed5ab ("memsw: remove noswapaccount kernel parameter"), Michal forgot to remove some left pieces of noswapaccount in the tree, this patch removes them all. Signed-off-by: WANG Cong Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index d59e71d..f9d240d 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -518,22 +518,6 @@ Files: net/netfilter/xt_connlimit.c ---------------------------- -What: noswapaccount kernel command line parameter -When: 2.6.40 -Why: The original implementation of memsw feature enabled by - CONFIG_CGROUP_MEM_RES_CTLR_SWAP could be disabled by the noswapaccount - kernel parameter (introduced in 2.6.29-rc1). Later on, this decision - turned out to be not ideal because we cannot have the feature compiled - in and disabled by default and let only interested to enable it - (e.g. general distribution kernels might need it). Therefore we have - added swapaccount[=0|1] parameter (introduced in 2.6.37) which provides - the both possibilities. If we remove noswapaccount we will have - less command line parameters with the same functionality and we - can also cleanup the parameter handling a bit (). -Who: Michal Hocko - ----------------------------- - What: ipt_addrtype match include file When: 2012 Why: superseded by xt_addrtype diff --git a/init/Kconfig b/init/Kconfig index e20aa31..d627783 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -673,7 +673,7 @@ config CGROUP_MEM_RES_CTLR_SWAP be careful about enabling this. When memory resource controller is disabled by boot option, this will be automatically disabled and there will be no overhead from this. Even when you set this config=y, - if boot option "noswapaccount" is set, swap will not be accounted. + if boot option "swapaccount=0" is set, swap will not be accounted. Now, memory usage of swap_cgroup is 2 bytes per entry. If swap page size is 4096bytes, 512k per 1Gbytes of swap. config CGROUP_MEM_RES_CTLR_SWAP_ENABLED @@ -688,7 +688,7 @@ config CGROUP_MEM_RES_CTLR_SWAP_ENABLED parameter should have this option unselected. For those who want to have the feature enabled by default should select this option (if, for some reason, they need to disable it - then noswapaccount does the trick). + then swapaccount=0 does the trick). config CGROUP_PERF bool "Enable perf_event per-cpu per-container group (cgroup) monitoring" diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 53bffc6..9cb1c44 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -537,7 +537,7 @@ int swap_cgroup_swapon(int type, unsigned long max_pages) nomem: printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup.\n"); printk(KERN_INFO - "swap_cgroup can be disabled by noswapaccount boot option\n"); + "swap_cgroup can be disabled by swapaccount=0 boot option\n"); return -ENOMEM; } -- cgit v0.10.2 From 1bb36fbd4d58ec3fab4dab5ed39a2af492c263ea Mon Sep 17 00:00:00 2001 From: Daniel Kiper Date: Mon, 25 Jul 2011 17:12:13 -0700 Subject: mm/page_cgroup.c: simplify code by using SECTION_ALIGN_UP() and SECTION_ALIGN_DOWN() macros Commit a539f3533b78e3 ("mm: add SECTION_ALIGN_UP() and SECTION_ALIGN_DOWN() macro") introduced the SECTION_ALIGN_UP() and SECTION_ALIGN_DOWN() macros. Use those macros to increase code readability. Signed-off-by: Daniel Kiper Acked-by: David Rientjes Acked-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 9cb1c44..39d216d 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -225,8 +225,8 @@ int __meminit online_page_cgroup(unsigned long start_pfn, unsigned long start, end, pfn; int fail = 0; - start = start_pfn & ~(PAGES_PER_SECTION - 1); - end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); + start = SECTION_ALIGN_DOWN(start_pfn); + end = SECTION_ALIGN_UP(start_pfn + nr_pages); if (nid == -1) { /* @@ -258,8 +258,8 @@ int __meminit offline_page_cgroup(unsigned long start_pfn, { unsigned long start, end, pfn; - start = start_pfn & ~(PAGES_PER_SECTION - 1); - end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); + start = SECTION_ALIGN_DOWN(start_pfn); + end = SECTION_ALIGN_UP(start_pfn + nr_pages); for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) __free_page_cgroup(pfn); -- cgit v0.10.2 From d788e80a8c83ecdbdd55b6e985cced9cfe3a815b Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Mon, 25 Jul 2011 17:12:14 -0700 Subject: mm/huge_memory.c: minor lock simplification in __khugepaged_exit The lock is released first thing in all three branches. Simplify this by unconditionally releasing lock and remove else clause which was only there to be sure lock was released. Signed-off-by: Chris Wright Reviewed-by: Michal Hocko Cc: Andrea Arcangeli Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 81532f2..e2d1587 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1596,14 +1596,13 @@ void __khugepaged_exit(struct mm_struct *mm) list_del(&mm_slot->mm_node); free = 1; } + spin_unlock(&khugepaged_mm_lock); if (free) { - spin_unlock(&khugepaged_mm_lock); clear_bit(MMF_VM_HUGEPAGE, &mm->flags); free_mm_slot(mm_slot); mmdrop(mm); } else if (mm_slot) { - spin_unlock(&khugepaged_mm_lock); /* * This is required to serialize against * khugepaged_test_exit() (which is guaranteed to run @@ -1614,8 +1613,7 @@ void __khugepaged_exit(struct mm_struct *mm) */ down_write(&mm->mmap_sem); up_write(&mm->mmap_sem); - } else - spin_unlock(&khugepaged_mm_lock); + } } static void release_pte_page(struct page *page) -- cgit v0.10.2 From 32f84528fbb5177275193a3311be8756f0cbd62c Mon Sep 17 00:00:00 2001 From: Chris Forbes Date: Mon, 25 Jul 2011 17:12:14 -0700 Subject: mm: hugetlb: fix coding style issues Fix coding style issues flagged by checkpatch.pl Signed-off-by: Chris Forbes Acked-by: Eric B Munson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/hugetlb.c b/mm/hugetlb.c index c6d342d..dae27ba 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include @@ -62,10 +62,10 @@ static DEFINE_SPINLOCK(hugetlb_lock); * must either hold the mmap_sem for write, or the mmap_sem for read and * the hugetlb_instantiation mutex: * - * down_write(&mm->mmap_sem); + * down_write(&mm->mmap_sem); * or - * down_read(&mm->mmap_sem); - * mutex_lock(&hugetlb_instantiation_mutex); + * down_read(&mm->mmap_sem); + * mutex_lock(&hugetlb_instantiation_mutex); */ struct file_region { struct list_head link; @@ -503,9 +503,10 @@ static void update_and_free_page(struct hstate *h, struct page *page) h->nr_huge_pages--; h->nr_huge_pages_node[page_to_nid(page)]--; for (i = 0; i < pages_per_huge_page(h); i++) { - page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | - 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | - 1 << PG_private | 1<< PG_writeback); + page[i].flags &= ~(1 << PG_locked | 1 << PG_error | + 1 << PG_referenced | 1 << PG_dirty | + 1 << PG_active | 1 << PG_reserved | + 1 << PG_private | 1 << PG_writeback); } set_compound_page_dtor(page, NULL); set_page_refcounted(page); @@ -591,7 +592,6 @@ int PageHuge(struct page *page) return dtor == free_huge_page; } - EXPORT_SYMBOL_GPL(PageHuge); static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid) @@ -2132,9 +2132,8 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma, pte_t entry; entry = pte_mkwrite(pte_mkdirty(huge_ptep_get(ptep))); - if (huge_ptep_set_access_flags(vma, address, ptep, entry, 1)) { + if (huge_ptep_set_access_flags(vma, address, ptep, entry, 1)) update_mmu_cache(vma, address, ptep); - } } @@ -2189,9 +2188,9 @@ static int is_hugetlb_entry_migration(pte_t pte) if (huge_pte_none(pte) || pte_present(pte)) return 0; swp = pte_to_swp_entry(pte); - if (non_swap_entry(swp) && is_migration_entry(swp)) { + if (non_swap_entry(swp) && is_migration_entry(swp)) return 1; - } else + else return 0; } @@ -2202,9 +2201,9 @@ static int is_hugetlb_entry_hwpoisoned(pte_t pte) if (huge_pte_none(pte) || pte_present(pte)) return 0; swp = pte_to_swp_entry(pte); - if (non_swap_entry(swp) && is_hwpoison_entry(swp)) { + if (non_swap_entry(swp) && is_hwpoison_entry(swp)) return 1; - } else + else return 0; } @@ -2567,7 +2566,7 @@ retry: * So we need to block hugepage fault by PG_hwpoison bit check. */ if (unlikely(PageHWPoison(page))) { - ret = VM_FAULT_HWPOISON | + ret = VM_FAULT_HWPOISON | VM_FAULT_SET_HINDEX(h - hstates); goto backout_unlocked; } @@ -2635,7 +2634,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, migration_entry_wait(mm, (pmd_t *)ptep, address); return 0; } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) - return VM_FAULT_HWPOISON_LARGE | + return VM_FAULT_HWPOISON_LARGE | VM_FAULT_SET_HINDEX(h - hstates); } -- cgit v0.10.2 From 6ac47520063b230641a64062b8a229201cd0a3a8 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 25 Jul 2011 17:12:16 -0700 Subject: mm/memory.c: remove ZAP_BLOCK_SIZE ZAP_BLOCK_SIZE became unused in the preemptible-mmu_gather work ("mm: Remove i_mmap_lock lockbreak"). So zap it. Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memory.c b/mm/memory.c index 9b8a01d..a58bbeb 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1290,13 +1290,6 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb, return addr; } -#ifdef CONFIG_PREEMPT -# define ZAP_BLOCK_SIZE (8 * PAGE_SIZE) -#else -/* No preempt: go for improved straight-line efficiency */ -# define ZAP_BLOCK_SIZE (1024 * PAGE_SIZE) -#endif - /** * unmap_vmas - unmap a range of memory covered by a list of vma's * @tlb: address of the caller's struct mmu_gather @@ -1310,10 +1303,6 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb, * * Unmap all pages in the vma list. * - * We aim to not hold locks for too long (for scheduling latency reasons). - * So zap pages in ZAP_BLOCK_SIZE bytecounts. This means we need to - * return the ending mmu_gather to the caller. - * * Only addresses between `start' and `end' will be unmapped. * * The VMA list must be sorted in ascending virtual address order. -- cgit v0.10.2 From 11239836c04b50ba8453ec58ca7a7bd716ef02c1 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 25 Jul 2011 17:12:17 -0700 Subject: oom: remove references to old badness() function The badness() function in the oom killer was renamed to oom_badness() in a63d83f427fb ("oom: badness heuristic rewrite") since it is a globally exported function for clarity. The prototype for the old function still existed in linux/oom.h, so remove it. There are no existing users. Also fixes documentation and comment references to badness() and adjusts them accordingly. Signed-off-by: David Rientjes Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/ABI/obsolete/proc-pid-oom_adj b/Documentation/ABI/obsolete/proc-pid-oom_adj index cf63f26..9a3cb88 100644 --- a/Documentation/ABI/obsolete/proc-pid-oom_adj +++ b/Documentation/ABI/obsolete/proc-pid-oom_adj @@ -14,7 +14,7 @@ Why: /proc//oom_adj allows userspace to influence the oom killer's A much more powerful interface, /proc//oom_score_adj, was introduced with the oom killer rewrite that allows users to increase or - decrease the badness() score linearly. This interface will replace + decrease the badness score linearly. This interface will replace /proc//oom_adj. A warning will be emitted to the kernel log if an application uses this diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index f9d240d..d093e55 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -184,7 +184,7 @@ Why: /proc//oom_adj allows userspace to influence the oom killer's A much more powerful interface, /proc//oom_score_adj, was introduced with the oom killer rewrite that allows users to increase or - decrease the badness() score linearly. This interface will replace + decrease the badness score linearly. This interface will replace /proc//oom_adj. A warning will be emitted to the kernel log if an application uses this diff --git a/include/linux/oom.h b/include/linux/oom.h index 4952fb8..13b7b02 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -64,10 +64,6 @@ static inline void oom_killer_enable(void) oom_killer_disabled = false; } -/* The badness from the OOM killer */ -extern unsigned long badness(struct task_struct *p, struct mem_cgroup *mem, - const nodemask_t *nodemask, unsigned long uptime); - extern struct task_struct *find_lock_task_mm(struct task_struct *p); /* sysctls */ diff --git a/mm/oom_kill.c b/mm/oom_kill.c index b0be989..eafff89 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -487,7 +487,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, /* * If any of p's children has a different mm and is eligible for kill, - * the one with the highest badness() score is sacrificed for its + * the one with the highest oom_badness() score is sacrificed for its * parent. This attempts to lose the minimal amount of work done while * still freeing memory. */ -- cgit v0.10.2 From be8f684d73d8d916847e996bf69cef14352872c6 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 25 Jul 2011 17:12:18 -0700 Subject: oom: make deprecated use of oom_adj more verbose /proc/pid/oom_adj is deprecated and scheduled for removal in August 2012 according to Documentation/feature-removal-schedule.txt. This patch makes the warning more verbose by making it appear as a more serious problem (the presence of a stack trace and being multiline should attract more attention) so that applications still using the old interface can get fixed. Very popular users of the old interface have been converted since the oom killer rewrite has been introduced. udevd switched to the /proc/pid/oom_score_adj interface for v162, kde switched in 4.6.1, and opensshd switched in 5.7p1. At the start of 2012, this should be changed into a WARN() to emit all such incidents and then finally remove the tunable in August 2012 as scheduled. Signed-off-by: David Rientjes Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/base.c b/fs/proc/base.c index 91fb655..c9e3f65 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1118,10 +1118,9 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, * Warn that /proc/pid/oom_adj is deprecated, see * Documentation/feature-removal-schedule.txt. */ - printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, " - "please use /proc/%d/oom_score_adj instead.\n", - current->comm, task_pid_nr(current), - task_pid_nr(task), task_pid_nr(task)); + WARN_ONCE(1, "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", + current->comm, task_pid_nr(current), task_pid_nr(task), + task_pid_nr(task)); task->signal->oom_adj = oom_adjust; /* * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum -- cgit v0.10.2 From c9d8c3d0896bfa5b57531ecc41a85ffbc6d87dbe Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 25 Jul 2011 17:12:18 -0700 Subject: mm/memblock.c: avoid abuse of RED_INACTIVE RED_INACTIVE is a slab thing, and reusing it for memblock was inappropriate, because memblock is dealing with phys_addr_t's which have a Kconfigurable sizeof(). Create a new poison type for this application. Fixes the sparse warning warning: cast truncates bits from constant value (9f911029d74e35b becomes 9d74e35b) Reported-by: H Hartley Sweeten Tested-by: H Hartley Sweeten Acked-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/poison.h b/include/linux/poison.h index 2110a81..79159de 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -40,6 +40,12 @@ #define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */ #define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */ +#ifdef CONFIG_PHYS_ADDR_T_64BIT +#define MEMBLOCK_INACTIVE 0x3a84fb0144c9e71bULL +#else +#define MEMBLOCK_INACTIVE 0x44c9e71bUL +#endif + #define SLUB_RED_INACTIVE 0xbb #define SLUB_RED_ACTIVE 0xcc diff --git a/mm/memblock.c b/mm/memblock.c index a0562d1..ccbf973 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -758,9 +758,9 @@ void __init memblock_analyze(void) /* Check marker in the unused last array entry */ WARN_ON(memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS].base - != (phys_addr_t)RED_INACTIVE); + != MEMBLOCK_INACTIVE); WARN_ON(memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS].base - != (phys_addr_t)RED_INACTIVE); + != MEMBLOCK_INACTIVE); memblock.memory_size = 0; @@ -786,8 +786,8 @@ void __init memblock_init(void) memblock.reserved.max = INIT_MEMBLOCK_REGIONS; /* Write a marker in the unused last array entry */ - memblock.memory.regions[INIT_MEMBLOCK_REGIONS].base = (phys_addr_t)RED_INACTIVE; - memblock.reserved.regions[INIT_MEMBLOCK_REGIONS].base = (phys_addr_t)RED_INACTIVE; + memblock.memory.regions[INIT_MEMBLOCK_REGIONS].base = MEMBLOCK_INACTIVE; + memblock.reserved.regions[INIT_MEMBLOCK_REGIONS].base = MEMBLOCK_INACTIVE; /* Create a dummy zero size MEMBLOCK which will get coalesced away later. * This simplifies the memblock_add() code below... -- cgit v0.10.2 From c15bef3099c346f2124367bff46954b59e13c3ee Mon Sep 17 00:00:00 2001 From: Dmitry Fink Date: Mon, 25 Jul 2011 17:12:19 -0700 Subject: mmap: fix and tidy up overcommit page arithmetic - shmem pages are not immediately available, but they are not potentially available either, even if we swap them out, they will just relocate from memory into swap, total amount of immediate and potentially available memory is not going to be affected, so we shouldn't count them as potentially free in the first place. - nr_free_pages() is not an expensive operation anymore, there is no need to split the decision making in two halves and repeat code. Signed-off-by: Dmitry Fink Reviewed-by: Minchan Kim Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/mmap.c b/mm/mmap.c index d49736f..a65efd4 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -122,9 +122,17 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) return 0; if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { - unsigned long n; + free = global_page_state(NR_FREE_PAGES); + free += global_page_state(NR_FILE_PAGES); + + /* + * shmem pages shouldn't be counted as free in this + * case, they can't be purged, only swapped out, and + * that won't affect the overall amount of available + * memory in the system. + */ + free -= global_page_state(NR_SHMEM); - free = global_page_state(NR_FILE_PAGES); free += nr_swap_pages; /* @@ -136,34 +144,18 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) free += global_page_state(NR_SLAB_RECLAIMABLE); /* - * Leave the last 3% for root - */ - if (!cap_sys_admin) - free -= free / 32; - - if (free > pages) - return 0; - - /* - * nr_free_pages() is very expensive on large systems, - * only call if we're about to fail. - */ - n = nr_free_pages(); - - /* * Leave reserved pages. The pages are not for anonymous pages. */ - if (n <= totalreserve_pages) + if (free <= totalreserve_pages) goto error; else - n -= totalreserve_pages; + free -= totalreserve_pages; /* * Leave the last 3% for root */ if (!cap_sys_admin) - n -= n / 32; - free += n; + free -= free / 32; if (free > pages) return 0; diff --git a/mm/nommu.c b/mm/nommu.c index 5c5c2d4..4358032 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1884,9 +1884,17 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) return 0; if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { - unsigned long n; + free = global_page_state(NR_FREE_PAGES); + free += global_page_state(NR_FILE_PAGES); + + /* + * shmem pages shouldn't be counted as free in this + * case, they can't be purged, only swapped out, and + * that won't affect the overall amount of available + * memory in the system. + */ + free -= global_page_state(NR_SHMEM); - free = global_page_state(NR_FILE_PAGES); free += nr_swap_pages; /* @@ -1898,34 +1906,18 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) free += global_page_state(NR_SLAB_RECLAIMABLE); /* - * Leave the last 3% for root - */ - if (!cap_sys_admin) - free -= free / 32; - - if (free > pages) - return 0; - - /* - * nr_free_pages() is very expensive on large systems, - * only call if we're about to fail. - */ - n = nr_free_pages(); - - /* * Leave reserved pages. The pages are not for anonymous pages. */ - if (n <= totalreserve_pages) + if (free <= totalreserve_pages) goto error; else - n -= totalreserve_pages; + free -= totalreserve_pages; /* * Leave the last 3% for root */ if (!cap_sys_admin) - n -= n / 32; - free += n; + free -= free / 32; if (free > pages) return 0; -- cgit v0.10.2 From 4dedbf8dc1c825f2e4aa2d2624058533b5d76f85 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 25 Jul 2011 17:12:20 -0700 Subject: sparc64: kill page table quicklists With the recent mmu_gather changes that included generic RCU freeing of page-tables, it is now quite straightforward to implement gup_fast() on sparc64. This patch: Remove the page table quicklists. They are pointless and make it harder to use RCU page table freeing and share code with other architectures. BTW, this is the second time this has happened, see commit 3c936465249f ("[SPARC64]: Kill pgtable quicklists and use SLAB.") Signed-off-by: David S. Miller Signed-off-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 253986b..af64348 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -81,10 +81,6 @@ config IOMMU_HELPER bool default y if SPARC64 -config QUICKLIST - bool - default y if SPARC64 - config STACKTRACE_SUPPORT bool default y if SPARC64 diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h index 4e5e087..cb4867d 100644 --- a/arch/sparc/include/asm/pgalloc_64.h +++ b/arch/sparc/include/asm/pgalloc_64.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -14,69 +13,68 @@ /* Page table allocation/freeing. */ +extern struct kmem_cache *pgtable_cache; + static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - return quicklist_alloc(0, GFP_KERNEL, NULL); + return kmem_cache_alloc(pgtable_cache, GFP_KERNEL); } static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { - quicklist_free(0, NULL, pgd); + kmem_cache_free(pgtable_cache, pgd); } #define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return quicklist_alloc(0, GFP_KERNEL, NULL); + return kmem_cache_alloc(pgtable_cache, + GFP_KERNEL|__GFP_REPEAT); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { - quicklist_free(0, NULL, pmd); + kmem_cache_free(pgtable_cache, pmd); } static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return quicklist_alloc(0, GFP_KERNEL, NULL); + return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *page; - void *pg; + pte_t *pte; - pg = quicklist_alloc(0, GFP_KERNEL, NULL); - if (!pg) + pte = pte_alloc_one_kernel(mm, address); + if (!pte) return NULL; - page = virt_to_page(pg); + page = virt_to_page(pte); pgtable_page_ctor(page); return page; } static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - quicklist_free(0, NULL, pte); + free_page((unsigned long)pte); } static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) { pgtable_page_dtor(ptepage); - quicklist_free_page(0, NULL, ptepage); + __free_page(ptepage); } - #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE) #define pmd_populate(MM,PMD,PTE_PAGE) \ pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE)) #define pmd_pgtable(pmd) pmd_page(pmd) -static inline void check_pgt_cache(void) -{ - quicklist_trim(0, NULL, 25, 16); -} +#define check_pgt_cache() do { } while (0) #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) #define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd) diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index a5f51b2..536412d 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -236,6 +236,8 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign } } +struct kmem_cache *pgtable_cache __read_mostly; + static struct kmem_cache *tsb_caches[8] __read_mostly; static const char *tsb_cache_names[8] = { @@ -253,6 +255,15 @@ void __init pgtable_cache_init(void) { unsigned long i; + pgtable_cache = kmem_cache_create("pgtable_cache", + PAGE_SIZE, PAGE_SIZE, + 0, + _clear_page); + if (!pgtable_cache) { + prom_printf("pgtable_cache_init(): Could not create!\n"); + prom_halt(); + } + for (i = 0; i < 8; i++) { unsigned long size = 8192 << i; const char *name = tsb_cache_names[i]; -- cgit v0.10.2 From 4a0100f7546fb642e0e04a38fa7ca198cd016b47 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 25 Jul 2011 17:12:21 -0700 Subject: sparc64: use RCU page table freeing Make use of the generic RCU page table freeing on Sparc64, doing so allows for race-free software page-table walkers like gup_fast(). Signed-off-by: David S. Miller Signed-off-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index af64348..1074ddd 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -41,6 +41,7 @@ config SPARC64 select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_KRETPROBES select HAVE_KPROBES + select HAVE_RCU_TABLE_FREE if SMP select HAVE_MEMBLOCK select HAVE_SYSCALL_WRAPPERS select HAVE_DYNAMIC_FTRACE diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h index cb4867d..40b2d7a 100644 --- a/arch/sparc/include/asm/pgalloc_64.h +++ b/arch/sparc/include/asm/pgalloc_64.h @@ -76,7 +76,51 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) #define check_pgt_cache() do { } while (0) -#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) -#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd) +static inline void pgtable_free(void *table, bool is_page) +{ + if (is_page) + free_page((unsigned long)table); + else + kmem_cache_free(pgtable_cache, table); +} + +#ifdef CONFIG_SMP + +struct mmu_gather; +extern void tlb_remove_table(struct mmu_gather *, void *); + +static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, bool is_page) +{ + unsigned long pgf = (unsigned long)table; + if (is_page) + pgf |= 0x1UL; + tlb_remove_table(tlb, (void *)pgf); +} + +static inline void __tlb_remove_table(void *_table) +{ + void *table = (void *)((unsigned long)_table & ~0x1UL); + bool is_page = false; + + if ((unsigned long)_table & 0x1UL) + is_page = true; + pgtable_free(table, is_page); +} +#else /* CONFIG_SMP */ +static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, bool is_page) +{ + pgtable_free(table, is_page); +} +#endif /* !CONFIG_SMP */ + +static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage, + unsigned long address) +{ + pgtable_page_dtor(ptepage); + pgtable_free_tlb(tlb, page_address(ptepage), true); +} + +#define __pmd_free_tlb(tlb, pmd, addr) \ + pgtable_free_tlb(tlb, pmd, false) #endif /* _SPARC64_PGALLOC_H */ -- cgit v0.10.2 From 683d2fa672da5e3b4fe96f13c43eba32b068d64b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 25 Jul 2011 17:12:21 -0700 Subject: sparc64: add support for _PAGE_SPECIAL Luckily there are still a few software PTE bits remaining and they even match up in both the sun4u and sun4v pte layouts. Signed-off-by: David S. Miller Signed-off-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 1e03c5a..adf8932 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -95,6 +95,10 @@ /* PTE bits which are the same in SUN4U and SUN4V format. */ #define _PAGE_VALID _AC(0x8000000000000000,UL) /* Valid TTE */ #define _PAGE_R _AC(0x8000000000000000,UL) /* Keep ref bit uptodate*/ +#define _PAGE_SPECIAL _AC(0x0200000000000000,UL) /* Special page */ + +/* Advertise support for _PAGE_SPECIAL */ +#define __HAVE_ARCH_PTE_SPECIAL /* SUN4U pte bits... */ #define _PAGE_SZ4MB_4U _AC(0x6000000000000000,UL) /* 4MB Page */ @@ -104,6 +108,7 @@ #define _PAGE_NFO_4U _AC(0x1000000000000000,UL) /* No Fault Only */ #define _PAGE_IE_4U _AC(0x0800000000000000,UL) /* Invert Endianness */ #define _PAGE_SOFT2_4U _AC(0x07FC000000000000,UL) /* Software bits, set 2 */ +#define _PAGE_SPECIAL_4U _AC(0x0200000000000000,UL) /* Special page */ #define _PAGE_RES1_4U _AC(0x0002000000000000,UL) /* Reserved */ #define _PAGE_SZ32MB_4U _AC(0x0001000000000000,UL) /* (Panther) 32MB page */ #define _PAGE_SZ256MB_4U _AC(0x2001000000000000,UL) /* (Panther) 256MB page */ @@ -133,6 +138,7 @@ #define _PAGE_ACCESSED_4V _AC(0x1000000000000000,UL) /* Accessed (ref'd) */ #define _PAGE_READ_4V _AC(0x0800000000000000,UL) /* Readable SW Bit */ #define _PAGE_WRITE_4V _AC(0x0400000000000000,UL) /* Writable SW Bit */ +#define _PAGE_SPECIAL_4V _AC(0x0200000000000000,UL) /* Special page */ #define _PAGE_PADDR_4V _AC(0x00FFFFFFFFFFE000,UL) /* paddr[55:13] */ #define _PAGE_IE_4V _AC(0x0000000000001000,UL) /* Invert Endianness */ #define _PAGE_E_4V _AC(0x0000000000000800,UL) /* side-Effect */ @@ -302,10 +308,10 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot) : "=r" (mask), "=r" (tmp) : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U | _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_PRESENT_4U | - _PAGE_SZBITS_4U), + _PAGE_SZBITS_4U | _PAGE_SPECIAL), "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | _PAGE_PRESENT_4V | - _PAGE_SZBITS_4V)); + _PAGE_SZBITS_4V | _PAGE_SPECIAL)); return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask)); } @@ -502,6 +508,7 @@ static inline pte_t pte_mkyoung(pte_t pte) static inline pte_t pte_mkspecial(pte_t pte) { + pte_val(pte) |= _PAGE_SPECIAL; return pte; } @@ -607,9 +614,9 @@ static inline unsigned long pte_present(pte_t pte) return val; } -static inline int pte_special(pte_t pte) +static inline unsigned long pte_special(pte_t pte) { - return 0; + return pte_val(pte) & _PAGE_SPECIAL; } #define pmd_set(pmdp, ptep) \ -- cgit v0.10.2 From df077ac4687500e02a273a628057ff5ab17dc19f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 25 Jul 2011 17:12:22 -0700 Subject: sparc64: implement get_user_pages_fast() Signed-off-by: David S. Miller Signed-off-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile index 79836a7..e3cda21 100644 --- a/arch/sparc/mm/Makefile +++ b/arch/sparc/mm/Makefile @@ -4,7 +4,7 @@ asflags-y := -ansi ccflags-y := -Werror -obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o +obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o gup.o obj-y += fault_$(BITS).o obj-y += init_$(BITS).o obj-$(CONFIG_SPARC32) += loadmmu.o diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c new file mode 100644 index 0000000..a986b5d --- /dev/null +++ b/arch/sparc/mm/gup.c @@ -0,0 +1,181 @@ +/* + * Lockless get_user_pages_fast for sparc, cribbed from powerpc + * + * Copyright (C) 2008 Nick Piggin + * Copyright (C) 2008 Novell Inc. + */ + +#include +#include +#include +#include +#include +#include + +/* + * The performance critical leaf functions are made noinline otherwise gcc + * inlines everything into a single function which results in too much + * register pressure. + */ +static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask, result; + pte_t *ptep; + + if (tlb_type == hypervisor) { + result = _PAGE_PRESENT_4V|_PAGE_P_4V; + if (write) + result |= _PAGE_WRITE_4V; + } else { + result = _PAGE_PRESENT_4U|_PAGE_P_4U; + if (write) + result |= _PAGE_WRITE_4U; + } + mask = result | _PAGE_SPECIAL; + + ptep = pte_offset_kernel(&pmd, addr); + do { + struct page *page, *head; + pte_t pte = *ptep; + + if ((pte_val(pte) & mask) != result) + return 0; + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + + /* The hugepage case is simplified on sparc64 because + * we encode the sub-page pfn offsets into the + * hugepage PTEs. We could optimize this in the future + * use page_cache_add_speculative() for the hugepage case. + */ + page = pte_page(pte); + head = compound_head(page); + if (!page_cache_get_speculative(head)) + return 0; + if (unlikely(pte_val(pte) != pte_val(*ptep))) { + put_page(head); + return 0; + } + + pages[*nr] = page; + (*nr)++; + } while (ptep++, addr += PAGE_SIZE, addr != end); + + return 1; +} + +static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + pmd_t *pmdp; + + pmdp = pmd_offset(&pud, addr); + do { + pmd_t pmd = *pmdp; + + next = pmd_addr_end(addr, end); + if (pmd_none(pmd)) + return 0; + if (!gup_pte_range(pmd, addr, next, write, pages, nr)) + return 0; + } while (pmdp++, addr = next, addr != end); + + return 1; +} + +static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + pud_t *pudp; + + pudp = pud_offset(&pgd, addr); + do { + pud_t pud = *pudp; + + next = pud_addr_end(addr, end); + if (pud_none(pud)) + return 0; + if (!gup_pmd_range(pud, addr, next, write, pages, nr)) + return 0; + } while (pudp++, addr = next, addr != end); + + return 1; +} + +int get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next; + pgd_t *pgdp; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + + /* + * XXX: batch / limit 'nr', to avoid large irq off latency + * needs some instrumenting to determine the common sizes used by + * important workloads (eg. DB2), and whether limiting the batch size + * will decrease performance. + * + * It seems like we're in the clear for the moment. Direct-IO is + * the main guy that batches up lots of get_user_pages, and even + * they are limited to 64-at-a-time which is not so many. + */ + /* + * This doesn't prevent pagetable teardown, but does prevent + * the pagetables from being freed on sparc. + * + * So long as we atomically load page table pointers versus teardown, + * we can follow the address down to the the page and take a ref on it. + */ + local_irq_disable(); + + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = *pgdp; + + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + goto slow; + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + goto slow; + } while (pgdp++, addr = next, addr != end); + + local_irq_enable(); + + VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); + return nr; + + { + int ret; + +slow: + local_irq_enable(); + + /* Try to get the remaining pages with get_user_pages */ + start += nr << PAGE_SHIFT; + pages += nr; + + down_read(&mm->mmap_sem); + ret = get_user_pages(current, mm, start, + (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); + up_read(&mm->mmap_sem); + + /* Have to be a bit careful with return values */ + if (nr > 0) { + if (ret < 0) + ret = nr; + else + ret += nr; + } + + return ret; + } +} -- cgit v0.10.2 From 5e5358e7cf48aa079b8761a7d806ad536023745c Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 25 Jul 2011 17:12:23 -0700 Subject: mm: cleanup descriptions of filler arg The often-NULL data arg to read_cache_page() and read_mapping_page() functions is misdescribed as "destination for read data": no, it's the first arg to the filler function, often struct file * to ->readpage(). Satisfy checkpatch.pl on those filler prototypes, and tidy up the declarations in linux/pagemap.h. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 8e38d4c..cfaaa69 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -255,26 +255,24 @@ static inline struct page *grab_cache_page(struct address_space *mapping, extern struct page * grab_cache_page_nowait(struct address_space *mapping, pgoff_t index); extern struct page * read_cache_page_async(struct address_space *mapping, - pgoff_t index, filler_t *filler, - void *data); + pgoff_t index, filler_t *filler, void *data); extern struct page * read_cache_page(struct address_space *mapping, - pgoff_t index, filler_t *filler, - void *data); + pgoff_t index, filler_t *filler, void *data); extern struct page * read_cache_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); extern int read_cache_pages(struct address_space *mapping, struct list_head *pages, filler_t *filler, void *data); static inline struct page *read_mapping_page_async( - struct address_space *mapping, - pgoff_t index, void *data) + struct address_space *mapping, + pgoff_t index, void *data) { filler_t *filler = (filler_t *)mapping->a_ops->readpage; return read_cache_page_async(mapping, index, filler, data); } static inline struct page *read_mapping_page(struct address_space *mapping, - pgoff_t index, void *data) + pgoff_t index, void *data) { filler_t *filler = (filler_t *)mapping->a_ops->readpage; return read_cache_page(mapping, index, filler, data); diff --git a/mm/filemap.c b/mm/filemap.c index f820e60..2780be4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1792,7 +1792,7 @@ EXPORT_SYMBOL(generic_file_readonly_mmap); static struct page *__read_cache_page(struct address_space *mapping, pgoff_t index, - int (*filler)(void *,struct page*), + int (*filler)(void *, struct page *), void *data, gfp_t gfp) { @@ -1823,7 +1823,7 @@ repeat: static struct page *do_read_cache_page(struct address_space *mapping, pgoff_t index, - int (*filler)(void *,struct page*), + int (*filler)(void *, struct page *), void *data, gfp_t gfp) @@ -1863,7 +1863,7 @@ out: * @mapping: the page's address_space * @index: the page index * @filler: function to perform the read - * @data: destination for read data + * @data: first arg to filler(data, page) function, often left as NULL * * Same as read_cache_page, but don't wait for page to become unlocked * after submitting it to the filler. @@ -1875,7 +1875,7 @@ out: */ struct page *read_cache_page_async(struct address_space *mapping, pgoff_t index, - int (*filler)(void *,struct page*), + int (*filler)(void *, struct page *), void *data) { return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping)); @@ -1923,7 +1923,7 @@ EXPORT_SYMBOL(read_cache_page_gfp); * @mapping: the page's address_space * @index: the page index * @filler: function to perform the read - * @data: destination for read data + * @data: first arg to filler(data, page) function, often left as NULL * * Read into the page cache. If a page already exists, and PageUptodate() is * not set, try to fill the page then wait for it to become unlocked. @@ -1932,7 +1932,7 @@ EXPORT_SYMBOL(read_cache_page_gfp); */ struct page *read_cache_page(struct address_space *mapping, pgoff_t index, - int (*filler)(void *,struct page*), + int (*filler)(void *, struct page *), void *data) { return wait_on_page_read(read_cache_page_async(mapping, index, filler, data)); -- cgit v0.10.2 From 85821aab39b3403a8b5731812a930b78684d1642 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 25 Jul 2011 17:12:23 -0700 Subject: mm: truncate functions are in truncate.c Correct comment on truncate_inode_pages*() in linux/mm.h; and remove declaration of page_unuse(), it didn't exist even in 2.2.26 or 2.4.0! Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mm.h b/include/linux/mm.h index 3c5505a..3cccd05 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1411,8 +1411,7 @@ extern int do_munmap(struct mm_struct *, unsigned long, size_t); extern unsigned long do_brk(unsigned long, unsigned long); -/* filemap.c */ -extern unsigned long page_unuse(struct page *); +/* truncate.c */ extern void truncate_inode_pages(struct address_space *, loff_t); extern void truncate_inode_pages_range(struct address_space *, loff_t lstart, loff_t lend); -- cgit v0.10.2 From 8a549bea51138be2126a2cc6aabe8f17ef66b79b Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 25 Jul 2011 17:12:24 -0700 Subject: mm: tidy vmtruncate_range and related functions Use consistent variable names in truncate_pagecache(), truncate_setsize(), vmtruncate() and vmtruncate_range(). unmap_mapping_range() and vmtruncate_range() have mismatched interfaces: don't change either, but make the vmtruncates more precise about what they expect unmap_mapping_range() to do. vmtruncate_range() is currently called only with page-aligned start and end+1: can handle unaligned start, but unaligned end+1 would hit BUG_ON in truncate_inode_pages_range() (lacks partial clearing of the end page). Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/truncate.c b/mm/truncate.c index 003c6c6..c924764 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -531,8 +531,8 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2); /** * truncate_pagecache - unmap and remove pagecache that has been truncated * @inode: inode - * @old: old file offset - * @new: new file offset + * @oldsize: old file size + * @newsize: new file size * * inode's new i_size must already be written before truncate_pagecache * is called. @@ -544,9 +544,10 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2); * situations such as writepage being called for a page that has already * had its underlying blocks deallocated. */ -void truncate_pagecache(struct inode *inode, loff_t old, loff_t new) +void truncate_pagecache(struct inode *inode, loff_t oldsize, loff_t newsize) { struct address_space *mapping = inode->i_mapping; + loff_t holebegin = round_up(newsize, PAGE_SIZE); /* * unmap_mapping_range is called twice, first simply for @@ -557,9 +558,9 @@ void truncate_pagecache(struct inode *inode, loff_t old, loff_t new) * truncate_inode_pages finishes, hence the second * unmap_mapping_range call must be made for correctness. */ - unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); - truncate_inode_pages(mapping, new); - unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); + unmap_mapping_range(mapping, holebegin, 0, 1); + truncate_inode_pages(mapping, newsize); + unmap_mapping_range(mapping, holebegin, 0, 1); } EXPORT_SYMBOL(truncate_pagecache); @@ -589,29 +590,31 @@ EXPORT_SYMBOL(truncate_setsize); /** * vmtruncate - unmap mappings "freed" by truncate() syscall * @inode: inode of the file used - * @offset: file offset to start truncating + * @newsize: file offset to start truncating * * This function is deprecated and truncate_setsize or truncate_pagecache * should be used instead, together with filesystem specific block truncation. */ -int vmtruncate(struct inode *inode, loff_t offset) +int vmtruncate(struct inode *inode, loff_t newsize) { int error; - error = inode_newsize_ok(inode, offset); + error = inode_newsize_ok(inode, newsize); if (error) return error; - truncate_setsize(inode, offset); + truncate_setsize(inode, newsize); if (inode->i_op->truncate) inode->i_op->truncate(inode); return 0; } EXPORT_SYMBOL(vmtruncate); -int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) +int vmtruncate_range(struct inode *inode, loff_t lstart, loff_t lend) { struct address_space *mapping = inode->i_mapping; + loff_t holebegin = round_up(lstart, PAGE_SIZE); + loff_t holelen = 1 + lend - holebegin; /* * If the underlying filesystem is not going to provide @@ -623,10 +626,10 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) mutex_lock(&inode->i_mutex); inode_dio_wait(inode); - unmap_mapping_range(mapping, offset, (end - offset), 1); - inode->i_op->truncate_range(inode, offset, end); + unmap_mapping_range(mapping, holebegin, holelen, 1); + inode->i_op->truncate_range(inode, lstart, lend); /* unmap again to remove racily COWed private pages */ - unmap_mapping_range(mapping, offset, (end - offset), 1); + unmap_mapping_range(mapping, holebegin, holelen, 1); mutex_unlock(&inode->i_mutex); return 0; -- cgit v0.10.2 From b85e0effd3dcbf9118b896232f59526ab1a39a74 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 25 Jul 2011 17:12:25 -0700 Subject: mm: consistent truncate and invalidate loops Make the pagevec_lookup loops in truncate_inode_pages_range(), invalidate_mapping_pages() and invalidate_inode_pages2_range() more consistent with each other. They were relying upon page->index of an unlocked page, but apologizing for it: accept it, embrace it, add comments and WARN_ONs, and simplify the index handling. invalidate_inode_pages2_range() had special handling for a wrapped page->index + 1 = 0 case; but MAX_LFS_FILESIZE doesn't let us anywhere near there, and a corrupt page->index in the radix_tree could cause more trouble than that would catch. Remove that wrapped handling. invalidate_inode_pages2_range() uses min() to limit the pagevec_lookup when near the end of the range: copy that into the other two, although it's less useful than you might think (it limits the use of the buffer, rather than the indices looked up). Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/filemap.c b/mm/filemap.c index 2780be4..10a1711 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -128,6 +128,7 @@ void __delete_from_page_cache(struct page *page) radix_tree_delete(&mapping->page_tree, page->index); page->mapping = NULL; + /* Leave page->index set: truncation lookup relies upon it */ mapping->nrpages--; __dec_zone_page_state(page, NR_FILE_PAGES); if (PageSwapBacked(page)) @@ -483,6 +484,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, spin_unlock_irq(&mapping->tree_lock); } else { page->mapping = NULL; + /* Leave page->index set: truncation relies upon it */ spin_unlock_irq(&mapping->tree_lock); mem_cgroup_uncharge_cache_page(page); page_cache_release(page); diff --git a/mm/truncate.c b/mm/truncate.c index c924764..dc45901 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -199,9 +199,6 @@ int invalidate_inode_page(struct page *page) * The first pass will remove most pages, so the search cost of the second pass * is low. * - * When looking at page->index outside the page lock we need to be careful to - * copy it into a local to avoid races (it could change at any time). - * * We pass down the cache-hot hint to the page freeing code. Even if the * mapping is large, it is probably the case that the final pages are the most * recently touched, and freeing happens in ascending file offset order. @@ -210,10 +207,10 @@ void truncate_inode_pages_range(struct address_space *mapping, loff_t lstart, loff_t lend) { const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; - pgoff_t end; const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); struct pagevec pvec; - pgoff_t next; + pgoff_t index; + pgoff_t end; int i; cleancache_flush_inode(mapping); @@ -224,24 +221,21 @@ void truncate_inode_pages_range(struct address_space *mapping, end = (lend >> PAGE_CACHE_SHIFT); pagevec_init(&pvec, 0); - next = start; - while (next <= end && - pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { + index = start; + while (index <= end && pagevec_lookup(&pvec, mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; - pgoff_t page_index = page->index; - if (page_index > end) { - next = page_index; + /* We rely upon deletion not changing page->index */ + index = page->index; + if (index > end) break; - } - if (page_index > next) - next = page_index; - next++; if (!trylock_page(page)) continue; + WARN_ON(page->index != index); if (PageWriteback(page)) { unlock_page(page); continue; @@ -252,6 +246,7 @@ void truncate_inode_pages_range(struct address_space *mapping, pagevec_release(&pvec); mem_cgroup_uncharge_end(); cond_resched(); + index++; } if (partial) { @@ -264,13 +259,14 @@ void truncate_inode_pages_range(struct address_space *mapping, } } - next = start; + index = start; for ( ; ; ) { cond_resched(); - if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { - if (next == start) + if (!pagevec_lookup(&pvec, mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { + if (index == start) break; - next = start; + index = start; continue; } if (pvec.pages[0]->index > end) { @@ -281,18 +277,20 @@ void truncate_inode_pages_range(struct address_space *mapping, for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; - if (page->index > end) + /* We rely upon deletion not changing page->index */ + index = page->index; + if (index > end) break; + lock_page(page); + WARN_ON(page->index != index); wait_on_page_writeback(page); truncate_inode_page(mapping, page); - if (page->index > next) - next = page->index; - next++; unlock_page(page); } pagevec_release(&pvec); mem_cgroup_uncharge_end(); + index++; } cleancache_flush_inode(mapping); } @@ -333,35 +331,26 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end) { struct pagevec pvec; - pgoff_t next = start; + pgoff_t index = start; unsigned long ret; unsigned long count = 0; int i; pagevec_init(&pvec, 0); - while (next <= end && - pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { + while (index <= end && pagevec_lookup(&pvec, mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; - pgoff_t index; - int lock_failed; - - lock_failed = !trylock_page(page); - /* - * We really shouldn't be looking at the ->index of an - * unlocked page. But we're not allowed to lock these - * pages. So we rely upon nobody altering the ->index - * of this (pinned-by-us) page. - */ + /* We rely upon deletion not changing page->index */ index = page->index; - if (index > next) - next = index; - next++; - if (lock_failed) - continue; + if (index > end) + break; + if (!trylock_page(page)) + continue; + WARN_ON(page->index != index); ret = invalidate_inode_page(page); unlock_page(page); /* @@ -371,12 +360,11 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, if (!ret) deactivate_page(page); count += ret; - if (next > end) - break; } pagevec_release(&pvec); mem_cgroup_uncharge_end(); cond_resched(); + index++; } return count; } @@ -442,37 +430,32 @@ int invalidate_inode_pages2_range(struct address_space *mapping, pgoff_t start, pgoff_t end) { struct pagevec pvec; - pgoff_t next; + pgoff_t index; int i; int ret = 0; int ret2 = 0; int did_range_unmap = 0; - int wrapped = 0; cleancache_flush_inode(mapping); pagevec_init(&pvec, 0); - next = start; - while (next <= end && !wrapped && - pagevec_lookup(&pvec, mapping, next, - min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { + index = start; + while (index <= end && pagevec_lookup(&pvec, mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; - pgoff_t page_index; + + /* We rely upon deletion not changing page->index */ + index = page->index; + if (index > end) + break; lock_page(page); + WARN_ON(page->index != index); if (page->mapping != mapping) { unlock_page(page); continue; } - page_index = page->index; - next = page_index + 1; - if (next == 0) - wrapped = 1; - if (page_index > end) { - unlock_page(page); - break; - } wait_on_page_writeback(page); if (page_mapped(page)) { if (!did_range_unmap) { @@ -480,9 +463,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping, * Zap the rest of the file in one hit. */ unmap_mapping_range(mapping, - (loff_t)page_index< Date: Mon, 25 Jul 2011 17:12:25 -0700 Subject: mm: pincer in truncate_inode_pages_range truncate_inode_pages_range()'s final loop has a nice pincer property, bringing start and end together, squeezing out the last pages. But the range handling missed out on that, just sliding up the range, perhaps letting pages come in behind it. Add one more test to give it the same pincer effect. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/truncate.c b/mm/truncate.c index dc45901..232eb27 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -269,7 +269,7 @@ void truncate_inode_pages_range(struct address_space *mapping, index = start; continue; } - if (pvec.pages[0]->index > end) { + if (index == start && pvec.pages[0]->index > end) { pagevec_release(&pvec); break; } -- cgit v0.10.2 From d515afe88a32e567c550e3db914f3e378f86453a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 25 Jul 2011 17:12:26 -0700 Subject: tmpfs: no need to use i_lock 2.6.36's 7e496299d4d2 ("tmpfs: make tmpfs scalable with percpu_counter for used blocks") to make tmpfs scalable with percpu_counter used inode->i_lock in place of sbinfo->stat_lock around i_blocks updates; but that was adverse to scalability, and unnecessary, since info->lock is already held there in the fast paths. Remove those uses of i_lock, and add info->lock in the three error paths where it's then needed across shmem_free_blocks(). It's not actually needed across shmem_unacct_blocks(), but they're so often paired that it looks wrong to split them apart. Signed-off-by: Hugh Dickins Acked-by: Tim Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/shmem.c b/mm/shmem.c index fcedf54..c1db11c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -241,9 +241,7 @@ static void shmem_free_blocks(struct inode *inode, long pages) struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); if (sbinfo->max_blocks) { percpu_counter_add(&sbinfo->used_blocks, -pages); - spin_lock(&inode->i_lock); inode->i_blocks -= pages*BLOCKS_PER_PAGE; - spin_unlock(&inode->i_lock); } } @@ -432,9 +430,7 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long sbinfo->max_blocks - 1) >= 0) return ERR_PTR(-ENOSPC); percpu_counter_inc(&sbinfo->used_blocks); - spin_lock(&inode->i_lock); inode->i_blocks += BLOCKS_PER_PAGE; - spin_unlock(&inode->i_lock); } spin_unlock(&info->lock); @@ -1421,9 +1417,7 @@ repeat: shmem_acct_block(info->flags)) goto nospace; percpu_counter_inc(&sbinfo->used_blocks); - spin_lock(&inode->i_lock); inode->i_blocks += BLOCKS_PER_PAGE; - spin_unlock(&inode->i_lock); } else if (shmem_acct_block(info->flags)) goto nospace; @@ -1434,8 +1428,10 @@ repeat: spin_unlock(&info->lock); filepage = shmem_alloc_page(gfp, info, idx); if (!filepage) { + spin_lock(&info->lock); shmem_unacct_blocks(info->flags, 1); shmem_free_blocks(inode, 1); + spin_unlock(&info->lock); error = -ENOMEM; goto failed; } @@ -1449,8 +1445,10 @@ repeat: current->mm, GFP_KERNEL); if (error) { page_cache_release(filepage); + spin_lock(&info->lock); shmem_unacct_blocks(info->flags, 1); shmem_free_blocks(inode, 1); + spin_unlock(&info->lock); filepage = NULL; goto failed; } @@ -1480,10 +1478,10 @@ repeat: * be done automatically. */ if (ret) { - spin_unlock(&info->lock); - page_cache_release(filepage); shmem_unacct_blocks(info->flags, 1); shmem_free_blocks(inode, 1); + spin_unlock(&info->lock); + page_cache_release(filepage); filepage = NULL; if (error) goto failed; -- cgit v0.10.2 From 1d65f86db14806cf7b1218c7b4ecb8b4db5af27d Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 25 Jul 2011 17:12:27 -0700 Subject: mm: preallocate page before lock_page() at filemap COW Currently we are keeping faulted page locked throughout whole __do_fault call (except for page_mkwrite code path) after calling file system's fault code. If we do early COW, we allocate a new page which has to be charged for a memcg (mem_cgroup_newpage_charge). This function, however, might block for unbounded amount of time if memcg oom killer is disabled or fork-bomb is running because the only way out of the OOM situation is either an external event or OOM-situation fix. In the end we are keeping the faulted page locked and blocking other processes from faulting it in which is not good at all because we are basically punishing potentially an unrelated process for OOM condition in a different group (I have seen stuck system because of ld-2.11.1.so being locked). We can do test easily. % cgcreate -g memory:A % cgset -r memory.limit_in_bytes=64M A % cgset -r memory.memsw.limit_in_bytes=64M A % cd kernel_dir; cgexec -g memory:A make -j Then, the whole system will live-locked until you kill 'make -j' by hands (or push reboot...) This is because some important page in a a shared library are locked. Considering again, the new page is not necessary to be allocated with lock_page() held. And usual page allocation may dive into long memory reclaim loop with holding lock_page() and can cause very long latency. There are 3 ways. 1. do allocation/charge before lock_page() Pros. - simple and can handle page allocation in the same manner. This will reduce holding time of lock_page() in general. Cons. - we do page allocation even if ->fault() returns error. 2. do charge after unlock_page(). Even if charge fails, it's just OOM. Pros. - no impact to non-memcg path. Cons. - implemenation requires special cares of LRU and we need to modify page_add_new_anon_rmap()... 3. do unlock->charge->lock again method. Pros. - no impact to non-memcg path. Cons. - This may kill LOCK_PAGE_RETRY optimization. We need to release lock and get it again... This patch moves "charge" and memory allocation for COW page before lock_page(). Then, we can avoid scanning LRU with holding a lock on a page and latency under lock_page() will be reduced. Then, above livelock disappears. [akpm@linux-foundation.org: fix code layout] Signed-off-by: KAMEZAWA Hiroyuki Reported-by: Lutz Vieweg Original-idea-by: Michal Hocko Cc: Michal Hocko Cc: Ying Han Cc: Johannes Weiner Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memory.c b/mm/memory.c index a58bbeb..3c9f3aa 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3093,14 +3093,34 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, pte_t *page_table; spinlock_t *ptl; struct page *page; + struct page *cow_page; pte_t entry; int anon = 0; - int charged = 0; struct page *dirty_page = NULL; struct vm_fault vmf; int ret; int page_mkwrite = 0; + /* + * If we do COW later, allocate page befor taking lock_page() + * on the file cache page. This will reduce lock holding time. + */ + if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { + + if (unlikely(anon_vma_prepare(vma))) + return VM_FAULT_OOM; + + cow_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); + if (!cow_page) + return VM_FAULT_OOM; + + if (mem_cgroup_newpage_charge(cow_page, mm, GFP_KERNEL)) { + page_cache_release(cow_page); + return VM_FAULT_OOM; + } + } else + cow_page = NULL; + vmf.virtual_address = (void __user *)(address & PAGE_MASK); vmf.pgoff = pgoff; vmf.flags = flags; @@ -3109,12 +3129,13 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, ret = vma->vm_ops->fault(vma, &vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) - return ret; + goto uncharge_out; if (unlikely(PageHWPoison(vmf.page))) { if (ret & VM_FAULT_LOCKED) unlock_page(vmf.page); - return VM_FAULT_HWPOISON; + ret = VM_FAULT_HWPOISON; + goto uncharge_out; } /* @@ -3132,23 +3153,8 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, page = vmf.page; if (flags & FAULT_FLAG_WRITE) { if (!(vma->vm_flags & VM_SHARED)) { + page = cow_page; anon = 1; - if (unlikely(anon_vma_prepare(vma))) { - ret = VM_FAULT_OOM; - goto out; - } - page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, - vma, address); - if (!page) { - ret = VM_FAULT_OOM; - goto out; - } - if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) { - ret = VM_FAULT_OOM; - page_cache_release(page); - goto out; - } - charged = 1; copy_user_highpage(page, vmf.page, address, vma); __SetPageUptodate(page); } else { @@ -3217,8 +3223,8 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, /* no need to invalidate: a not-present page won't be cached */ update_mmu_cache(vma, address, page_table); } else { - if (charged) - mem_cgroup_uncharge_page(page); + if (cow_page) + mem_cgroup_uncharge_page(cow_page); if (anon) page_cache_release(page); else @@ -3227,7 +3233,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, pte_unmap_unlock(page_table, ptl); -out: if (dirty_page) { struct address_space *mapping = page->mapping; @@ -3257,6 +3262,13 @@ out: unwritable_page: page_cache_release(page); return ret; +uncharge_out: + /* fs's fault handler get error */ + if (cow_page) { + mem_cgroup_uncharge_page(cow_page); + page_cache_release(cow_page); + } + return ret; } static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, -- cgit v0.10.2 From cd38b115d5ad79b0100ac6daa103c4fe2c50a913 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 25 Jul 2011 17:12:29 -0700 Subject: mm: page allocator: initialise ZLC for first zone eligible for zone_reclaim There have been a small number of complaints about significant stalls while copying large amounts of data on NUMA machines reported on a distribution bugzilla. In these cases, zone_reclaim was enabled by default due to large NUMA distances. In general, the complaints have not been about the workload itself unless it was a file server (in which case the recommendation was disable zone_reclaim). The stalls are mostly due to significant amounts of time spent scanning the preferred zone for pages to free. After a failure, it might fallback to another node (as zonelists are often node-ordered rather than zone-ordered) but stall quickly again when the next allocation attempt occurs. In bad cases, each page allocated results in a full scan of the preferred zone. Patch 1 checks the preferred zone for recent allocation failure which is particularly important if zone_reclaim has failed recently. This avoids rescanning the zone in the near future and instead falling back to another node. This may hurt node locality in some cases but a failure to zone_reclaim is more expensive than a remote access. Patch 2 clears the zlc information after direct reclaim. Otherwise, zone_reclaim can mark zones full, direct reclaim can reclaim enough pages but the zone is still not considered for allocation. This was tested on a 24-thread 2-node x86_64 machine. The tests were focused on large amounts of IO. All tests were bound to the CPUs on node-0 to avoid disturbances due to processes being scheduled on different nodes. The kernels tested are 3.0-rc6-vanilla Vanilla 3.0-rc6 zlcfirst Patch 1 applied zlcreconsider Patches 1+2 applied FS-Mark ./fs_mark -d /tmp/fsmark-10813 -D 100 -N 5000 -n 208 -L 35 -t 24 -S0 -s 524288 fsmark-3.0-rc6 3.0-rc6 3.0-rc6 vanilla zlcfirs zlcreconsider Files/s min 54.90 ( 0.00%) 49.80 (-10.24%) 49.10 (-11.81%) Files/s mean 100.11 ( 0.00%) 135.17 (25.94%) 146.93 (31.87%) Files/s stddev 57.51 ( 0.00%) 138.97 (58.62%) 158.69 (63.76%) Files/s max 361.10 ( 0.00%) 834.40 (56.72%) 802.40 (55.00%) Overhead min 76704.00 ( 0.00%) 76501.00 ( 0.27%) 77784.00 (-1.39%) Overhead mean 1485356.51 ( 0.00%) 1035797.83 (43.40%) 1594680.26 (-6.86%) Overhead stddev 1848122.53 ( 0.00%) 881489.88 (109.66%) 1772354.90 ( 4.27%) Overhead max 7989060.00 ( 0.00%) 3369118.00 (137.13%) 10135324.00 (-21.18%) MMTests Statistics: duration User/Sys Time Running Test (seconds) 501.49 493.91 499.93 Total Elapsed Time (seconds) 2451.57 2257.48 2215.92 MMTests Statistics: vmstat Page Ins 46268 63840 66008 Page Outs 90821596 90671128 88043732 Swap Ins 0 0 0 Swap Outs 0 0 0 Direct pages scanned 13091697 8966863 8971790 Kswapd pages scanned 0 1830011 1831116 Kswapd pages reclaimed 0 1829068 1829930 Direct pages reclaimed 13037777 8956828 8648314 Kswapd efficiency 100% 99% 99% Kswapd velocity 0.000 810.643 826.346 Direct efficiency 99% 99% 96% Direct velocity 5340.128 3972.068 4048.788 Percentage direct scans 100% 83% 83% Page writes by reclaim 0 3 0 Slabs scanned 796672 720640 720256 Direct inode steals 7422667 7160012 7088638 Kswapd inode steals 0 1736840 2021238 Test completes far faster with a large increase in the number of files created per second. Standard deviation is high as a small number of iterations were much higher than the mean. The number of pages scanned by zone_reclaim is reduced and kswapd is used for more work. LARGE DD 3.0-rc6 3.0-rc6 3.0-rc6 vanilla zlcfirst zlcreconsider download tar 59 ( 0.00%) 59 ( 0.00%) 55 ( 7.27%) dd source files 527 ( 0.00%) 296 (78.04%) 320 (64.69%) delete source 36 ( 0.00%) 19 (89.47%) 20 (80.00%) MMTests Statistics: duration User/Sys Time Running Test (seconds) 125.03 118.98 122.01 Total Elapsed Time (seconds) 624.56 375.02 398.06 MMTests Statistics: vmstat Page Ins 3594216 439368 407032 Page Outs 23380832 23380488 23377444 Swap Ins 0 0 0 Swap Outs 0 436 287 Direct pages scanned 17482342 69315973 82864918 Kswapd pages scanned 0 519123 575425 Kswapd pages reclaimed 0 466501 522487 Direct pages reclaimed 5858054 2732949 2712547 Kswapd efficiency 100% 89% 90% Kswapd velocity 0.000 1384.254 1445.574 Direct efficiency 33% 3% 3% Direct velocity 27991.453 184832.737 208171.929 Percentage direct scans 100% 99% 99% Page writes by reclaim 0 5082 13917 Slabs scanned 17280 29952 35328 Direct inode steals 115257 1431122 332201 Kswapd inode steals 0 0 979532 This test downloads a large tarfile and copies it with dd a number of times - similar to the most recent bug report I've dealt with. Time to completion is reduced. The number of pages scanned directly is still disturbingly high with a low efficiency but this is likely due to the number of dirty pages encountered. The figures could probably be improved with more work around how kswapd is used and how dirty pages are handled but that is separate work and this result is significant on its own. Streaming Mapped Writer MMTests Statistics: duration User/Sys Time Running Test (seconds) 124.47 111.67 112.64 Total Elapsed Time (seconds) 2138.14 1816.30 1867.56 MMTests Statistics: vmstat Page Ins 90760 89124 89516 Page Outs 121028340 120199524 120736696 Swap Ins 0 86 55 Swap Outs 0 0 0 Direct pages scanned 114989363 96461439 96330619 Kswapd pages scanned 56430948 56965763 57075875 Kswapd pages reclaimed 27743219 27752044 27766606 Direct pages reclaimed 49777 46884 36655 Kswapd efficiency 49% 48% 48% Kswapd velocity 26392.541 31363.631 30561.736 Direct efficiency 0% 0% 0% Direct velocity 53780.091 53108.759 51581.004 Percentage direct scans 67% 62% 62% Page writes by reclaim 385 122 1513 Slabs scanned 43008 39040 42112 Direct inode steals 0 10 8 Kswapd inode steals 733 534 477 This test just creates a large file mapping and writes to it linearly. Time to completion is again reduced. The gains are mostly down to two things. In many cases, there is less scanning as zone_reclaim simply gives up faster due to recent failures. The second reason is that memory is used more efficiently. Instead of scanning the preferred zone every time, the allocator falls back to another zone and uses it instead improving overall memory utilisation. This patch: initialise ZLC for first zone eligible for zone_reclaim. The zonelist cache (ZLC) is used among other things to record if zone_reclaim() failed for a particular zone recently. The intention is to avoid a high cost scanning extremely long zonelists or scanning within the zone uselessly. Currently the zonelist cache is setup only after the first zone has been considered and zone_reclaim() has been called. The objective was to avoid a costly setup but zone_reclaim is itself quite expensive. If it is failing regularly such as the first eligible zone having mostly mapped pages, the cost in scanning and allocation stalls is far higher than the ZLC initialisation step. This patch initialises ZLC before the first eligible zone calls zone_reclaim(). Once initialised, it is checked whether the zone failed zone_reclaim recently. If it has, the zone is skipped. As the first zone is now being checked, additional care has to be taken about zones marked full. A zone can be marked "full" because it should not have enough unmapped pages for zone_reclaim but this is excessive as direct reclaim or kswapd may succeed where zone_reclaim fails. Only mark zones "full" after zone_reclaim fails if it failed to reclaim enough pages after scanning. Signed-off-by: Mel Gorman Cc: Minchan Kim Cc: KOSAKI Motohiro Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9119faa..830a465 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1664,7 +1664,7 @@ zonelist_scan: continue; if ((alloc_flags & ALLOC_CPUSET) && !cpuset_zone_allowed_softwall(zone, gfp_mask)) - goto try_next_zone; + continue; BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK); if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { @@ -1676,17 +1676,36 @@ zonelist_scan: classzone_idx, alloc_flags)) goto try_this_zone; + if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) { + /* + * we do zlc_setup if there are multiple nodes + * and before considering the first zone allowed + * by the cpuset. + */ + allowednodes = zlc_setup(zonelist, alloc_flags); + zlc_active = 1; + did_zlc_setup = 1; + } + if (zone_reclaim_mode == 0) goto this_zone_full; + /* + * As we may have just activated ZLC, check if the first + * eligible zone has failed zone_reclaim recently. + */ + if (NUMA_BUILD && zlc_active && + !zlc_zone_worth_trying(zonelist, z, allowednodes)) + continue; + ret = zone_reclaim(zone, gfp_mask, order); switch (ret) { case ZONE_RECLAIM_NOSCAN: /* did not scan */ - goto try_next_zone; + continue; case ZONE_RECLAIM_FULL: /* scanned but unreclaimable */ - goto this_zone_full; + continue; default: /* did we reclaim enough */ if (!zone_watermark_ok(zone, order, mark, @@ -1703,16 +1722,6 @@ try_this_zone: this_zone_full: if (NUMA_BUILD) zlc_mark_zone_full(zonelist, z); -try_next_zone: - if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) { - /* - * we do zlc_setup after the first zone is tried but only - * if there are multiple nodes make it worthwhile - */ - allowednodes = zlc_setup(zonelist, alloc_flags); - zlc_active = 1; - did_zlc_setup = 1; - } } if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) { -- cgit v0.10.2 From 76d3fbf8fbf6cc78ceb63549e0e0c5bc8a88f838 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 25 Jul 2011 17:12:30 -0700 Subject: mm: page allocator: reconsider zones for allocation after direct reclaim With zone_reclaim_mode enabled, it's possible for zones to be considered full in the zonelist_cache so they are skipped in the future. If the process enters direct reclaim, the ZLC may still consider zones to be full even after reclaiming pages. Reconsider all zones for allocation if direct reclaim returns successfully. Signed-off-by: Mel Gorman Cc: Minchan Kim Cc: KOSAKI Motohiro Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 830a465..0944723 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1616,6 +1616,21 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z) set_bit(i, zlc->fullzones); } +/* + * clear all zones full, called after direct reclaim makes progress so that + * a zone that was recently full is not skipped over for up to a second + */ +static void zlc_clear_zones_full(struct zonelist *zonelist) +{ + struct zonelist_cache *zlc; /* cached zonelist speedup info */ + + zlc = zonelist->zlcache_ptr; + if (!zlc) + return; + + bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); +} + #else /* CONFIG_NUMA */ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags) @@ -1632,6 +1647,10 @@ static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z, static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z) { } + +static void zlc_clear_zones_full(struct zonelist *zonelist) +{ +} #endif /* CONFIG_NUMA */ /* @@ -1963,6 +1982,10 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, if (unlikely(!(*did_some_progress))) return NULL; + /* After successful reclaim, reconsider all zones for allocation */ + if (NUMA_BUILD) + zlc_clear_zones_full(zonelist); + retry: page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, high_zoneidx, -- cgit v0.10.2 From 72c4783210f77fd743f0a316858d33f27db51e7c Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 25 Jul 2011 17:12:31 -0700 Subject: mm: remove useless rcu lock-unlock from mapping_tagged() radix_tree_tagged() is lockless - it reads from a member of the raid-tree root node. It does not require any protection. Signed-off-by: Konstantin Khlebnikov Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 31f6988..919b45e 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1405,10 +1405,6 @@ EXPORT_SYMBOL(test_set_page_writeback); */ int mapping_tagged(struct address_space *mapping, int tag) { - int ret; - rcu_read_lock(); - ret = radix_tree_tagged(&mapping->page_tree, tag); - rcu_read_unlock(); - return ret; + return radix_tree_tagged(&mapping->page_tree, tag); } EXPORT_SYMBOL(mapping_tagged); -- cgit v0.10.2 From 2efaca927f5cd7ecd0f1554b8f9b6a9a2c329c03 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 25 Jul 2011 17:12:32 -0700 Subject: mm/futex: fix futex writes on archs with SW tracking of dirty & young I haven't reproduced it myself but the fail scenario is that on such machines (notably ARM and some embedded powerpc), if you manage to hit that futex path on a writable page whose dirty bit has gone from the PTE, you'll livelock inside the kernel from what I can tell. It will go in a loop of trying the atomic access, failing, trying gup to "fix it up", getting succcess from gup, go back to the atomic access, failing again because dirty wasn't fixed etc... So I think you essentially hang in the kernel. The scenario is probably rare'ish because affected architecture are embedded and tend to not swap much (if at all) so we probably rarely hit the case where dirty is missing or young is missing, but I think Shan has a piece of SW that can reliably reproduce it using a shared writable mapping & fork or something like that. On archs who use SW tracking of dirty & young, a page without dirty is effectively mapped read-only and a page without young unaccessible in the PTE. Additionally, some architectures might lazily flush the TLB when relaxing write protection (by doing only a local flush), and expect a fault to invalidate the stale entry if it's still present on another processor. The futex code assumes that if the "in_atomic()" access -EFAULT's, it can "fix it up" by causing get_user_pages() which would then be equivalent to taking the fault. However that isn't the case. get_user_pages() will not call handle_mm_fault() in the case where the PTE seems to have the right permissions, regardless of the dirty and young state. It will eventually update those bits ... in the struct page, but not in the PTE. Additionally, it will not handle the lazy TLB flushing that can be required by some architectures in the fault case. Basically, gup is the wrong interface for the job. The patch provides a more appropriate one which boils down to just calling handle_mm_fault() since what we are trying to do is simulate a real page fault. The futex code currently attempts to write to user memory within a pagefault disabled section, and if that fails, tries to fix it up using get_user_pages(). This doesn't work on archs where the dirty and young bits are maintained by software, since they will gate access permission in the TLB, and will not be updated by gup(). In addition, there's an expectation on some archs that a spurious write fault triggers a local TLB flush, and that is missing from the picture as well. I decided that adding those "features" to gup() would be too much for this already too complex function, and instead added a new simpler fixup_user_fault() which is essentially a wrapper around handle_mm_fault() which the futex code can call. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: fix some nits Darren saw, fiddle comment layout] Signed-off-by: Benjamin Herrenschmidt Reported-by: Shan Hai Tested-by: Shan Hai Cc: David Laight Acked-by: Peter Zijlstra Cc: Darren Hart Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mm.h b/include/linux/mm.h index 3cccd05..3172a1c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -988,6 +988,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); struct page *get_dump_page(unsigned long addr); +extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, + unsigned long address, unsigned int fault_flags); extern int try_to_release_page(struct page * page, gfp_t gfp_mask); extern void do_invalidatepage(struct page *page, unsigned long offset); diff --git a/kernel/futex.c b/kernel/futex.c index 3fbc76c..0a30897 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -355,8 +355,8 @@ static int fault_in_user_writeable(u32 __user *uaddr) int ret; down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, (unsigned long)uaddr, - 1, 1, 0, NULL, NULL); + ret = fixup_user_fault(current, mm, (unsigned long)uaddr, + FAULT_FLAG_WRITE); up_read(&mm->mmap_sem); return ret < 0 ? ret : 0; diff --git a/mm/memory.c b/mm/memory.c index 3c9f3aa..a56e3ba 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1805,7 +1805,63 @@ next_page: } EXPORT_SYMBOL(__get_user_pages); -/** +/* + * fixup_user_fault() - manually resolve a user page fault + * @tsk: the task_struct to use for page fault accounting, or + * NULL if faults are not to be recorded. + * @mm: mm_struct of target mm + * @address: user address + * @fault_flags:flags to pass down to handle_mm_fault() + * + * This is meant to be called in the specific scenario where for locking reasons + * we try to access user memory in atomic context (within a pagefault_disable() + * section), this returns -EFAULT, and we want to resolve the user fault before + * trying again. + * + * Typically this is meant to be used by the futex code. + * + * The main difference with get_user_pages() is that this function will + * unconditionally call handle_mm_fault() which will in turn perform all the + * necessary SW fixup of the dirty and young bits in the PTE, while + * handle_mm_fault() only guarantees to update these in the struct page. + * + * This is important for some architectures where those bits also gate the + * access permission to the page because they are maintained in software. On + * such architectures, gup() will not be enough to make a subsequent access + * succeed. + * + * This should be called with the mm_sem held for read. + */ +int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, + unsigned long address, unsigned int fault_flags) +{ + struct vm_area_struct *vma; + int ret; + + vma = find_extend_vma(mm, address); + if (!vma || address < vma->vm_start) + return -EFAULT; + + ret = handle_mm_fault(mm, vma, address, fault_flags); + if (ret & VM_FAULT_ERROR) { + if (ret & VM_FAULT_OOM) + return -ENOMEM; + if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) + return -EHWPOISON; + if (ret & VM_FAULT_SIGBUS) + return -EFAULT; + BUG(); + } + if (tsk) { + if (ret & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; + } + return 0; +} + +/* * get_user_pages() - pin user pages in memory * @tsk: the task_struct to use for page fault accounting, or * NULL if faults are not to be recorded. -- cgit v0.10.2 From 708e3508c2a2204cc276dcdb543009a441bfe91b Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 25 Jul 2011 17:12:32 -0700 Subject: tmpfs: clone shmem_file_splice_read() Copy __generic_file_splice_read() and generic_file_splice_read() from fs/splice.c to shmem_file_splice_read() in mm/shmem.c. Make page_cache_pipe_buf_ops and spd_release_page() accessible to it. Signed-off-by: Hugh Dickins Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/splice.c b/fs/splice.c index aa866d3..fa2defa 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -132,7 +132,7 @@ error: return err; } -static const struct pipe_buf_operations page_cache_pipe_buf_ops = { +const struct pipe_buf_operations page_cache_pipe_buf_ops = { .can_merge = 0, .map = generic_pipe_buf_map, .unmap = generic_pipe_buf_unmap, @@ -264,7 +264,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, return ret; } -static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) +void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) { page_cache_release(spd->pages[i]); } diff --git a/include/linux/splice.h b/include/linux/splice.h index 997c3b4..26e5b61 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -88,5 +88,7 @@ extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, extern int splice_grow_spd(struct pipe_inode_info *, struct splice_pipe_desc *); extern void splice_shrink_spd(struct pipe_inode_info *, struct splice_pipe_desc *); +extern void spd_release_page(struct splice_pipe_desc *, unsigned int); +extern const struct pipe_buf_operations page_cache_pipe_buf_ops; #endif diff --git a/mm/shmem.c b/mm/shmem.c index c1db11c..d176e48 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -51,6 +51,7 @@ static struct vfsmount *shm_mnt; #include #include #include +#include #include #include #include @@ -1844,6 +1845,221 @@ static ssize_t shmem_file_aio_read(struct kiocb *iocb, return retval; } +static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + struct address_space *mapping = in->f_mapping; + unsigned int loff, nr_pages, req_pages; + struct page *pages[PIPE_DEF_BUFFERS]; + struct partial_page partial[PIPE_DEF_BUFFERS]; + struct page *page; + pgoff_t index, end_index; + loff_t isize, left; + int error, page_nr; + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, + .flags = flags, + .ops = &page_cache_pipe_buf_ops, + .spd_release = spd_release_page, + }; + + isize = i_size_read(in->f_mapping->host); + if (unlikely(*ppos >= isize)) + return 0; + + left = isize - *ppos; + if (unlikely(left < len)) + len = left; + + if (splice_grow_spd(pipe, &spd)) + return -ENOMEM; + + index = *ppos >> PAGE_CACHE_SHIFT; + loff = *ppos & ~PAGE_CACHE_MASK; + req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + nr_pages = min(req_pages, pipe->buffers); + + /* + * Lookup the (hopefully) full range of pages we need. + */ + spd.nr_pages = find_get_pages_contig(mapping, index, + nr_pages, spd.pages); + index += spd.nr_pages; + + /* + * If find_get_pages_contig() returned fewer pages than we needed, + * readahead/allocate the rest and fill in the holes. + */ + if (spd.nr_pages < nr_pages) + page_cache_sync_readahead(mapping, &in->f_ra, in, + index, req_pages - spd.nr_pages); + + error = 0; + while (spd.nr_pages < nr_pages) { + /* + * Page could be there, find_get_pages_contig() breaks on + * the first hole. + */ + page = find_get_page(mapping, index); + if (!page) { + /* + * page didn't exist, allocate one. + */ + page = page_cache_alloc_cold(mapping); + if (!page) + break; + + error = add_to_page_cache_lru(page, mapping, index, + GFP_KERNEL); + if (unlikely(error)) { + page_cache_release(page); + if (error == -EEXIST) + continue; + break; + } + /* + * add_to_page_cache() locks the page, unlock it + * to avoid convoluting the logic below even more. + */ + unlock_page(page); + } + + spd.pages[spd.nr_pages++] = page; + index++; + } + + /* + * Now loop over the map and see if we need to start IO on any + * pages, fill in the partial map, etc. + */ + index = *ppos >> PAGE_CACHE_SHIFT; + nr_pages = spd.nr_pages; + spd.nr_pages = 0; + for (page_nr = 0; page_nr < nr_pages; page_nr++) { + unsigned int this_len; + + if (!len) + break; + + /* + * this_len is the max we'll use from this page + */ + this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); + page = spd.pages[page_nr]; + + if (PageReadahead(page)) + page_cache_async_readahead(mapping, &in->f_ra, in, + page, index, req_pages - page_nr); + + /* + * If the page isn't uptodate, we may need to start io on it + */ + if (!PageUptodate(page)) { + lock_page(page); + + /* + * Page was truncated, or invalidated by the + * filesystem. Redo the find/create, but this time the + * page is kept locked, so there's no chance of another + * race with truncate/invalidate. + */ + if (!page->mapping) { + unlock_page(page); + page = find_or_create_page(mapping, index, + mapping_gfp_mask(mapping)); + + if (!page) { + error = -ENOMEM; + break; + } + page_cache_release(spd.pages[page_nr]); + spd.pages[page_nr] = page; + } + /* + * page was already under io and is now done, great + */ + if (PageUptodate(page)) { + unlock_page(page); + goto fill_it; + } + + /* + * need to read in the page + */ + error = mapping->a_ops->readpage(in, page); + if (unlikely(error)) { + /* + * We really should re-lookup the page here, + * but it complicates things a lot. Instead + * lets just do what we already stored, and + * we'll get it the next time we are called. + */ + if (error == AOP_TRUNCATED_PAGE) + error = 0; + + break; + } + } +fill_it: + /* + * i_size must be checked after PageUptodate. + */ + isize = i_size_read(mapping->host); + end_index = (isize - 1) >> PAGE_CACHE_SHIFT; + if (unlikely(!isize || index > end_index)) + break; + + /* + * if this is the last page, see if we need to shrink + * the length and stop + */ + if (end_index == index) { + unsigned int plen; + + /* + * max good bytes in this page + */ + plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; + if (plen <= loff) + break; + + /* + * force quit after adding this page + */ + this_len = min(this_len, plen - loff); + len = this_len; + } + + spd.partial[page_nr].offset = loff; + spd.partial[page_nr].len = this_len; + len -= this_len; + loff = 0; + spd.nr_pages++; + index++; + } + + /* + * Release any pages at the end, if we quit early. 'page_nr' is how far + * we got, 'nr_pages' is how many pages are in the map. + */ + while (page_nr < nr_pages) + page_cache_release(spd.pages[page_nr++]); + in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; + + if (spd.nr_pages) + error = splice_to_pipe(pipe, &spd); + + splice_shrink_spd(pipe, &spd); + + if (error > 0) { + *ppos += error; + file_accessed(in); + } + return error; +} + static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) { struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); @@ -2699,7 +2915,7 @@ static const struct file_operations shmem_file_operations = { .aio_read = shmem_file_aio_read, .aio_write = generic_file_aio_write, .fsync = noop_fsync, - .splice_read = generic_file_splice_read, + .splice_read = shmem_file_splice_read, .splice_write = generic_file_splice_write, #endif }; -- cgit v0.10.2 From 71f0e07a605fad1fb6b288e4dc1dd8dfa78f4872 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 25 Jul 2011 17:12:33 -0700 Subject: tmpfs: refine shmem_file_splice_read Tidy up shmem_file_splice_read(): Remove readahead: okay, we could implement shmem readahead on swap, but have never done so before, swap being the slow exceptional path. Use shmem_getpage() instead of find_or_create_page() plus ->readpage(). Remove several comments: sorry, I found them more distracting than helpful, and this will not be the reference version of splice_read(). Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/shmem.c b/mm/shmem.c index d176e48..f966145 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1850,6 +1850,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, unsigned int flags) { struct address_space *mapping = in->f_mapping; + struct inode *inode = mapping->host; unsigned int loff, nr_pages, req_pages; struct page *pages[PIPE_DEF_BUFFERS]; struct partial_page partial[PIPE_DEF_BUFFERS]; @@ -1865,7 +1866,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, .spd_release = spd_release_page, }; - isize = i_size_read(in->f_mapping->host); + isize = i_size_read(inode); if (unlikely(*ppos >= isize)) return 0; @@ -1881,153 +1882,57 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; nr_pages = min(req_pages, pipe->buffers); - /* - * Lookup the (hopefully) full range of pages we need. - */ spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages); index += spd.nr_pages; - - /* - * If find_get_pages_contig() returned fewer pages than we needed, - * readahead/allocate the rest and fill in the holes. - */ - if (spd.nr_pages < nr_pages) - page_cache_sync_readahead(mapping, &in->f_ra, in, - index, req_pages - spd.nr_pages); - error = 0; - while (spd.nr_pages < nr_pages) { - /* - * Page could be there, find_get_pages_contig() breaks on - * the first hole. - */ - page = find_get_page(mapping, index); - if (!page) { - /* - * page didn't exist, allocate one. - */ - page = page_cache_alloc_cold(mapping); - if (!page) - break; - - error = add_to_page_cache_lru(page, mapping, index, - GFP_KERNEL); - if (unlikely(error)) { - page_cache_release(page); - if (error == -EEXIST) - continue; - break; - } - /* - * add_to_page_cache() locks the page, unlock it - * to avoid convoluting the logic below even more. - */ - unlock_page(page); - } + while (spd.nr_pages < nr_pages) { + page = NULL; + error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL); + if (error) + break; + unlock_page(page); spd.pages[spd.nr_pages++] = page; index++; } - /* - * Now loop over the map and see if we need to start IO on any - * pages, fill in the partial map, etc. - */ index = *ppos >> PAGE_CACHE_SHIFT; nr_pages = spd.nr_pages; spd.nr_pages = 0; + for (page_nr = 0; page_nr < nr_pages; page_nr++) { unsigned int this_len; if (!len) break; - /* - * this_len is the max we'll use from this page - */ this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); page = spd.pages[page_nr]; - if (PageReadahead(page)) - page_cache_async_readahead(mapping, &in->f_ra, in, - page, index, req_pages - page_nr); - - /* - * If the page isn't uptodate, we may need to start io on it - */ - if (!PageUptodate(page)) { - lock_page(page); - - /* - * Page was truncated, or invalidated by the - * filesystem. Redo the find/create, but this time the - * page is kept locked, so there's no chance of another - * race with truncate/invalidate. - */ - if (!page->mapping) { - unlock_page(page); - page = find_or_create_page(mapping, index, - mapping_gfp_mask(mapping)); - - if (!page) { - error = -ENOMEM; - break; - } - page_cache_release(spd.pages[page_nr]); - spd.pages[page_nr] = page; - } - /* - * page was already under io and is now done, great - */ - if (PageUptodate(page)) { - unlock_page(page); - goto fill_it; - } - - /* - * need to read in the page - */ - error = mapping->a_ops->readpage(in, page); - if (unlikely(error)) { - /* - * We really should re-lookup the page here, - * but it complicates things a lot. Instead - * lets just do what we already stored, and - * we'll get it the next time we are called. - */ - if (error == AOP_TRUNCATED_PAGE) - error = 0; - + if (!PageUptodate(page) || page->mapping != mapping) { + page = NULL; + error = shmem_getpage(inode, index, &page, + SGP_CACHE, NULL); + if (error) break; - } + unlock_page(page); + page_cache_release(spd.pages[page_nr]); + spd.pages[page_nr] = page; } -fill_it: - /* - * i_size must be checked after PageUptodate. - */ - isize = i_size_read(mapping->host); + + isize = i_size_read(inode); end_index = (isize - 1) >> PAGE_CACHE_SHIFT; if (unlikely(!isize || index > end_index)) break; - /* - * if this is the last page, see if we need to shrink - * the length and stop - */ if (end_index == index) { unsigned int plen; - /* - * max good bytes in this page - */ plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; if (plen <= loff) break; - /* - * force quit after adding this page - */ this_len = min(this_len, plen - loff); len = this_len; } @@ -2040,13 +1945,8 @@ fill_it: index++; } - /* - * Release any pages at the end, if we quit early. 'page_nr' is how far - * we got, 'nr_pages' is how many pages are in the map. - */ while (page_nr < nr_pages) page_cache_release(spd.pages[page_nr++]); - in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; if (spd.nr_pages) error = splice_to_pipe(pipe, &spd); -- cgit v0.10.2 From 68da9f055755ee2609a1686722e6d6a7980019ee Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 25 Jul 2011 17:12:34 -0700 Subject: tmpfs: pass gfp to shmem_getpage_gfp Make shmem_getpage() a wrapper, passing mapping_gfp_mask() down to shmem_getpage_gfp(), which in turn passes gfp down to shmem_swp_alloc(). Change shmem_read_mapping_page_gfp() to use shmem_getpage_gfp() in the CONFIG_SHMEM case; but leave tiny !SHMEM using read_cache_page_gfp(). Add a BUG_ON() in case anyone happens to call this on a non-shmem mapping; though we might later want to let that case route to read_cache_page_gfp(). It annoys me to have these two almost-redundant args, gfp and fault_type: I can't find a better way; but initialize fault_type only in shmem_fault(). Note that before, read_cache_page_gfp() was allocating i915_gem's pages with __GFP_NORETRY as intended; but the corresponding swap vector pages got allocated without it, leaving a small possibility of OOM. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/shmem.c b/mm/shmem.c index f966145..f6c94ba 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -127,8 +127,15 @@ static unsigned long shmem_default_max_inodes(void) } #endif -static int shmem_getpage(struct inode *inode, unsigned long idx, - struct page **pagep, enum sgp_type sgp, int *type); +static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, + struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type); + +static inline int shmem_getpage(struct inode *inode, pgoff_t index, + struct page **pagep, enum sgp_type sgp, int *fault_type) +{ + return shmem_getpage_gfp(inode, index, pagep, sgp, + mapping_gfp_mask(inode->i_mapping), fault_type); +} static inline struct page *shmem_dir_alloc(gfp_t gfp_mask) { @@ -404,10 +411,12 @@ static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, uns * @info: info structure for the inode * @index: index of the page to find * @sgp: check and recheck i_size? skip allocation? + * @gfp: gfp mask to use for any page allocation * * If the entry does not exist, allocate it. */ -static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp) +static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, + unsigned long index, enum sgp_type sgp, gfp_t gfp) { struct inode *inode = &info->vfs_inode; struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); @@ -435,7 +444,7 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long } spin_unlock(&info->lock); - page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping)); + page = shmem_dir_alloc(gfp); spin_lock(&info->lock); if (!page) { @@ -1225,14 +1234,14 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) #endif /* - * shmem_getpage - either get the page from swap or allocate a new one + * shmem_getpage_gfp - find page in cache, or get from swap, or allocate * * If we allocate a new one we do not mark it dirty. That's up to the * vm. If we swap it in we mark it dirty since we also free the swap * entry since a page cannot live in both the swap and page cache */ -static int shmem_getpage(struct inode *inode, unsigned long idx, - struct page **pagep, enum sgp_type sgp, int *type) +static int shmem_getpage_gfp(struct inode *inode, pgoff_t idx, + struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type) { struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); @@ -1242,15 +1251,11 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, struct page *prealloc_page = NULL; swp_entry_t *entry; swp_entry_t swap; - gfp_t gfp; int error; if (idx >= SHMEM_MAX_INDEX) return -EFBIG; - if (type) - *type = 0; - /* * Normally, filepage is NULL on entry, and either found * uptodate immediately, or allocated and zeroed, or read @@ -1264,13 +1269,12 @@ repeat: filepage = find_lock_page(mapping, idx); if (filepage && PageUptodate(filepage)) goto done; - gfp = mapping_gfp_mask(mapping); if (!filepage) { /* * Try to preload while we can wait, to not make a habit of * draining atomic reserves; but don't latch on to this cpu. */ - error = radix_tree_preload(gfp & ~__GFP_HIGHMEM); + error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); if (error) goto failed; radix_tree_preload_end(); @@ -1290,7 +1294,7 @@ repeat: spin_lock(&info->lock); shmem_recalc_inode(inode); - entry = shmem_swp_alloc(info, idx, sgp); + entry = shmem_swp_alloc(info, idx, sgp, gfp); if (IS_ERR(entry)) { spin_unlock(&info->lock); error = PTR_ERR(entry); @@ -1305,12 +1309,12 @@ repeat: shmem_swp_unmap(entry); spin_unlock(&info->lock); /* here we actually do the io */ - if (type) - *type |= VM_FAULT_MAJOR; + if (fault_type) + *fault_type |= VM_FAULT_MAJOR; swappage = shmem_swapin(swap, gfp, info, idx); if (!swappage) { spin_lock(&info->lock); - entry = shmem_swp_alloc(info, idx, sgp); + entry = shmem_swp_alloc(info, idx, sgp, gfp); if (IS_ERR(entry)) error = PTR_ERR(entry); else { @@ -1461,7 +1465,7 @@ repeat: SetPageSwapBacked(filepage); } - entry = shmem_swp_alloc(info, idx, sgp); + entry = shmem_swp_alloc(info, idx, sgp, gfp); if (IS_ERR(entry)) error = PTR_ERR(entry); else { @@ -1539,7 +1543,7 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct inode *inode = vma->vm_file->f_path.dentry->d_inode; int error; - int ret; + int ret = VM_FAULT_LOCKED; if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) return VM_FAULT_SIGBUS; @@ -1547,11 +1551,12 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); if (error) return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); + if (ret & VM_FAULT_MAJOR) { count_vm_event(PGMAJFAULT); mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); } - return ret | VM_FAULT_LOCKED; + return ret; } #ifdef CONFIG_NUMA @@ -3162,13 +3167,29 @@ int shmem_zero_setup(struct vm_area_struct *vma) * suit tmpfs, since it may have pages in swapcache, and needs to find those * for itself; although drivers/gpu/drm i915 and ttm rely upon this support. * - * Provide a stub for those callers to start using now, then later - * flesh it out to call shmem_getpage() with additional gfp mask, when - * shmem_file_splice_read() is added and shmem_readpage() is removed. + * i915_gem_object_get_pages_gtt() mixes __GFP_NORETRY | __GFP_NOWARN in + * with the mapping_gfp_mask(), to avoid OOMing the machine unnecessarily. */ struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp) { +#ifdef CONFIG_SHMEM + struct inode *inode = mapping->host; + struct page *page = NULL; + int error; + + BUG_ON(mapping->a_ops != &shmem_aops); + error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, gfp, NULL); + if (error) + page = ERR_PTR(error); + else + unlock_page(page); + return page; +#else + /* + * The tiny !SHMEM case uses ramfs without swap + */ return read_cache_page_gfp(mapping, index, gfp); +#endif } EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp); -- cgit v0.10.2 From 9276aad6c898dbcc31d095f2934dedd5cbb2e93e Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 25 Jul 2011 17:12:34 -0700 Subject: tmpfs: remove_shmem_readpage Remove that pernicious shmem_readpage() at last: the things we needed it for (splice, loop, sendfile, i915 GEM) are now fully taken care of by shmem_file_splice_read() and shmem_read_mapping_page_gfp(). This removal clears the way for a simpler shmem_getpage_gfp(), since page is never passed in; but leave most of that cleanup until after. sys_readahead() and sys_fadvise(POSIX_FADV_WILLNEED) will now EINVAL, instead of unexpectedly trying to read ahead on tmpfs: if that proves to be an issue for someone, then we can either arrange for them to return success instead, or try to implement async readahead on tmpfs. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/shmem.c b/mm/shmem.c index f6c94ba..ff6713a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1246,7 +1246,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t idx, struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo; - struct page *filepage = *pagep; + struct page *filepage; struct page *swappage; struct page *prealloc_page = NULL; swp_entry_t *entry; @@ -1255,18 +1255,8 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t idx, if (idx >= SHMEM_MAX_INDEX) return -EFBIG; - - /* - * Normally, filepage is NULL on entry, and either found - * uptodate immediately, or allocated and zeroed, or read - * in under swappage, which is then assigned to filepage. - * But shmem_readpage (required for splice) passes in a locked - * filepage, which may be found not uptodate by other callers - * too, and may need to be copied from the swappage read in. - */ repeat: - if (!filepage) - filepage = find_lock_page(mapping, idx); + filepage = find_lock_page(mapping, idx); if (filepage && PageUptodate(filepage)) goto done; if (!filepage) { @@ -1513,8 +1503,7 @@ nospace: * Perhaps the page was brought in from swap between find_lock_page * and taking info->lock? We allow for that at add_to_page_cache_lru, * but must also avoid reporting a spurious ENOSPC while working on a - * full tmpfs. (When filepage has been passed in to shmem_getpage, it - * is already in page cache, which prevents this race from occurring.) + * full tmpfs. */ if (!filepage) { struct page *page = find_get_page(mapping, idx); @@ -1527,7 +1516,7 @@ nospace: spin_unlock(&info->lock); error = -ENOSPC; failed: - if (*pagep != filepage) { + if (filepage) { unlock_page(filepage); page_cache_release(filepage); } @@ -1673,19 +1662,6 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode static const struct inode_operations shmem_symlink_inode_operations; static const struct inode_operations shmem_symlink_inline_operations; -/* - * Normally tmpfs avoids the use of shmem_readpage and shmem_write_begin; - * but providing them allows a tmpfs file to be used for splice, sendfile, and - * below the loop driver, in the generic fashion that many filesystems support. - */ -static int shmem_readpage(struct file *file, struct page *page) -{ - struct inode *inode = page->mapping->host; - int error = shmem_getpage(inode, page->index, &page, SGP_CACHE, NULL); - unlock_page(page); - return error; -} - static int shmem_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, @@ -1693,7 +1669,6 @@ shmem_write_begin(struct file *file, struct address_space *mapping, { struct inode *inode = mapping->host; pgoff_t index = pos >> PAGE_CACHE_SHIFT; - *pagep = NULL; return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL); } @@ -1893,7 +1868,6 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, error = 0; while (spd.nr_pages < nr_pages) { - page = NULL; error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL); if (error) break; @@ -1916,7 +1890,6 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, page = spd.pages[page_nr]; if (!PageUptodate(page) || page->mapping != mapping) { - page = NULL; error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL); if (error) @@ -2125,7 +2098,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s int error; int len; struct inode *inode; - struct page *page = NULL; + struct page *page; char *kaddr; struct shmem_inode_info *info; @@ -2803,7 +2776,6 @@ static const struct address_space_operations shmem_aops = { .writepage = shmem_writepage, .set_page_dirty = __set_page_dirty_no_writeback, #ifdef CONFIG_TMPFS - .readpage = shmem_readpage, .write_begin = shmem_write_begin, .write_end = shmem_write_end, #endif @@ -3175,7 +3147,7 @@ struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, { #ifdef CONFIG_SHMEM struct inode *inode = mapping->host; - struct page *page = NULL; + struct page *page; int error; BUG_ON(mapping->a_ops != &shmem_aops); -- cgit v0.10.2 From e83c32e8f92724a06a22a3b42f3afc07db93e131 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 25 Jul 2011 17:12:35 -0700 Subject: tmpfs: simplify prealloc_page The prealloc_page handling in shmem_getpage_gfp() is unnecessarily complicated: first simplify that before going on to filepage/swappage. That's right, don't report ENOMEM when the preallocation fails: we may or may not need the page. But simply report ENOMEM once we find we do need it, instead of dropping lock, repeating allocation, unwinding on failure etc. And leave the out label on the fast path, don't goto. Fix something that looks like a bug but turns out not to be: set PageSwapBacked on prealloc_page before its mem_cgroup_cache_charge(), as the removed case was doing. That's important before adding to LRU (determines which LRU the page goes on), and does affect which path it takes through memcontrol.c, but in the end MEM_CGROUP_CHANGE_TYPE_ SHMEM is handled no differently from CACHE. Signed-off-by: Hugh Dickins Acked-by: Shaohua Li Cc: "Zhang, Yanmin" Cc: Tim Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/shmem.c b/mm/shmem.c index ff6713a..8f8534f 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1269,9 +1269,9 @@ repeat: goto failed; radix_tree_preload_end(); if (sgp != SGP_READ && !prealloc_page) { - /* We don't care if this fails */ prealloc_page = shmem_alloc_page(gfp, info, idx); if (prealloc_page) { + SetPageSwapBacked(prealloc_page); if (mem_cgroup_cache_charge(prealloc_page, current->mm, GFP_KERNEL)) { page_cache_release(prealloc_page); @@ -1403,7 +1403,8 @@ repeat: goto repeat; } spin_unlock(&info->lock); - } else { + + } else if (prealloc_page) { shmem_swp_unmap(entry); sbinfo = SHMEM_SB(inode->i_sb); if (sbinfo->max_blocks) { @@ -1419,41 +1420,8 @@ repeat: if (!filepage) { int ret; - if (!prealloc_page) { - spin_unlock(&info->lock); - filepage = shmem_alloc_page(gfp, info, idx); - if (!filepage) { - spin_lock(&info->lock); - shmem_unacct_blocks(info->flags, 1); - shmem_free_blocks(inode, 1); - spin_unlock(&info->lock); - error = -ENOMEM; - goto failed; - } - SetPageSwapBacked(filepage); - - /* - * Precharge page while we can wait, compensate - * after - */ - error = mem_cgroup_cache_charge(filepage, - current->mm, GFP_KERNEL); - if (error) { - page_cache_release(filepage); - spin_lock(&info->lock); - shmem_unacct_blocks(info->flags, 1); - shmem_free_blocks(inode, 1); - spin_unlock(&info->lock); - filepage = NULL; - goto failed; - } - - spin_lock(&info->lock); - } else { - filepage = prealloc_page; - prealloc_page = NULL; - SetPageSwapBacked(filepage); - } + filepage = prealloc_page; + prealloc_page = NULL; entry = shmem_swp_alloc(info, idx, sgp, gfp); if (IS_ERR(entry)) @@ -1492,11 +1460,20 @@ repeat: SetPageUptodate(filepage); if (sgp == SGP_DIRTY) set_page_dirty(filepage); + } else { + spin_unlock(&info->lock); + error = -ENOMEM; + goto out; } done: *pagep = filepage; error = 0; - goto out; +out: + if (prealloc_page) { + mem_cgroup_uncharge_cache_page(prealloc_page); + page_cache_release(prealloc_page); + } + return error; nospace: /* @@ -1520,12 +1497,7 @@ failed: unlock_page(filepage); page_cache_release(filepage); } -out: - if (prealloc_page) { - mem_cgroup_uncharge_cache_page(prealloc_page); - page_cache_release(prealloc_page); - } - return error; + goto out; } static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -- cgit v0.10.2 From 27ab700626f048407e9466d389a43c7d3aa45967 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 25 Jul 2011 17:12:36 -0700 Subject: tmpfs: simplify filepage/swappage We can now simplify shmem_getpage_gfp(): there is no longer a dilemma of filepage passed in via shmem_readpage(), then swappage found, which must then be copied over to it. Although at first it's tempting to replace the **pagep arg by returning struct page *, that makes a mess of IS_ERR_OR_NULL(page)s in all the callers, so leave as is. Insert BUG_ON(!PageUptodate) when we find and lock page: some of the complication came from uninitialized pages inserted into filecache prior to readpage; but now we're in control, and only release pagelock on filecache once it's uptodate (if an error occurs in reading back from swap, the page remains in swapcache, never moved to filecache). Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/shmem.c b/mm/shmem.c index 8f8534f..bf6e9c1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1246,41 +1246,47 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t idx, struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo; - struct page *filepage; - struct page *swappage; + struct page *page; struct page *prealloc_page = NULL; swp_entry_t *entry; swp_entry_t swap; int error; + int ret; if (idx >= SHMEM_MAX_INDEX) return -EFBIG; repeat: - filepage = find_lock_page(mapping, idx); - if (filepage && PageUptodate(filepage)) - goto done; - if (!filepage) { + page = find_lock_page(mapping, idx); + if (page) { /* - * Try to preload while we can wait, to not make a habit of - * draining atomic reserves; but don't latch on to this cpu. + * Once we can get the page lock, it must be uptodate: + * if there were an error in reading back from swap, + * the page would not be inserted into the filecache. */ - error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); - if (error) - goto failed; - radix_tree_preload_end(); - if (sgp != SGP_READ && !prealloc_page) { - prealloc_page = shmem_alloc_page(gfp, info, idx); - if (prealloc_page) { - SetPageSwapBacked(prealloc_page); - if (mem_cgroup_cache_charge(prealloc_page, - current->mm, GFP_KERNEL)) { - page_cache_release(prealloc_page); - prealloc_page = NULL; - } + BUG_ON(!PageUptodate(page)); + goto done; + } + + /* + * Try to preload while we can wait, to not make a habit of + * draining atomic reserves; but don't latch on to this cpu. + */ + error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); + if (error) + goto out; + radix_tree_preload_end(); + + if (sgp != SGP_READ && !prealloc_page) { + prealloc_page = shmem_alloc_page(gfp, info, idx); + if (prealloc_page) { + SetPageSwapBacked(prealloc_page); + if (mem_cgroup_cache_charge(prealloc_page, + current->mm, GFP_KERNEL)) { + page_cache_release(prealloc_page); + prealloc_page = NULL; } } } - error = 0; spin_lock(&info->lock); shmem_recalc_inode(inode); @@ -1288,21 +1294,21 @@ repeat: if (IS_ERR(entry)) { spin_unlock(&info->lock); error = PTR_ERR(entry); - goto failed; + goto out; } swap = *entry; if (swap.val) { /* Look it up and read it in.. */ - swappage = lookup_swap_cache(swap); - if (!swappage) { + page = lookup_swap_cache(swap); + if (!page) { shmem_swp_unmap(entry); spin_unlock(&info->lock); /* here we actually do the io */ if (fault_type) *fault_type |= VM_FAULT_MAJOR; - swappage = shmem_swapin(swap, gfp, info, idx); - if (!swappage) { + page = shmem_swapin(swap, gfp, info, idx); + if (!page) { spin_lock(&info->lock); entry = shmem_swp_alloc(info, idx, sgp, gfp); if (IS_ERR(entry)) @@ -1314,62 +1320,42 @@ repeat: } spin_unlock(&info->lock); if (error) - goto failed; + goto out; goto repeat; } - wait_on_page_locked(swappage); - page_cache_release(swappage); + wait_on_page_locked(page); + page_cache_release(page); goto repeat; } /* We have to do this with page locked to prevent races */ - if (!trylock_page(swappage)) { + if (!trylock_page(page)) { shmem_swp_unmap(entry); spin_unlock(&info->lock); - wait_on_page_locked(swappage); - page_cache_release(swappage); + wait_on_page_locked(page); + page_cache_release(page); goto repeat; } - if (PageWriteback(swappage)) { + if (PageWriteback(page)) { shmem_swp_unmap(entry); spin_unlock(&info->lock); - wait_on_page_writeback(swappage); - unlock_page(swappage); - page_cache_release(swappage); + wait_on_page_writeback(page); + unlock_page(page); + page_cache_release(page); goto repeat; } - if (!PageUptodate(swappage)) { + if (!PageUptodate(page)) { shmem_swp_unmap(entry); spin_unlock(&info->lock); - unlock_page(swappage); - page_cache_release(swappage); + unlock_page(page); + page_cache_release(page); error = -EIO; - goto failed; + goto out; } - if (filepage) { - shmem_swp_set(info, entry, 0); - shmem_swp_unmap(entry); - delete_from_swap_cache(swappage); - spin_unlock(&info->lock); - copy_highpage(filepage, swappage); - unlock_page(swappage); - page_cache_release(swappage); - flush_dcache_page(filepage); - SetPageUptodate(filepage); - set_page_dirty(filepage); - swap_free(swap); - } else if (!(error = add_to_page_cache_locked(swappage, mapping, - idx, GFP_NOWAIT))) { - info->flags |= SHMEM_PAGEIN; - shmem_swp_set(info, entry, 0); - shmem_swp_unmap(entry); - delete_from_swap_cache(swappage); - spin_unlock(&info->lock); - filepage = swappage; - set_page_dirty(filepage); - swap_free(swap); - } else { + error = add_to_page_cache_locked(page, mapping, + idx, GFP_NOWAIT); + if (error) { shmem_swp_unmap(entry); spin_unlock(&info->lock); if (error == -ENOMEM) { @@ -1378,28 +1364,33 @@ repeat: * call memcg's OOM if needed. */ error = mem_cgroup_shmem_charge_fallback( - swappage, - current->mm, - gfp); + page, current->mm, gfp); if (error) { - unlock_page(swappage); - page_cache_release(swappage); - goto failed; + unlock_page(page); + page_cache_release(page); + goto out; } } - unlock_page(swappage); - page_cache_release(swappage); + unlock_page(page); + page_cache_release(page); goto repeat; } - } else if (sgp == SGP_READ && !filepage) { + + info->flags |= SHMEM_PAGEIN; + shmem_swp_set(info, entry, 0); shmem_swp_unmap(entry); - filepage = find_get_page(mapping, idx); - if (filepage && - (!PageUptodate(filepage) || !trylock_page(filepage))) { + delete_from_swap_cache(page); + spin_unlock(&info->lock); + set_page_dirty(page); + swap_free(swap); + + } else if (sgp == SGP_READ) { + shmem_swp_unmap(entry); + page = find_get_page(mapping, idx); + if (page && !trylock_page(page)) { spin_unlock(&info->lock); - wait_on_page_locked(filepage); - page_cache_release(filepage); - filepage = NULL; + wait_on_page_locked(page); + page_cache_release(page); goto repeat; } spin_unlock(&info->lock); @@ -1417,56 +1408,52 @@ repeat: } else if (shmem_acct_block(info->flags)) goto nospace; - if (!filepage) { - int ret; + page = prealloc_page; + prealloc_page = NULL; - filepage = prealloc_page; - prealloc_page = NULL; - - entry = shmem_swp_alloc(info, idx, sgp, gfp); - if (IS_ERR(entry)) - error = PTR_ERR(entry); - else { - swap = *entry; - shmem_swp_unmap(entry); - } - ret = error || swap.val; - if (ret) - mem_cgroup_uncharge_cache_page(filepage); - else - ret = add_to_page_cache_lru(filepage, mapping, + entry = shmem_swp_alloc(info, idx, sgp, gfp); + if (IS_ERR(entry)) + error = PTR_ERR(entry); + else { + swap = *entry; + shmem_swp_unmap(entry); + } + ret = error || swap.val; + if (ret) + mem_cgroup_uncharge_cache_page(page); + else + ret = add_to_page_cache_lru(page, mapping, idx, GFP_NOWAIT); - /* - * At add_to_page_cache_lru() failure, uncharge will - * be done automatically. - */ - if (ret) { - shmem_unacct_blocks(info->flags, 1); - shmem_free_blocks(inode, 1); - spin_unlock(&info->lock); - page_cache_release(filepage); - filepage = NULL; - if (error) - goto failed; - goto repeat; - } - info->flags |= SHMEM_PAGEIN; + /* + * At add_to_page_cache_lru() failure, + * uncharge will be done automatically. + */ + if (ret) { + shmem_unacct_blocks(info->flags, 1); + shmem_free_blocks(inode, 1); + spin_unlock(&info->lock); + page_cache_release(page); + if (error) + goto out; + goto repeat; } + info->flags |= SHMEM_PAGEIN; info->alloced++; spin_unlock(&info->lock); - clear_highpage(filepage); - flush_dcache_page(filepage); - SetPageUptodate(filepage); + clear_highpage(page); + flush_dcache_page(page); + SetPageUptodate(page); if (sgp == SGP_DIRTY) - set_page_dirty(filepage); + set_page_dirty(page); + } else { spin_unlock(&info->lock); error = -ENOMEM; goto out; } done: - *pagep = filepage; + *pagep = page; error = 0; out: if (prealloc_page) { @@ -1482,21 +1469,13 @@ nospace: * but must also avoid reporting a spurious ENOSPC while working on a * full tmpfs. */ - if (!filepage) { - struct page *page = find_get_page(mapping, idx); - if (page) { - spin_unlock(&info->lock); - page_cache_release(page); - goto repeat; - } - } + page = find_get_page(mapping, idx); spin_unlock(&info->lock); - error = -ENOSPC; -failed: - if (filepage) { - unlock_page(filepage); - page_cache_release(filepage); + if (page) { + page_cache_release(page); + goto repeat; } + error = -ENOSPC; goto out; } -- cgit v0.10.2 From 48f170fb7d7db8789ccc23e051af61f62af5f685 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 25 Jul 2011 17:12:37 -0700 Subject: tmpfs: simplify unuse and writepage shmem_unuse_inode() and shmem_writepage() contain a little code to cope with pages inserted independently into the filecache, probably by a filesystem stacked on top of tmpfs, then fed to its ->readpage() or ->writepage(). Unionfs was indeed experimenting with working in that way three years ago, but I find no current examples: nowadays the stacking filesystems use vfs interfaces to the lower filesystem. It's now illegal: remove most of that code, adding some WARN_ON_ONCEs. Signed-off-by: Hugh Dickins Cc: Erez Zadok Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/shmem.c b/mm/shmem.c index bf6e9c1..7533574 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -972,20 +972,7 @@ found: error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT); /* which does mem_cgroup_uncharge_cache_page on error */ - if (error == -EEXIST) { - struct page *filepage = find_get_page(mapping, idx); - error = 1; - if (filepage) { - /* - * There might be a more uptodate page coming down - * from a stacked writepage: forget our swappage if so. - */ - if (PageUptodate(filepage)) - error = 0; - page_cache_release(filepage); - } - } - if (!error) { + if (error != -ENOMEM) { delete_from_swap_cache(page); set_page_dirty(page); info->flags |= SHMEM_PAGEIN; @@ -1072,16 +1059,17 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) /* * shmem_backing_dev_info's capabilities prevent regular writeback or * sync from ever calling shmem_writepage; but a stacking filesystem - * may use the ->writepage of its underlying filesystem, in which case + * might use ->writepage of its underlying filesystem, in which case * tmpfs should write out to swap only in response to memory pressure, - * and not for the writeback threads or sync. However, in those cases, - * we do still want to check if there's a redundant swappage to be - * discarded. + * and not for the writeback threads or sync. */ - if (wbc->for_reclaim) - swap = get_swap_page(); - else - swap.val = 0; + if (!wbc->for_reclaim) { + WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ + goto redirty; + } + swap = get_swap_page(); + if (!swap.val) + goto redirty; /* * Add inode to shmem_unuse()'s list of swapped-out inodes, @@ -1092,15 +1080,12 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) * we've taken the spinlock, because shmem_unuse_inode() will * prune a !swapped inode from the swaplist under both locks. */ - if (swap.val) { - mutex_lock(&shmem_swaplist_mutex); - if (list_empty(&info->swaplist)) - list_add_tail(&info->swaplist, &shmem_swaplist); - } + mutex_lock(&shmem_swaplist_mutex); + if (list_empty(&info->swaplist)) + list_add_tail(&info->swaplist, &shmem_swaplist); spin_lock(&info->lock); - if (swap.val) - mutex_unlock(&shmem_swaplist_mutex); + mutex_unlock(&shmem_swaplist_mutex); if (index >= info->next_index) { BUG_ON(!(info->flags & SHMEM_TRUNCATE)); @@ -1108,16 +1093,13 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) } entry = shmem_swp_entry(info, index, NULL); if (entry->val) { - /* - * The more uptodate page coming down from a stacked - * writepage should replace our old swappage. - */ + WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ free_swap_and_cache(*entry); shmem_swp_set(info, entry, 0); } shmem_recalc_inode(inode); - if (swap.val && add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { + if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { delete_from_page_cache(page); shmem_swp_set(info, entry, swap.val); shmem_swp_unmap(entry); -- cgit v0.10.2 From 99b12e3d882bc7ebdfe0de381dff3b16d21c38f7 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Mon, 25 Jul 2011 17:12:37 -0700 Subject: writeback: account NR_WRITTEN at IO completion time NR_WRITTEN is now accounted at block IO enqueue time, which is not very accurate as to common understanding. This moves NR_WRITTEN accounting to the IO completion time and makes it more consistent with BDI_WRITTEN, which is used for bandwidth estimation. Signed-off-by: Wu Fengguang Cc: Michael Rubin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 919b45e..d8767b3 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1141,7 +1141,6 @@ EXPORT_SYMBOL(account_page_dirtied); void account_page_writeback(struct page *page) { inc_zone_page_state(page, NR_WRITEBACK); - inc_zone_page_state(page, NR_WRITTEN); } EXPORT_SYMBOL(account_page_writeback); @@ -1358,8 +1357,10 @@ int test_clear_page_writeback(struct page *page) } else { ret = TestClearPageWriteback(page); } - if (ret) + if (ret) { dec_zone_page_state(page, NR_WRITEBACK); + inc_zone_page_state(page, NR_WRITTEN); + } return ret; } -- cgit v0.10.2 From 023f21b9ccab71ae963781044a96b922cb4437bf Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 25 Jul 2011 17:12:38 -0700 Subject: h8300, exec: remove redundant set_fs(USER_DS) The address limit is already set in flush_old_exec() so those calls to set_fs(USER_DS) are redundant. Signed-off-by: Mathias Krause Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h index 69e8a34..e834b60 100644 --- a/arch/h8300/include/asm/processor.h +++ b/arch/h8300/include/asm/processor.h @@ -81,7 +81,6 @@ struct thread_struct { #if defined(__H8300H__) #define start_thread(_regs, _pc, _usp) \ do { \ - set_fs(USER_DS); /* reads from user space */ \ (_regs)->pc = (_pc); \ (_regs)->ccr = 0x00; /* clear all flags */ \ (_regs)->er5 = current->mm->start_data; /* GOT base */ \ @@ -91,7 +90,6 @@ do { \ #if defined(__H8300S__) #define start_thread(_regs, _pc, _usp) \ do { \ - set_fs(USER_DS); /* reads from user space */ \ (_regs)->pc = (_pc); \ (_regs)->ccr = 0x00; /* clear kernel flag */ \ (_regs)->exr = 0x78; /* enable all interrupts */ \ -- cgit v0.10.2 From 6fa80900db58b547fe1cfdc77c5413091cafa7bd Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 25 Jul 2011 17:12:39 -0700 Subject: alpha, exec: remove redundant set_fs(USER_DS) The address limit is already set in flush_old_exec() so this set_fs(USER_DS) is redundant. Signed-off-by: Mathias Krause Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 838eac1..89bbe5b 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -200,7 +200,6 @@ show_regs(struct pt_regs *regs) void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp) { - set_fs(USER_DS); regs->pc = pc; regs->ps = 8; wrusp(sp); -- cgit v0.10.2 From f796062598235b9f73086d0fb779e120ae9581a0 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 25 Jul 2011 17:12:40 -0700 Subject: m32r, exec: remove redundant set_fs(USER_DS) The address limit is already set in flush_old_exec() so this set_fs(USER_DS) is redundant. Signed-off-by: Mathias Krause Cc: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h index 8397c24..e1f46d7 100644 --- a/arch/m32r/include/asm/processor.h +++ b/arch/m32r/include/asm/processor.h @@ -106,7 +106,6 @@ struct thread_struct { #define start_thread(regs, new_pc, new_spu) \ do { \ - set_fs(USER_DS); \ regs->psw = (regs->psw | USERPS_BPSW) & 0x0000FFFFUL; \ regs->bpc = new_pc; \ regs->spu = new_spu; \ -- cgit v0.10.2 From b7de110044b4e26adcb7b278d14da93133692ed7 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 25 Jul 2011 17:12:42 -0700 Subject: m68k, exec: remove redundant set_fs(USER_DS) The address limit is already set in flush_old_exec() so those calls to set_fs(USER_DS) are redundant. Signed-off-by: Mathias Krause Cc: Greg Ungerer Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h index f111b02..d8ef53a 100644 --- a/arch/m68k/include/asm/processor.h +++ b/arch/m68k/include/asm/processor.h @@ -105,9 +105,6 @@ struct thread_struct { static inline void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp) { - /* reads from user space */ - set_fs(USER_DS); - regs->pc = pc; regs->sr &= ~0x2000; wrusp(usp); @@ -129,7 +126,6 @@ extern int handle_kernel_fault(struct pt_regs *regs); #define start_thread(_regs, _pc, _usp) \ do { \ - set_fs(USER_DS); /* reads from user space */ \ (_regs)->pc = (_pc); \ ((struct switch_stack *)(_regs))[-1].a6 = 0; \ reformat(_regs); \ diff --git a/arch/m68k/kernel/process_mm.c b/arch/m68k/kernel/process_mm.c index c2a1fc2..1bc223a 100644 --- a/arch/m68k/kernel/process_mm.c +++ b/arch/m68k/kernel/process_mm.c @@ -185,7 +185,7 @@ EXPORT_SYMBOL(kernel_thread); void flush_thread(void) { unsigned long zero = 0; - set_fs(USER_DS); + current->thread.fs = __USER_DS; if (!FPU_IS_EMU) asm volatile (".chip 68k/68881\n\t" diff --git a/arch/m68k/kernel/process_no.c b/arch/m68k/kernel/process_no.c index 9b86ad1..69c1803 100644 --- a/arch/m68k/kernel/process_no.c +++ b/arch/m68k/kernel/process_no.c @@ -158,7 +158,7 @@ void flush_thread(void) #ifdef CONFIG_FPU unsigned long zero = 0; #endif - set_fs(USER_DS); + current->thread.fs = __USER_DS; #ifdef CONFIG_FPU if (!FPU_IS_EMU) -- cgit v0.10.2 From ce1bb7afc5648056cf66896c4aebc0062bb12f79 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 25 Jul 2011 17:12:43 -0700 Subject: cris: fix some build warnings in pinmux.c Fix some harmless warnings such as arch/cris/arch-v32/mach-a3/pinmux.c:273: warning: ISO C90 forbids mixed declarations and code: Signed-off-by: WANG Cong Cc: Mikael Starvik Cc: Jesper Nilsson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/cris/arch-v32/mach-a3/pinmux.c b/arch/cris/arch-v32/mach-a3/pinmux.c index 18648ef..591f775 100644 --- a/arch/cris/arch-v32/mach-a3/pinmux.c +++ b/arch/cris/arch-v32/mach-a3/pinmux.c @@ -85,6 +85,8 @@ crisv32_pinmux_alloc_fixed(enum fixed_function function) int ret = -EINVAL; char saved[sizeof pins]; unsigned long flags; + reg_pinmux_rw_hwprot hwprot; + reg_clkgen_rw_clk_ctrl clk_ctrl; spin_lock_irqsave(&pinmux_lock, flags); @@ -93,9 +95,8 @@ crisv32_pinmux_alloc_fixed(enum fixed_function function) crisv32_pinmux_init(); /* must be done before we read rw_hwprot */ - reg_pinmux_rw_hwprot hwprot = REG_RD(pinmux, regi_pinmux, rw_hwprot); - reg_clkgen_rw_clk_ctrl clk_ctrl = REG_RD(clkgen, regi_clkgen, - rw_clk_ctrl); + hwprot = REG_RD(pinmux, regi_pinmux, rw_hwprot); + clk_ctrl = REG_RD(clkgen, regi_clkgen, rw_clk_ctrl); switch (function) { case pinmux_eth: @@ -262,6 +263,7 @@ crisv32_pinmux_dealloc_fixed(enum fixed_function function) int ret = -EINVAL; char saved[sizeof pins]; unsigned long flags; + reg_pinmux_rw_hwprot hwprot; spin_lock_irqsave(&pinmux_lock, flags); @@ -270,7 +272,7 @@ crisv32_pinmux_dealloc_fixed(enum fixed_function function) crisv32_pinmux_init(); /* must be done before we read rw_hwprot */ - reg_pinmux_rw_hwprot hwprot = REG_RD(pinmux, regi_pinmux, rw_hwprot); + hwprot = REG_RD(pinmux, regi_pinmux, rw_hwprot); switch (function) { case pinmux_eth: -- cgit v0.10.2 From 5ec80e50e11bcc3f39e47b9dc73110dbc58e4780 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 25 Jul 2011 17:12:44 -0700 Subject: cris, exec: remove redundant set_fs(USER_DS) The address limit is already set in flush_old_exec() so those calls to set_fs(USER_DS) are redundant. Signed-off-by: Mathias Krause Cc: Jesper Nilsson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/cris/include/arch-v10/arch/processor.h b/arch/cris/include/arch-v10/arch/processor.h index cc692c7..93feb2a 100644 --- a/arch/cris/include/arch-v10/arch/processor.h +++ b/arch/cris/include/arch-v10/arch/processor.h @@ -53,7 +53,6 @@ struct thread_struct { */ #define start_thread(regs, ip, usp) do { \ - set_fs(USER_DS); \ regs->irp = ip; \ regs->dccr |= 1 << U_DCCR_BITNR; \ wrusp(usp); \ diff --git a/arch/cris/include/arch-v32/arch/processor.h b/arch/cris/include/arch-v32/arch/processor.h index f80b477..9603c90 100644 --- a/arch/cris/include/arch-v32/arch/processor.h +++ b/arch/cris/include/arch-v32/arch/processor.h @@ -47,7 +47,6 @@ struct thread_struct { */ #define start_thread(regs, ip, usp) \ do { \ - set_fs(USER_DS); \ regs->erp = ip; \ regs->ccs |= 1 << (U_CCS_BITNR + CCS_SHIFT); \ wrusp(usp); \ -- cgit v0.10.2 From 7213de044e11856f5c2aee0e43eff8dc507d421c Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 25 Jul 2011 17:12:44 -0700 Subject: um: clean up vm-flags.h There is no need to define VM_{STACK,DATA}_DEFAULT_FLAGS as variable. It's also useless to test for TIF_IA32 as 64bit UML has no IA32 emulation. Signed-off-by: Richard Weinberger Acked-by: Randy Dunlap Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index 61fc99a..4f95637 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile @@ -4,7 +4,7 @@ # Licensed under the GPL # -obj-y = bug.o bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \ +obj-y = bug.o bugs.o delay.o fault.o ldt.o ptrace.o ptrace_user.o \ setjmp.o signal.o stub.o stub_segv.o syscalls.o syscall_table.o \ sysrq.o ksyms.o tls.o diff --git a/arch/um/sys-x86_64/mem.c b/arch/um/sys-x86_64/mem.c deleted file mode 100644 index 3f8df8a..0000000 --- a/arch/um/sys-x86_64/mem.c +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright 2003 PathScale, Inc. - * - * Licensed under the GPL - */ - -#include "linux/mm.h" -#include "asm/page.h" -#include "asm/mman.h" - -unsigned long vm_stack_flags = __VM_STACK_FLAGS; -unsigned long vm_stack_flags32 = __VM_STACK_FLAGS; -unsigned long vm_data_default_flags = __VM_DATA_DEFAULT_FLAGS; -unsigned long vm_data_default_flags32 = __VM_DATA_DEFAULT_FLAGS; -unsigned long vm_force_exec32 = PROT_EXEC; - diff --git a/arch/um/sys-x86_64/shared/sysdep/vm-flags.h b/arch/um/sys-x86_64/shared/sysdep/vm-flags.h index 3213edf..3978e55 100644 --- a/arch/um/sys-x86_64/shared/sysdep/vm-flags.h +++ b/arch/um/sys-x86_64/shared/sysdep/vm-flags.h @@ -7,27 +7,9 @@ #ifndef __VM_FLAGS_X86_64_H #define __VM_FLAGS_X86_64_H -#define __VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#define __VM_STACK_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | \ - VM_EXEC | VM_MAYREAD | VM_MAYWRITE | \ - VM_MAYEXEC) - -extern unsigned long vm_stack_flags, vm_stack_flags32; -extern unsigned long vm_data_default_flags, vm_data_default_flags32; -extern unsigned long vm_force_exec32; - -#ifdef TIF_IA32 -#define VM_DATA_DEFAULT_FLAGS \ - (test_thread_flag(TIF_IA32) ? vm_data_default_flags32 : \ - vm_data_default_flags) - -#define VM_STACK_DEFAULT_FLAGS \ - (test_thread_flag(TIF_IA32) ? vm_stack_flags32 : vm_stack_flags) -#endif - -#define VM_DATA_DEFAULT_FLAGS vm_data_default_flags - -#define VM_STACK_DEFAULT_FLAGS vm_stack_flags +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_STACK_DEFAULT_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | \ + VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #endif -- cgit v0.10.2 From c0ce5b673457123406f19e616bb8cf20a0c12cb5 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 25 Jul 2011 17:12:45 -0700 Subject: um, exec: remove redundant set_fs(USER_DS) The address limit is already set in flush_old_exec() so this set_fs(USER_DS) is redundant. Signed-off-by: Mathias Krause Cc: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 09bd7b5..939a4a6 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -38,7 +38,6 @@ void flush_thread(void) void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) { - set_fs(USER_DS); PT_REGS_IP(regs) = eip; PT_REGS_SP(regs) = esp; } -- cgit v0.10.2 From 22e9b917ab7ccd98046ad9428fb28967f0744605 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 25 Jul 2011 17:12:46 -0700 Subject: um: clean up delay functions Both sys-i386 and sys-x86_64 support now ndelay(). The delay functions are based on arch/x86/lib/delay.c. Signed-off-by: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/include/asm/delay.h b/arch/um/include/asm/delay.h index c71e32b..8a5576d 100644 --- a/arch/um/include/asm/delay.h +++ b/arch/um/include/asm/delay.h @@ -1,20 +1,18 @@ #ifndef __UM_DELAY_H #define __UM_DELAY_H -#define MILLION 1000000 - /* Undefined on purpose */ extern void __bad_udelay(void); +extern void __bad_ndelay(void); extern void __udelay(unsigned long usecs); +extern void __ndelay(unsigned long usecs); extern void __delay(unsigned long loops); #define udelay(n) ((__builtin_constant_p(n) && (n) > 20000) ? \ __bad_udelay() : __udelay(n)) -/* It appears that ndelay is not used at all for UML, and has never been - * implemented. */ -extern void __unimplemented_ndelay(void); -#define ndelay(n) __unimplemented_ndelay() +#define ndelay(n) ((__builtin_constant_p(n) && (n) > 20000) ? \ + __bad_ndelay() : __ndelay(n)) #endif diff --git a/arch/um/sys-i386/delay.c b/arch/um/sys-i386/delay.c index d623e07..f3fe1a6 100644 --- a/arch/um/sys-i386/delay.c +++ b/arch/um/sys-i386/delay.c @@ -1,29 +1,60 @@ +/* + * Copyright (C) 2011 Richard Weinberger + * Mostly copied from arch/x86/lib/delay.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + #include #include #include #include -void __delay(unsigned long time) +void __delay(unsigned long loops) { - /* Stolen from the i386 __loop_delay */ - int d0; - __asm__ __volatile__( - "\tjmp 1f\n" + asm volatile( + "test %0,%0\n" + "jz 3f\n" + "jmp 1f\n" + ".align 16\n" - "1:\tjmp 2f\n" + "1: jmp 2f\n" + ".align 16\n" - "2:\tdecl %0\n\tjns 2b" - :"=&a" (d0) - :"0" (time)); + "2: dec %0\n" + " jnz 2b\n" + "3: dec %0\n" + + : /* we don't need output */ + : "a" (loops) + ); } +EXPORT_SYMBOL(__delay); -void __udelay(unsigned long usecs) +inline void __const_udelay(unsigned long xloops) { - int i, n; + int d0; - n = (loops_per_jiffy * HZ * usecs) / MILLION; - for(i=0;i + * Mostly copied from arch/x86/lib/delay.c * - * Licensed under the GPL + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. */ #include +#include #include -#include #include void __delay(unsigned long loops) { - unsigned long i; + asm volatile( + "test %0,%0\n" + "jz 3f\n" + "jmp 1f\n" - for(i = 0; i < loops; i++) - cpu_relax(); + ".align 16\n" + "1: jmp 2f\n" + + ".align 16\n" + "2: dec %0\n" + " jnz 2b\n" + "3: dec %0\n" + + : /* we don't need output */ + : "a" (loops) + ); } +EXPORT_SYMBOL(__delay); -void __udelay(unsigned long usecs) +inline void __const_udelay(unsigned long xloops) { - unsigned long i, n; + int d0; - n = (loops_per_jiffy * HZ * usecs) / MILLION; - for(i=0;i Date: Mon, 25 Jul 2011 17:12:47 -0700 Subject: um: fix _FORTIFY_SOURCE=2 support for kernel modules When UML is compiled with _FORTIFY_SOURCE we have to export all _chk() functions which are used in modules. For now it's only the case for __sprintf_chk(). Tested-by: Florian Fainelli Reported-by: Florian Fainelli Signed-off-by: Richard Weinberger Acked-by: Vitaliy Ivanov Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c index 05f5ea8..45ffe46 100644 --- a/arch/um/os-Linux/user_syms.c +++ b/arch/um/os-Linux/user_syms.c @@ -113,3 +113,8 @@ EXPORT_SYMBOL(__stack_smash_handler); extern long __guard __attribute__((weak)); EXPORT_SYMBOL(__guard); + +#ifdef _FORTIFY_SOURCE +extern int __sprintf_chk(char *str, int flag, size_t strlen, const char *format); +EXPORT_SYMBOL(__sprintf_chk); +#endif -- cgit v0.10.2 From dd71dc4c335a8957d269ce063b3e80933f3482fc Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 25 Jul 2011 17:12:48 -0700 Subject: um: add netpoll support To make netconsole usable on UML, its ethernet driver needs netpoll support. Signed-off-by: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 47d0c37..22745b4 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -262,6 +262,15 @@ static int uml_net_change_mtu(struct net_device *dev, int new_mtu) return 0; } +#ifdef CONFIG_NET_POLL_CONTROLLER +static void uml_net_poll_controller(struct net_device *dev) +{ + disable_irq(dev->irq); + uml_net_interrupt(dev->irq, dev); + enable_irq(dev->irq); +} +#endif + static void uml_net_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { @@ -364,6 +373,9 @@ static const struct net_device_ops uml_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = uml_net_change_mtu, .ndo_validate_addr = eth_validate_addr, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = uml_net_poll_controller, +#endif }; /* -- cgit v0.10.2 From 7fa38e7e9ead2dc7a346db10dd27a40dd83b2a26 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 25 Jul 2011 17:12:48 -0700 Subject: um: reinstate kernel version in generated .config Commit 0954828fcbf3 ("kconfig: replace KERNELVERSION usage by the mainmenu's prompt") made the kernel version disappear from the generated .config file when configuring for UML. As UML's Kconfig doesn't have a mainmenu, kconfig falls back to the default string "Linux Kernel Configuration". Add a suitable mainmenu to the main UML Kconfig file to fix this. Signed-off-by: Geert Uytterhoeven Signed-off-by: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/Kconfig.x86 b/arch/um/Kconfig.x86 index 8aae429..d31ecf3 100644 --- a/arch/um/Kconfig.x86 +++ b/arch/um/Kconfig.x86 @@ -1,3 +1,5 @@ +mainmenu "User Mode Linux/$SUBARCH $KERNELVERSION Kernel Configuration" + source "arch/um/Kconfig.common" menu "UML-specific options" -- cgit v0.10.2 From fbee8d933c775cdb9daac3063659cda38c1776c2 Mon Sep 17 00:00:00 2001 From: Vitaliy Ivanov Date: Mon, 25 Jul 2011 17:12:49 -0700 Subject: uml: drivers/net_user.c memory leak fix Perform memory cleanup on exit. On receiving invalid 'pid' we still should clean 'output' variable. Signed-off-by: Vitaliy Ivanov Signed-off-by: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c index 9415dd9..5201188 100644 --- a/arch/um/drivers/net_user.c +++ b/arch/um/drivers/net_user.c @@ -228,7 +228,10 @@ static void change(char *dev, char *what, unsigned char *addr, "buffer\n"); pid = change_tramp(argv, output, output_len); - if (pid < 0) return; + if (pid < 0) { + kfree(output); + return; + } if (output != NULL) { printk("%s", output); -- cgit v0.10.2 From 7a99ae7c57d2de5cad9562bed32addf064521116 Mon Sep 17 00:00:00 2001 From: Vitaliy Ivanov Date: Mon, 25 Jul 2011 17:12:50 -0700 Subject: uml: cow_user.c warning corrections Fix this warning: arch/um/drivers/cow_user.c: In function `absolutize': arch/um/drivers/cow_user.c:189:7: warning: ignoring return value of `chdir', declared with attribute warn_unused_result [richard@nod.at: happens only with -D_FORTIFY_SOURCE=2] Signed-off-by: Vitaliy Ivanov Signed-off-by: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c index 93f227a..9cbb426 100644 --- a/arch/um/drivers/cow_user.c +++ b/arch/um/drivers/cow_user.c @@ -186,7 +186,11 @@ static int absolutize(char *to, int size, char *from) strcat(to, "/"); strcat(to, from); } - chdir(save_cwd); + if (chdir(save_cwd)) { + cow_printf("absolutize : Can't cd to '%s' - " + "errno = %d\n", save_cwd, errno); + return -1; + } return 0; } -- cgit v0.10.2 From 2fdf2130926f07881c75e3905ece472c37d7703f Mon Sep 17 00:00:00 2001 From: Vitaliy Ivanov Date: Mon, 25 Jul 2011 17:12:50 -0700 Subject: uml: helper.c warning corrections Fix this warning: arch/um/os-Linux/helper.c: In function `helper_child': arch/um/os-Linux/helper.c:38:7: warning: ignoring return value of `write', declared with attribute warn_unused_result [richard@nod.at: happens only with -D_FORTIFY_SOURCE=2] Signed-off-by: Vitaliy Ivanov Signed-off-by: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c index b6b1096..feff22d 100644 --- a/arch/um/os-Linux/helper.c +++ b/arch/um/os-Linux/helper.c @@ -28,14 +28,14 @@ static int helper_child(void *arg) { struct helper_data *data = arg; char **argv = data->argv; - int err; + int err, ret; if (data->pre_exec != NULL) (*data->pre_exec)(data->pre_data); err = execvp_noalloc(data->buf, argv[0], argv); /* If the exec succeeds, we don't get here */ - write(data->fd, &err, sizeof(err)); + CATCH_EINTR(ret = write(data->fd, &err, sizeof(err))); return 0; } -- cgit v0.10.2 From 9a8beb93067764344523386e0e5388d3fd78add7 Mon Sep 17 00:00:00 2001 From: Vitaliy Ivanov Date: Mon, 25 Jul 2011 17:12:51 -0700 Subject: uml: drivers/slip_user.c memory leak fix Do not free memory when you failed to allocate it. Signed-off-by: Vitaliy Ivanov Signed-off-by: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c index a1c2d2c..cbacfc4 100644 --- a/arch/um/drivers/slip_user.c +++ b/arch/um/drivers/slip_user.c @@ -102,7 +102,7 @@ static int slip_tramp(char **argv, int fd) "buffer\n"); os_kill_process(pid, 1); err = -ENOMEM; - goto out_free; + goto out_close; } close(fds[1]); @@ -112,7 +112,6 @@ static int slip_tramp(char **argv, int fd) err = helper_wait(pid); close(fds[0]); -out_free: kfree(output); return err; -- cgit v0.10.2 From 2a6d0ac182688d4d23357ece744978706c044aad Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 25 Jul 2011 17:12:52 -0700 Subject: uml: free resources When creating the temp file there's a memory and file descriptor leak upon error. Signed-off-by: Davidlohr Bueso Signed-off-by: Richard Weinberger Reviewed-by: Vitaliy Ivanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c index e696144..62878cf 100644 --- a/arch/um/os-Linux/mem.c +++ b/arch/um/os-Linux/mem.c @@ -176,7 +176,7 @@ static int __init make_tempfile(const char *template, char **out_tempname, find_tempdir(); if ((tempdir == NULL) || (strlen(tempdir) >= MAXPATHLEN)) - return -1; + goto out; if (template[0] != '/') strcpy(tempname, tempdir); @@ -191,13 +191,15 @@ static int __init make_tempfile(const char *template, char **out_tempname, } if (do_unlink && (unlink(tempname) < 0)) { perror("unlink"); - goto out; + goto close; } if (out_tempname) { *out_tempname = tempname; } else free(tempname); return fd; +close: + close(fd); out: free(tempname); return -1; -- cgit v0.10.2 From b743ac54e54ca18fb7164a97cc77cf1ccee0b0f7 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 25 Jul 2011 17:12:52 -0700 Subject: um: disable scan_elf_aux() on x86_64 Reusing the host's vDSO makes only sense on x86_32. Signed-off-by: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index d66f038..b33f4df 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -3,10 +3,12 @@ # Licensed under the GPL # -obj-y = aio.o elf_aux.o execvp.o file.o helper.o irq.o main.o mem.o process.o \ +obj-y = aio.o execvp.o file.o helper.o irq.o main.o mem.o process.o \ registers.o sigio.o signal.o start_up.o time.o tty.o uaccess.o \ umid.o tls.o user_syms.o util.o drivers/ sys-$(SUBARCH)/ skas/ +obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o + USER_OBJS := $(user-objs-y) aio.o elf_aux.o execvp.o file.o helper.o irq.o \ main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \ tty.o tls.o uaccess.o umid.o util.o diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c index 608784d..9533237 100644 --- a/arch/um/os-Linux/elf_aux.c +++ b/arch/um/os-Linux/elf_aux.c @@ -14,16 +14,11 @@ #include "mem_user.h" #include -/* Use the one from the kernel - the host may miss it, if having old headers. */ -#if UM_ELF_CLASS == UM_ELFCLASS32 typedef Elf32_auxv_t elf_auxv_t; -#else -typedef Elf64_auxv_t elf_auxv_t; -#endif /* These are initialized very early in boot and never changed */ char * elf_aux_platform; -long elf_aux_hwcap; +extern long elf_aux_hwcap; unsigned long vsyscall_ehdr; unsigned long vsyscall_end; unsigned long __kernel_vsyscall; diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index fb2a97a..8471b81 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -21,6 +21,8 @@ #define STACKSIZE (8 * 1024 * 1024) #define THREAD_NAME_LEN (256) +long elf_aux_hwcap; + static void set_stklim(void) { struct rlimit lim; @@ -143,7 +145,9 @@ int __init main(int argc, char **argv, char **envp) install_fatal_handler(SIGINT); install_fatal_handler(SIGTERM); +#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA scan_elf_aux(envp); +#endif do_uml_initcalls(); ret = linux_main(argc, argv); -- cgit v0.10.2 From 548f0a4e02f6fa33278e77a2a6477cdeb512317f Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 25 Jul 2011 17:12:53 -0700 Subject: um: Set __HAVE_ARCH_GATE_AREA for i386 When UML is unable to reuse the host's vDSO FIXADDR_USER_START is zero. To handle this special case correclty we have to implement custom gate area helper methods. Signed-off-by: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile index 87b659d..3923cfb 100644 --- a/arch/um/sys-i386/Makefile +++ b/arch/um/sys-i386/Makefile @@ -4,7 +4,7 @@ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \ ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \ - sys_call_table.o tls.o atomic64_cx8_32.o + sys_call_table.o tls.o atomic64_cx8_32.o mem.o obj-$(CONFIG_BINFMT_ELF) += elfcore.o diff --git a/arch/um/sys-i386/asm/elf.h b/arch/um/sys-i386/asm/elf.h index d964a41..4230555 100644 --- a/arch/um/sys-i386/asm/elf.h +++ b/arch/um/sys-i386/asm/elf.h @@ -105,6 +105,8 @@ extern unsigned long __kernel_vsyscall; #define FIXADDR_USER_START VSYSCALL_BASE #define FIXADDR_USER_END VSYSCALL_END +#define __HAVE_ARCH_GATE_AREA 1 + /* * Architecture-neutral AT_ values in 0-17, leave some room * for more of them, start the x86-specific ones at 32. diff --git a/arch/um/sys-i386/mem.c b/arch/um/sys-i386/mem.c new file mode 100644 index 0000000..639900a --- /dev/null +++ b/arch/um/sys-i386/mem.c @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2011 Richard Weinberger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +static struct vm_area_struct gate_vma; + +static int __init gate_vma_init(void) +{ + if (!FIXADDR_USER_START) + return 0; + + gate_vma.vm_mm = NULL; + gate_vma.vm_start = FIXADDR_USER_START; + gate_vma.vm_end = FIXADDR_USER_END; + gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; + gate_vma.vm_page_prot = __P101; + + /* + * Make sure the vDSO gets into every core dump. + * Dumping its contents makes post-mortem fully interpretable later + * without matching up the same kernel and hardware config to see + * what PC values meant. + */ + gate_vma.vm_flags |= VM_ALWAYSDUMP; + + return 0; +} +__initcall(gate_vma_init); + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return FIXADDR_USER_START ? &gate_vma : NULL; +} + +int in_gate_area_no_mm(unsigned long addr) +{ + if (!FIXADDR_USER_START) + return 0; + + if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END)) + return 1; + + return 0; +} + +int in_gate_area(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma = get_gate_vma(mm); + + if (!vma) + return 0; + + return (addr >= vma->vm_start) && (addr < vma->vm_end); +} -- cgit v0.10.2 From fc9a00187ba1300a0baae8e613cc62598e1a7de7 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 25 Jul 2011 17:12:54 -0700 Subject: um: set __HAVE_ARCH_GATE_AREA for x86_64 Implement arch_vma_name() and make get_gate_vma(), in_gate_area() and in_gate_area_no_mm() a nop. We need arch_vma_name() to support vDSO. Signed-off-by: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index 4f95637..bf2fe83 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile @@ -4,7 +4,7 @@ # Licensed under the GPL # -obj-y = bug.o bugs.o delay.o fault.o ldt.o ptrace.o ptrace_user.o \ +obj-y = bug.o bugs.o delay.o fault.o ldt.o ptrace.o ptrace_user.o mem.o \ setjmp.o signal.o stub.o stub_segv.o syscalls.o syscall_table.o \ sysrq.o ksyms.o tls.o diff --git a/arch/um/sys-x86_64/asm/elf.h b/arch/um/sys-x86_64/asm/elf.h index d6d5af3..11a2bfb 100644 --- a/arch/um/sys-x86_64/asm/elf.h +++ b/arch/um/sys-x86_64/asm/elf.h @@ -119,4 +119,14 @@ extern long elf_aux_hwcap; #define SET_PERSONALITY(ex) do ; while(0) +#define __HAVE_ARCH_GATE_AREA 1 +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +struct linux_binprm; +extern int arch_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp); + +extern unsigned long um_vdso_addr; +#define AT_SYSINFO_EHDR 33 +#define ARCH_DLINFO NEW_AUX_ENT(AT_SYSINFO_EHDR, um_vdso_addr) + #endif diff --git a/arch/um/sys-x86_64/mem.c b/arch/um/sys-x86_64/mem.c new file mode 100644 index 0000000..5465187 --- /dev/null +++ b/arch/um/sys-x86_64/mem.c @@ -0,0 +1,26 @@ +#include "linux/mm.h" +#include "asm/page.h" +#include "asm/mman.h" + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_mm && vma->vm_start == um_vdso_addr) + return "[vdso]"; + + return NULL; +} + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return NULL; +} + +int in_gate_area(struct mm_struct *mm, unsigned long addr) +{ + return 0; +} + +int in_gate_area_no_mm(unsigned long addr) +{ + return 0; +} -- cgit v0.10.2 From f1c2bb8b9964ed31de988910f8b1cfb586d30091 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 25 Jul 2011 17:12:54 -0700 Subject: um: implement a x86_64 vDSO Until now UML had no x86_64 vDSO. So glibc always used the vsyscall page for gettimeday() and friends. Calls to gettimeday() returned falsely the host time and confused some programs. This patch adds a vDSO which turns all __vdso_* calls into a system call so that UML can trap them. As glibc still uses the vsyscall page for static binaries this patch improves the situation only for dynamic binaries. Signed-off-by: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index bf2fe83..bd4d1d3 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile @@ -8,6 +8,8 @@ obj-y = bug.o bugs.o delay.o fault.o ldt.o ptrace.o ptrace_user.o mem.o \ setjmp.o signal.o stub.o stub_segv.o syscalls.o syscall_table.o \ sysrq.o ksyms.o tls.o +obj-y += vdso/ + subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o \ lib/rwsem.o subarch-obj-$(CONFIG_MODULES) += kernel/module.o diff --git a/arch/um/sys-x86_64/vdso/Makefile b/arch/um/sys-x86_64/vdso/Makefile new file mode 100644 index 0000000..5dffe6d --- /dev/null +++ b/arch/um/sys-x86_64/vdso/Makefile @@ -0,0 +1,90 @@ +# +# Building vDSO images for x86. +# + +VDSO64-y := y + +vdso-install-$(VDSO64-y) += vdso.so + + +# files to link into the vdso +vobjs-y := vdso-note.o um_vdso.o + +# files to link into kernel +obj-$(VDSO64-y) += vdso.o vma.o + +vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) + +$(obj)/vdso.o: $(obj)/vdso.so + +targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y) + +export CPPFLAGS_vdso.lds += -P -C + +VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ + -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 + +$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so + +$(obj)/vdso.so.dbg: $(src)/vdso.lds $(vobjs) FORCE + $(call if_changed,vdso) + +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# +# Don't omit frame pointers for ease of userspace debugging, but do +# optimize sibling calls. +# +CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ + $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ + -fno-omit-frame-pointer -foptimize-sibling-calls + +$(vobjs): KBUILD_CFLAGS += $(CFL) + +# +# vDSO code runs in userspace and -pg doesn't help with profiling anyway. +# +CFLAGS_REMOVE_vdso-note.o = -pg +CFLAGS_REMOVE_um_vdso.o = -pg + +targets += vdso-syms.lds +obj-$(VDSO64-y) += vdso-syms.lds + +# +# Match symbols in the DSO that look like VDSO*; produce a file of constants. +# +sed-vdsosym := -e 's/^00*/0/' \ + -e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p' +quiet_cmd_vdsosym = VDSOSYM $@ +define cmd_vdsosym + $(NM) $< | LC_ALL=C sed -n $(sed-vdsosym) | LC_ALL=C sort > $@ +endef + +$(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE + $(call if_changed,vdsosym) + +# +# The DSO images are built using a special linker script. +# +quiet_cmd_vdso = VDSO $@ + cmd_vdso = $(CC) -nostdlib -o $@ \ + $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ + -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \ + sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' + +VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +GCOV_PROFILE := n + +# +# Install the unstripped copy of vdso*.so listed in $(vdso-install-y). +# +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ +$(vdso-install-y): %.so: $(obj)/%.so.dbg FORCE + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +PHONY += vdso_install $(vdso-install-y) +vdso_install: $(vdso-install-y) diff --git a/arch/um/sys-x86_64/vdso/checkundef.sh b/arch/um/sys-x86_64/vdso/checkundef.sh new file mode 100644 index 0000000..7ee90a9 --- /dev/null +++ b/arch/um/sys-x86_64/vdso/checkundef.sh @@ -0,0 +1,10 @@ +#!/bin/sh +nm="$1" +file="$2" +$nm "$file" | grep '^ *U' > /dev/null 2>&1 +if [ $? -eq 1 ]; then + exit 0 +else + echo "$file: undefined symbols found" >&2 + exit 1 +fi diff --git a/arch/um/sys-x86_64/vdso/um_vdso.c b/arch/um/sys-x86_64/vdso/um_vdso.c new file mode 100644 index 0000000..7c441b5 --- /dev/null +++ b/arch/um/sys-x86_64/vdso/um_vdso.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2011 Richard Weinberger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This vDSO turns all calls into a syscall so that UML can trap them. + */ + + +/* Disable profiling for userspace code */ +#define DISABLE_BRANCH_PROFILING + +#include +#include +#include + +int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) +{ + long ret; + + asm("syscall" : "=a" (ret) : + "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory"); + + return ret; +} +int clock_gettime(clockid_t, struct timespec *) + __attribute__((weak, alias("__vdso_clock_gettime"))); + +int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + long ret; + + asm("syscall" : "=a" (ret) : + "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); + + return ret; +} +int gettimeofday(struct timeval *, struct timezone *) + __attribute__((weak, alias("__vdso_gettimeofday"))); + +time_t __vdso_time(time_t *t) +{ + long secs; + + asm volatile("syscall" + : "=a" (secs) + : "0" (__NR_time), "D" (t) : "cc", "r11", "cx", "memory"); + + return secs; +} +int time(time_t *t) __attribute__((weak, alias("__vdso_time"))); + +long +__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) +{ + /* + * UML does not support SMP, we can cheat here. :) + */ + + if (cpu) + *cpu = 0; + if (node) + *node = 0; + + return 0; +} + +long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) + __attribute__((weak, alias("__vdso_getcpu"))); diff --git a/arch/um/sys-x86_64/vdso/vdso-layout.lds.S b/arch/um/sys-x86_64/vdso/vdso-layout.lds.S new file mode 100644 index 0000000..634a2cf --- /dev/null +++ b/arch/um/sys-x86_64/vdso/vdso-layout.lds.S @@ -0,0 +1,64 @@ +/* + * Linker script for vDSO. This is an ELF shared object prelinked to + * its virtual address, and with only one read-only segment. + * This script controls its layout. + */ + +SECTIONS +{ + . = VDSO_PRELINK + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { *(.rodata*) } :text + .data : { + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + } + + .altinstructions : { *(.altinstructions) } + .altinstr_replacement : { *(.altinstr_replacement) } + + /* + * Align the actual code well away from the non-instruction data. + * This is the best thing for the I-cache. + */ + . = ALIGN(0x100); + + .text : { *(.text*) } :text =0x90909090 +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} diff --git a/arch/um/sys-x86_64/vdso/vdso-note.S b/arch/um/sys-x86_64/vdso/vdso-note.S new file mode 100644 index 0000000..79a071e --- /dev/null +++ b/arch/um/sys-x86_64/vdso/vdso-note.S @@ -0,0 +1,12 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include +#include +#include + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/um/sys-x86_64/vdso/vdso.S b/arch/um/sys-x86_64/vdso/vdso.S new file mode 100644 index 0000000..ec82c16 --- /dev/null +++ b/arch/um/sys-x86_64/vdso/vdso.S @@ -0,0 +1,10 @@ +#include + +__INITDATA + + .globl vdso_start, vdso_end +vdso_start: + .incbin "arch/um/sys-x86_64/vdso/vdso.so" +vdso_end: + +__FINIT diff --git a/arch/um/sys-x86_64/vdso/vdso.lds.S b/arch/um/sys-x86_64/vdso/vdso.lds.S new file mode 100644 index 0000000..b96b267 --- /dev/null +++ b/arch/um/sys-x86_64/vdso/vdso.lds.S @@ -0,0 +1,32 @@ +/* + * Linker script for 64-bit vDSO. + * We #include the file to define the layout details. + * Here we only choose the prelinked virtual address. + * + * This file defines the version script giving the user-exported symbols in + * the DSO. We can define local symbols here called VDSO* to make their + * values visible using the asm-x86/vdso.h macros from the kernel proper. + */ + +#define VDSO_PRELINK 0xffffffffff700000 +#include "vdso-layout.lds.S" + +/* + * This controls what userland symbols we export from the vDSO. + */ +VERSION { + LINUX_2.6 { + global: + clock_gettime; + __vdso_clock_gettime; + gettimeofday; + __vdso_gettimeofday; + getcpu; + __vdso_getcpu; + time; + __vdso_time; + local: *; + }; +} + +VDSO64_PRELINK = VDSO_PRELINK; diff --git a/arch/um/sys-x86_64/vdso/vma.c b/arch/um/sys-x86_64/vdso/vma.c new file mode 100644 index 0000000..9495c8d --- /dev/null +++ b/arch/um/sys-x86_64/vdso/vma.c @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2011 Richard Weinberger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +unsigned int __read_mostly vdso_enabled = 1; +unsigned long um_vdso_addr; + +extern unsigned long task_size; +extern char vdso_start[], vdso_end[]; + +static struct page **vdsop; + +static int __init init_vdso(void) +{ + struct page *um_vdso; + + BUG_ON(vdso_end - vdso_start > PAGE_SIZE); + + um_vdso_addr = task_size - PAGE_SIZE; + + vdsop = kmalloc(GFP_KERNEL, sizeof(struct page *)); + if (!vdsop) + goto oom; + + um_vdso = alloc_page(GFP_KERNEL); + if (!um_vdso) { + kfree(vdsop); + + goto oom; + } + + copy_page(page_address(um_vdso), vdso_start); + *vdsop = um_vdso; + + return 0; + +oom: + printk(KERN_ERR "Cannot allocate vdso\n"); + vdso_enabled = 0; + + return -ENOMEM; +} +subsys_initcall(init_vdso); + +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + int err; + struct mm_struct *mm = current->mm; + + if (!vdso_enabled) + return 0; + + down_write(&mm->mmap_sem); + + err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_ALWAYSDUMP, + vdsop); + + up_write(&mm->mmap_sem); + + return err; +} -- cgit v0.10.2 From 39073db15bf9016003e8a4e607d517f4928b713c Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 25 Jul 2011 17:12:55 -0700 Subject: um: adjust size of pid_buf Linux can have pids up to 4*1024*1024. To handle such huge numbers pid_buf needs to be larger. Reported-by: Geert Uytterhoeven Signed-off-by: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/drivers/harddog_user.c b/arch/um/drivers/harddog_user.c index b56f8e0..84dce3f 100644 --- a/arch/um/drivers/harddog_user.c +++ b/arch/um/drivers/harddog_user.c @@ -32,7 +32,7 @@ int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock) { struct dog_data data; int in_fds[2], out_fds[2], pid, n, err; - char pid_buf[sizeof("nnnnn\0")], c; + char pid_buf[sizeof("nnnnnnn\0")], c; char *pid_args[] = { "/usr/bin/uml_watchdog", "-pid", pid_buf, NULL }; char *mconsole_args[] = { "/usr/bin/uml_watchdog", "-mconsole", NULL, NULL }; -- cgit v0.10.2 From f1c93e4946ae1cc268729451de6335a7e7d2bea9 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 25 Jul 2011 17:12:55 -0700 Subject: um: remove dead code GCC 4.6's -Wunused-but-set-variable found some dead code. Signed-off-by: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c index 25e1965..d4191fe 100644 --- a/arch/um/drivers/chan_kern.c +++ b/arch/um/drivers/chan_kern.c @@ -543,11 +543,10 @@ int parse_chan_pair(char *str, struct line *line, int device, const struct chan_opts *opts, char **error_out) { struct list_head *chans = &line->chan_list; - struct chan *new, *chan; + struct chan *new; char *in, *out; if (!list_empty(chans)) { - chan = list_entry(chans->next, struct chan, list); free_chan(chans, 0); INIT_LIST_HEAD(chans); } diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 35dd0b8..d51c404 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -176,10 +176,9 @@ void line_flush_buffer(struct tty_struct *tty) { struct line *line = tty->driver_data; unsigned long flags; - int err; spin_lock_irqsave(&line->lock, flags); - err = flush_buffer(line); + flush_buffer(line); spin_unlock_irqrestore(&line->lock, flags); } diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 869bec9..4d93dff 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -20,9 +20,8 @@ static void kill_off_processes(void) os_kill_ptraced_process(userspace_pid[0], 1); else { struct task_struct *p; - int pid, me; + int pid; - me = os_getpid(); for_each_process(p) { if (p->mm == NULL) continue; -- cgit v0.10.2 From f35119d6681300ba6d76da53cb1ebc2eed62e77a Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Mon, 25 Jul 2011 17:12:56 -0700 Subject: drivers: use kzalloc/kcalloc instead of 'kmalloc+memset', where possible Signed-off-by: Rakib Mullick Cc: Jeff Garzik Cc: David Airlie Cc: Tejun Heo Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index b02c4ff..0a9a774 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -1642,13 +1642,12 @@ static int sata_dwc_probe(struct platform_device *ofdev) const struct ata_port_info *ppi[] = { &pi, NULL }; /* Allocate DWC SATA device */ - hsdev = kmalloc(sizeof(*hsdev), GFP_KERNEL); + hsdev = kzalloc(sizeof(*hsdev), GFP_KERNEL); if (hsdev == NULL) { dev_err(&ofdev->dev, "kmalloc failed for hsdev\n"); err = -ENOMEM; goto error; } - memset(hsdev, 0, sizeof(*hsdev)); /* Ioremap SATA registers */ base = of_iomap(ofdev->dev.of_node, 0); diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c index d15e09b..7525e03 100644 --- a/drivers/gpu/drm/drm_scatter.c +++ b/drivers/gpu/drm/drm_scatter.c @@ -83,30 +83,26 @@ int drm_sg_alloc(struct drm_device *dev, struct drm_scatter_gather * request) if (dev->sg) return -EINVAL; - entry = kmalloc(sizeof(*entry), GFP_KERNEL); + entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; - memset(entry, 0, sizeof(*entry)); pages = (request->size + PAGE_SIZE - 1) / PAGE_SIZE; DRM_DEBUG("size=%ld pages=%ld\n", request->size, pages); entry->pages = pages; - entry->pagelist = kmalloc(pages * sizeof(*entry->pagelist), GFP_KERNEL); + entry->pagelist = kcalloc(pages, sizeof(*entry->pagelist), GFP_KERNEL); if (!entry->pagelist) { kfree(entry); return -ENOMEM; } - memset(entry->pagelist, 0, pages * sizeof(*entry->pagelist)); - - entry->busaddr = kmalloc(pages * sizeof(*entry->busaddr), GFP_KERNEL); + entry->busaddr = kcalloc(pages, sizeof(*entry->busaddr), GFP_KERNEL); if (!entry->busaddr) { kfree(entry->pagelist); kfree(entry); return -ENOMEM; } - memset((void *)entry->busaddr, 0, pages * sizeof(*entry->busaddr)); entry->virtual = drm_vmalloc_dma(pages << PAGE_SHIFT); if (!entry->virtual) { diff --git a/drivers/gpu/drm/radeon/radeon_mem.c b/drivers/gpu/drm/radeon/radeon_mem.c index ed95155..988548e 100644 --- a/drivers/gpu/drm/radeon/radeon_mem.c +++ b/drivers/gpu/drm/radeon/radeon_mem.c @@ -139,7 +139,7 @@ static int init_heap(struct mem_block **heap, int start, int size) if (!blocks) return -ENOMEM; - *heap = kmalloc(sizeof(**heap), GFP_KERNEL); + *heap = kzalloc(sizeof(**heap), GFP_KERNEL); if (!*heap) { kfree(blocks); return -ENOMEM; @@ -150,7 +150,6 @@ static int init_heap(struct mem_block **heap, int start, int size) blocks->file_priv = NULL; blocks->next = blocks->prev = *heap; - memset(*heap, 0, sizeof(**heap)); (*heap)->file_priv = (struct drm_file *) - 1; (*heap)->next = (*heap)->prev = blocks; return 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c index f1a52f9..07ce02d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c @@ -585,11 +585,10 @@ int vmw_overlay_init(struct vmw_private *dev_priv) return -ENOSYS; } - overlay = kmalloc(sizeof(*overlay), GFP_KERNEL); + overlay = kzalloc(sizeof(*overlay), GFP_KERNEL); if (!overlay) return -ENOMEM; - memset(overlay, 0, sizeof(*overlay)); mutex_init(&overlay->mutex); for (i = 0; i < VMW_MAX_NUM_STREAMS; i++) { overlay->stream[i].buf = NULL; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 5408b1b..bfe1bcc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -612,11 +612,9 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, srf->sizes[0].height == 64 && srf->format == SVGA3D_A8R8G8B8) { - srf->snooper.image = kmalloc(64 * 64 * 4, GFP_KERNEL); - /* clear the image */ - if (srf->snooper.image) { - memset(srf->snooper.image, 0x00, 64 * 64 * 4); - } else { + /* allocate image area and clear it */ + srf->snooper.image = kzalloc(64 * 64 * 4, GFP_KERNEL); + if (!srf->snooper.image) { DRM_ERROR("Failed to allocate cursor_image\n"); ret = -ENOMEM; goto out_err1; diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c index 8a1021f..c72f1c0 100644 --- a/drivers/gpu/vga/vgaarb.c +++ b/drivers/gpu/vga/vgaarb.c @@ -1171,10 +1171,9 @@ static int vga_arb_open(struct inode *inode, struct file *file) pr_debug("%s\n", __func__); - priv = kmalloc(sizeof(struct vga_arb_private), GFP_KERNEL); + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (priv == NULL) return -ENOMEM; - memset(priv, 0, sizeof(*priv)); spin_lock_init(&priv->lock); file->private_data = priv; -- cgit v0.10.2 From 5190f0c030f46b3169205f34f6d9ef480fa39ef2 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 25 Jul 2011 17:12:58 -0700 Subject: asm-generic/system.h: drop useless __KERNEL__ This header isn't exported to user-space, and even if it was, the __KERNEL__ check covers the entire file, so we'd get a useless stub in the first place. So punt it. Signed-off-by: Mike Frysinger Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-generic/system.h b/include/asm-generic/system.h index 4b0b9cb..215efa7 100644 --- a/include/asm-generic/system.h +++ b/include/asm-generic/system.h @@ -14,7 +14,6 @@ #ifndef __ASM_GENERIC_SYSTEM_H #define __ASM_GENERIC_SYSTEM_H -#ifdef __KERNEL__ #ifndef __ASSEMBLY__ #include @@ -139,5 +138,4 @@ unsigned long __xchg(unsigned long x, volatile void *ptr, int size) #endif /* !__ASSEMBLY__ */ -#endif /* __KERNEL__ */ #endif /* __ASM_GENERIC_SYSTEM_H */ -- cgit v0.10.2 From ae891a1b93bf62e9aaa116a7a71312375047fc9f Mon Sep 17 00:00:00 2001 From: Maxin B John Date: Mon, 25 Jul 2011 17:12:59 -0700 Subject: devres: fix possible use after free devres uses the pointer value as key after it's freed, which is safe but triggers spurious use-after-free warnings on some static analysis tools. Rearrange code to avoid such warnings. Signed-off-by: Maxin B. John Reviewed-by: Rolf Eike Beer Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index 1ef4ffc..bd8e788 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -87,8 +87,8 @@ void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id) { struct irq_devres match_data = { irq, dev_id }; - free_irq(irq, dev_id); WARN_ON(devres_destroy(dev, devm_irq_release, devm_irq_match, &match_data)); + free_irq(irq, dev_id); } EXPORT_SYMBOL(devm_free_irq); diff --git a/lib/devres.c b/lib/devres.c index 6efddf5..7c0e953 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -79,9 +79,9 @@ EXPORT_SYMBOL(devm_ioremap_nocache); */ void devm_iounmap(struct device *dev, void __iomem *addr) { - iounmap(addr); WARN_ON(devres_destroy(dev, devm_ioremap_release, devm_ioremap_match, (void *)addr)); + iounmap(addr); } EXPORT_SYMBOL(devm_iounmap); diff --git a/mm/dmapool.c b/mm/dmapool.c index 03bf3bb..fbb58e3 100644 --- a/mm/dmapool.c +++ b/mm/dmapool.c @@ -500,7 +500,7 @@ void dmam_pool_destroy(struct dma_pool *pool) { struct device *dev = pool->dev; - dma_pool_destroy(pool); WARN_ON(devres_destroy(dev, dmam_pool_release, dmam_pool_match, pool)); + dma_pool_destroy(pool); } EXPORT_SYMBOL(dmam_pool_destroy); -- cgit v0.10.2 From a0e44d4a7a3935afe425ec8dd1a5b63895e1f9c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 25 Jul 2011 17:13:00 -0700 Subject: include/linux/ioport.h: new helper to define common struct resource constructs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resource definitions that just define start, end and flags = IORESOURCE_MEM or IORESOURCE_IRQ (with start=end) are quite common. So introduce a shortcut for them. For completeness add macros for IORESOURCE_DMA and IORESOURCE_IO, too and also make available a set of macros to specify named resources of all types which are less common. Signed-off-by: Uwe Kleine-König Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/ioport.h b/include/linux/ioport.h index e9bb22c..c2ebfe6 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -109,6 +109,36 @@ struct resource_list { /* PCI control bits. Shares IORESOURCE_BITS with above PCI ROM. */ #define IORESOURCE_PCI_FIXED (1<<4) /* Do not move resource */ + +/* helpers to define resources */ +#define DEFINE_RES_NAMED(_start, _size, _name, _flags) \ + { \ + .start = (_start), \ + .end = (_start) + (_size) - 1, \ + .name = (_name), \ + .flags = (_flags), \ + } + +#define DEFINE_RES_IO_NAMED(_start, _size, _name) \ + DEFINE_RES_NAMED((_start), (_size), (_name), IORESOURCE_IO) +#define DEFINE_RES_IO(_start, _size) \ + DEFINE_RES_IO_NAMED((_start), (_size), NULL) + +#define DEFINE_RES_MEM_NAMED(_start, _size, _name) \ + DEFINE_RES_NAMED((_start), (_size), (_name), IORESOURCE_MEM) +#define DEFINE_RES_MEM(_start, _size) \ + DEFINE_RES_MEM_NAMED((_start), (_size), NULL) + +#define DEFINE_RES_IRQ_NAMED(_irq, _name) \ + DEFINE_RES_NAMED((_irq), 1, (_name), IORESOURCE_IRQ) +#define DEFINE_RES_IRQ(_irq) \ + DEFINE_RES_IRQ_NAMED((_irq), NULL) + +#define DEFINE_RES_DMA_NAMED(_dma, _name) \ + DEFINE_RES_NAMED((_dma), 1, (_name), IORESOURCE_DMA) +#define DEFINE_RES_DMA(_dma) \ + DEFINE_RES_DMA_NAMED((_dma), NULL) + /* PC/ISA/whatever - the normal PC address spaces: IO and memory */ extern struct resource ioport_resource; extern struct resource iomem_resource; -- cgit v0.10.2 From 9d00f92f437f0ff682876cab9f2d94cf3b6e5d17 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 25 Jul 2011 17:13:02 -0700 Subject: include/linux/kernel.h: fix a headers_check warning Fix the warning: usr/include/linux/kernel.h:65: userspace cannot reference function or variable defined in the kernel As Michal noted, BUILD_BUG_ON stuffs should be moved under #ifdef __KERNEL__. Signed-off-by: WANG Cong Cc: Michal Marek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 567a6f7..bf66f06 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -646,29 +646,6 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } const typeof( ((type *)0)->member ) *__mptr = (ptr); \ (type *)( (char *)__mptr - offsetof(type,member) );}) -struct sysinfo; -extern int do_sysinfo(struct sysinfo *info); - -#endif /* __KERNEL__ */ - -#define SI_LOAD_SHIFT 16 -struct sysinfo { - long uptime; /* Seconds since boot */ - unsigned long loads[3]; /* 1, 5, and 15 minute load averages */ - unsigned long totalram; /* Total usable main memory size */ - unsigned long freeram; /* Available memory size */ - unsigned long sharedram; /* Amount of shared memory */ - unsigned long bufferram; /* Memory used by buffers */ - unsigned long totalswap; /* Total swap space size */ - unsigned long freeswap; /* swap space still available */ - unsigned short procs; /* Number of current processes */ - unsigned short pad; /* explicit padding for m68k */ - unsigned long totalhigh; /* Total high memory size */ - unsigned long freehigh; /* Available high memory size */ - unsigned int mem_unit; /* Memory unit size in bytes */ - char _f[20-2*sizeof(long)-sizeof(int)]; /* Padding: libc5 uses this.. */ -}; - #ifdef __CHECKER__ #define BUILD_BUG_ON_NOT_POWER_OF_2(n) #define BUILD_BUG_ON_ZERO(e) (0) @@ -714,6 +691,30 @@ extern int __build_bug_on_failed; #endif #endif /* __CHECKER__ */ + +struct sysinfo; +extern int do_sysinfo(struct sysinfo *info); + +#endif /* __KERNEL__ */ + +#define SI_LOAD_SHIFT 16 +struct sysinfo { + long uptime; /* Seconds since boot */ + unsigned long loads[3]; /* 1, 5, and 15 minute load averages */ + unsigned long totalram; /* Total usable main memory size */ + unsigned long freeram; /* Available memory size */ + unsigned long sharedram; /* Amount of shared memory */ + unsigned long bufferram; /* Memory used by buffers */ + unsigned long totalswap; /* Total swap space size */ + unsigned long freeswap; /* swap space still available */ + unsigned short procs; /* Number of current processes */ + unsigned short pad; /* explicit padding for m68k */ + unsigned long totalhigh; /* Total high memory size */ + unsigned long freehigh; /* Available high memory size */ + unsigned int mem_unit; /* Memory unit size in bytes */ + char _f[20-2*sizeof(long)-sizeof(int)]; /* Padding: libc5 uses this.. */ +}; + /* Trap pasters of __FUNCTION__ at compile-time */ #define __FUNCTION__ (__func__) -- cgit v0.10.2 From b9d4f426689765d1e066913f6872c8d59e0f2ac9 Mon Sep 17 00:00:00 2001 From: Arnaud Lacombe Date: Mon, 25 Jul 2011 17:13:03 -0700 Subject: include/linux/kernel.h: hide internal macros from userspace Unexpose to userland the following macros - __FUNCTION__ - NUMA_BUILD - COMPACTION_BUILD - REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD Signed-off-by: Arnaud Lacombe Cc: Rusty Russell Cc: KOSAKI Motohiro Cc: Christoph Lameter Cc: Mel Gorman Cc: Steven Rostedt Acked-by: WANG Cong Cc: Michal Marek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/kernel.h b/include/linux/kernel.h index bf66f06..9a43ad7 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -691,6 +691,27 @@ extern int __build_bug_on_failed; #endif #endif /* __CHECKER__ */ +/* Trap pasters of __FUNCTION__ at compile-time */ +#define __FUNCTION__ (__func__) + +/* This helps us to avoid #ifdef CONFIG_NUMA */ +#ifdef CONFIG_NUMA +#define NUMA_BUILD 1 +#else +#define NUMA_BUILD 0 +#endif + +/* This helps us avoid #ifdef CONFIG_COMPACTION */ +#ifdef CONFIG_COMPACTION +#define COMPACTION_BUILD 1 +#else +#define COMPACTION_BUILD 0 +#endif + +/* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */ +#ifdef CONFIG_FTRACE_MCOUNT_RECORD +# define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD +#endif struct sysinfo; extern int do_sysinfo(struct sysinfo *info); @@ -715,26 +736,4 @@ struct sysinfo { char _f[20-2*sizeof(long)-sizeof(int)]; /* Padding: libc5 uses this.. */ }; -/* Trap pasters of __FUNCTION__ at compile-time */ -#define __FUNCTION__ (__func__) - -/* This helps us to avoid #ifdef CONFIG_NUMA */ -#ifdef CONFIG_NUMA -#define NUMA_BUILD 1 -#else -#define NUMA_BUILD 0 -#endif - -/* This helps us avoid #ifdef CONFIG_COMPACTION */ -#ifdef CONFIG_COMPACTION -#define COMPACTION_BUILD 1 -#else -#define COMPACTION_BUILD 0 -#endif - -/* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */ -#ifdef CONFIG_FTRACE_MCOUNT_RECORD -# define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD -#endif - #endif -- cgit v0.10.2 From 005bdad7b80ac017ca21d795639d4214b9844a84 Mon Sep 17 00:00:00 2001 From: Arnaud Lacombe Date: Mon, 25 Jul 2011 17:13:04 -0700 Subject: eisa/pci_eisa.c: fix section mismatch Fixes WARNING: vmlinux.o(.data+0x15d3ac): Section mismatch in reference from the variable pci_eisa_driver to the function .init.text:pci_eisa_init() The variable pci_eisa_driver references the function __init pci_eisa_init() If the reference is valid then annotate the variable with __init* or __refdata (see linux/init.h) or name the variable: *_template, *_timer, *_sht, *_ops, *_probe, *_probe_one, *_console Signed-off-by: Arnaud Lacombe Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/eisa/pci_eisa.c b/drivers/eisa/pci_eisa.c index 0dd0f63..30da70d 100644 --- a/drivers/eisa/pci_eisa.c +++ b/drivers/eisa/pci_eisa.c @@ -45,13 +45,13 @@ static int __init pci_eisa_init(struct pci_dev *pdev, return 0; } -static struct pci_device_id pci_eisa_pci_tbl[] = { +static struct pci_device_id __initdata pci_eisa_pci_tbl[] = { { PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_BRIDGE_EISA << 8, 0xffff00, 0 }, { 0, } }; -static struct pci_driver pci_eisa_driver = { +static struct pci_driver __initdata pci_eisa_driver = { .name = "pci_eisa", .id_table = pci_eisa_pci_tbl, .probe = pci_eisa_init, -- cgit v0.10.2 From 703f03c896fdbd726b809066ae279df513992f0e Mon Sep 17 00:00:00 2001 From: "Philip A. Prindeville" Date: Mon, 25 Jul 2011 17:13:05 -0700 Subject: geode: reflect mfgpt dependency on mfd As stated in drivers/mfd/cs5535-mfd.c, the mfd driver exposes the BARs which then make the GPIO, MFGPT, ACPI, etc. all visible to the system. So the dependencies of the MFGPT stuff have changed, and most people expect Kconfig to bring in the necessary dependencies. Without them, the module fails to load and most people don't understand why because the details of the rewrite aren't captured anywhere most people who know to look. This dependency needs to be reflected in Kconfig. Signed-off-by: Philip A. Prindeville Acked-by: Alexandros C. Couloumbis Acked-by: Andres Salomon Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 4e349cd..3546474 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -245,8 +245,7 @@ config SGI_XP config CS5535_MFGPT tristate "CS5535/CS5536 Geode Multi-Function General Purpose Timer (MFGPT) support" - depends on PCI - depends on X86 + depends on PCI && X86 && MFD_CS5535 default n help This driver provides access to MFGPT functionality for other -- cgit v0.10.2 From a1bb73d76bc814e9385390e6aa9880d884322e2e Mon Sep 17 00:00:00 2001 From: Donggeun Kim Date: Mon, 25 Jul 2011 17:13:07 -0700 Subject: drivers/misc: add support the FSA9480 USB Switch The FSA9480 is a USB port accessory detector and switch. This patch adds support the FSA9480 USB Switch. [akpm@linux-foundation.org: make a couple of things static] Signed-off-by: Donggeun Kim Signed-off-by: Minkyu Kang Signed-off-by: Kyungmin Park Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-fsa9480 b/Documentation/ABI/testing/sysfs-bus-i2c-devices-fsa9480 new file mode 100644 index 0000000..9de269b --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-fsa9480 @@ -0,0 +1,21 @@ +What: /sys/bus/i2c/devices/.../device +Date: February 2011 +Contact: Minkyu Kang +Description: + show what device is attached + NONE - no device + USB - USB device is attached + UART - UART is attached + CHARGER - Charger is attaced + JIG - JIG is attached + +What: /sys/bus/i2c/devices/.../switch +Date: February 2011 +Contact: Minkyu Kang +Description: + show or set the state of manual switch + VAUDIO - switch to VAUDIO path + UART - switch to UART path + AUDIO - switch to AUDIO path + DHOST - switch to DHOST path + AUTO - switch automatically by device diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 3546474..0a4d86c 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -489,6 +489,15 @@ config PCH_PHUB To compile this driver as a module, choose M here: the module will be called pch_phub. +config USB_SWITCH_FSA9480 + tristate "FSA9480 USB Switch" + depends on I2C + help + The FSA9480 is a USB port accessory detector and switch. + The FSA9480 is fully controlled using I2C and enables USB data, + stereo and mono audio, video, microphone and UART data to use + a common connector port. + source "drivers/misc/c2port/Kconfig" source "drivers/misc/eeprom/Kconfig" source "drivers/misc/cb710/Kconfig" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 5f03172..3328215 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -46,3 +46,4 @@ obj-y += ti-st/ obj-$(CONFIG_AB8500_PWM) += ab8500-pwm.o obj-y += lis3lv02d/ obj-y += carma/ +obj-$(CONFIG_USB_SWITCH_FSA9480) += fsa9480.o diff --git a/drivers/misc/fsa9480.c b/drivers/misc/fsa9480.c new file mode 100644 index 0000000..5325a7e --- /dev/null +++ b/drivers/misc/fsa9480.c @@ -0,0 +1,557 @@ +/* + * fsa9480.c - FSA9480 micro USB switch device driver + * + * Copyright (C) 2010 Samsung Electronics + * Minkyu Kang + * Wonguk Jeong + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* FSA9480 I2C registers */ +#define FSA9480_REG_DEVID 0x01 +#define FSA9480_REG_CTRL 0x02 +#define FSA9480_REG_INT1 0x03 +#define FSA9480_REG_INT2 0x04 +#define FSA9480_REG_INT1_MASK 0x05 +#define FSA9480_REG_INT2_MASK 0x06 +#define FSA9480_REG_ADC 0x07 +#define FSA9480_REG_TIMING1 0x08 +#define FSA9480_REG_TIMING2 0x09 +#define FSA9480_REG_DEV_T1 0x0a +#define FSA9480_REG_DEV_T2 0x0b +#define FSA9480_REG_BTN1 0x0c +#define FSA9480_REG_BTN2 0x0d +#define FSA9480_REG_CK 0x0e +#define FSA9480_REG_CK_INT1 0x0f +#define FSA9480_REG_CK_INT2 0x10 +#define FSA9480_REG_CK_INTMASK1 0x11 +#define FSA9480_REG_CK_INTMASK2 0x12 +#define FSA9480_REG_MANSW1 0x13 +#define FSA9480_REG_MANSW2 0x14 + +/* Control */ +#define CON_SWITCH_OPEN (1 << 4) +#define CON_RAW_DATA (1 << 3) +#define CON_MANUAL_SW (1 << 2) +#define CON_WAIT (1 << 1) +#define CON_INT_MASK (1 << 0) +#define CON_MASK (CON_SWITCH_OPEN | CON_RAW_DATA | \ + CON_MANUAL_SW | CON_WAIT) + +/* Device Type 1 */ +#define DEV_USB_OTG (1 << 7) +#define DEV_DEDICATED_CHG (1 << 6) +#define DEV_USB_CHG (1 << 5) +#define DEV_CAR_KIT (1 << 4) +#define DEV_UART (1 << 3) +#define DEV_USB (1 << 2) +#define DEV_AUDIO_2 (1 << 1) +#define DEV_AUDIO_1 (1 << 0) + +#define DEV_T1_USB_MASK (DEV_USB_OTG | DEV_USB) +#define DEV_T1_UART_MASK (DEV_UART) +#define DEV_T1_CHARGER_MASK (DEV_DEDICATED_CHG | DEV_USB_CHG) + +/* Device Type 2 */ +#define DEV_AV (1 << 6) +#define DEV_TTY (1 << 5) +#define DEV_PPD (1 << 4) +#define DEV_JIG_UART_OFF (1 << 3) +#define DEV_JIG_UART_ON (1 << 2) +#define DEV_JIG_USB_OFF (1 << 1) +#define DEV_JIG_USB_ON (1 << 0) + +#define DEV_T2_USB_MASK (DEV_JIG_USB_OFF | DEV_JIG_USB_ON) +#define DEV_T2_UART_MASK (DEV_JIG_UART_OFF | DEV_JIG_UART_ON) +#define DEV_T2_JIG_MASK (DEV_JIG_USB_OFF | DEV_JIG_USB_ON | \ + DEV_JIG_UART_OFF | DEV_JIG_UART_ON) + +/* + * Manual Switch + * D- [7:5] / D+ [4:2] + * 000: Open all / 001: USB / 010: AUDIO / 011: UART / 100: V_AUDIO + */ +#define SW_VAUDIO ((4 << 5) | (4 << 2)) +#define SW_UART ((3 << 5) | (3 << 2)) +#define SW_AUDIO ((2 << 5) | (2 << 2)) +#define SW_DHOST ((1 << 5) | (1 << 2)) +#define SW_AUTO ((0 << 5) | (0 << 2)) + +/* Interrupt 1 */ +#define INT_DETACH (1 << 1) +#define INT_ATTACH (1 << 0) + +struct fsa9480_usbsw { + struct i2c_client *client; + struct fsa9480_platform_data *pdata; + int dev1; + int dev2; + int mansw; +}; + +static struct fsa9480_usbsw *chip; + +static int fsa9480_write_reg(struct i2c_client *client, + int reg, int value) +{ + int ret; + + ret = i2c_smbus_write_byte_data(client, reg, value); + + if (ret < 0) + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + + return ret; +} + +static int fsa9480_read_reg(struct i2c_client *client, int reg) +{ + int ret; + + ret = i2c_smbus_read_byte_data(client, reg); + + if (ret < 0) + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + + return ret; +} + +static int fsa9480_read_irq(struct i2c_client *client, int *value) +{ + int ret; + + ret = i2c_smbus_read_i2c_block_data(client, + FSA9480_REG_INT1, 2, (u8 *)value); + *value &= 0xffff; + + if (ret < 0) + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + + return ret; +} + +static void fsa9480_set_switch(const char *buf) +{ + struct fsa9480_usbsw *usbsw = chip; + struct i2c_client *client = usbsw->client; + unsigned int value; + unsigned int path = 0; + + value = fsa9480_read_reg(client, FSA9480_REG_CTRL); + + if (!strncmp(buf, "VAUDIO", 6)) { + path = SW_VAUDIO; + value &= ~CON_MANUAL_SW; + } else if (!strncmp(buf, "UART", 4)) { + path = SW_UART; + value &= ~CON_MANUAL_SW; + } else if (!strncmp(buf, "AUDIO", 5)) { + path = SW_AUDIO; + value &= ~CON_MANUAL_SW; + } else if (!strncmp(buf, "DHOST", 5)) { + path = SW_DHOST; + value &= ~CON_MANUAL_SW; + } else if (!strncmp(buf, "AUTO", 4)) { + path = SW_AUTO; + value |= CON_MANUAL_SW; + } else { + printk(KERN_ERR "Wrong command\n"); + return; + } + + usbsw->mansw = path; + fsa9480_write_reg(client, FSA9480_REG_MANSW1, path); + fsa9480_write_reg(client, FSA9480_REG_CTRL, value); +} + +static ssize_t fsa9480_get_switch(char *buf) +{ + struct fsa9480_usbsw *usbsw = chip; + struct i2c_client *client = usbsw->client; + unsigned int value; + + value = fsa9480_read_reg(client, FSA9480_REG_MANSW1); + + if (value == SW_VAUDIO) + return sprintf(buf, "VAUDIO\n"); + else if (value == SW_UART) + return sprintf(buf, "UART\n"); + else if (value == SW_AUDIO) + return sprintf(buf, "AUDIO\n"); + else if (value == SW_DHOST) + return sprintf(buf, "DHOST\n"); + else if (value == SW_AUTO) + return sprintf(buf, "AUTO\n"); + else + return sprintf(buf, "%x", value); +} + +static ssize_t fsa9480_show_device(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct fsa9480_usbsw *usbsw = dev_get_drvdata(dev); + struct i2c_client *client = usbsw->client; + int dev1, dev2; + + dev1 = fsa9480_read_reg(client, FSA9480_REG_DEV_T1); + dev2 = fsa9480_read_reg(client, FSA9480_REG_DEV_T2); + + if (!dev1 && !dev2) + return sprintf(buf, "NONE\n"); + + /* USB */ + if (dev1 & DEV_T1_USB_MASK || dev2 & DEV_T2_USB_MASK) + return sprintf(buf, "USB\n"); + + /* UART */ + if (dev1 & DEV_T1_UART_MASK || dev2 & DEV_T2_UART_MASK) + return sprintf(buf, "UART\n"); + + /* CHARGER */ + if (dev1 & DEV_T1_CHARGER_MASK) + return sprintf(buf, "CHARGER\n"); + + /* JIG */ + if (dev2 & DEV_T2_JIG_MASK) + return sprintf(buf, "JIG\n"); + + return sprintf(buf, "UNKNOWN\n"); +} + +static ssize_t fsa9480_show_manualsw(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return fsa9480_get_switch(buf); + +} + +static ssize_t fsa9480_set_manualsw(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + fsa9480_set_switch(buf); + + return count; +} + +static DEVICE_ATTR(device, S_IRUGO, fsa9480_show_device, NULL); +static DEVICE_ATTR(switch, S_IRUGO | S_IWUSR, + fsa9480_show_manualsw, fsa9480_set_manualsw); + +static struct attribute *fsa9480_attributes[] = { + &dev_attr_device.attr, + &dev_attr_switch.attr, + NULL +}; + +static const struct attribute_group fsa9480_group = { + .attrs = fsa9480_attributes, +}; + +static void fsa9480_detect_dev(struct fsa9480_usbsw *usbsw, int intr) +{ + int val1, val2, ctrl; + struct fsa9480_platform_data *pdata = usbsw->pdata; + struct i2c_client *client = usbsw->client; + + val1 = fsa9480_read_reg(client, FSA9480_REG_DEV_T1); + val2 = fsa9480_read_reg(client, FSA9480_REG_DEV_T2); + ctrl = fsa9480_read_reg(client, FSA9480_REG_CTRL); + + dev_info(&client->dev, "intr: 0x%x, dev1: 0x%x, dev2: 0x%x\n", + intr, val1, val2); + + if (!intr) + goto out; + + if (intr & INT_ATTACH) { /* Attached */ + /* USB */ + if (val1 & DEV_T1_USB_MASK || val2 & DEV_T2_USB_MASK) { + if (pdata->usb_cb) + pdata->usb_cb(FSA9480_ATTACHED); + + if (usbsw->mansw) { + fsa9480_write_reg(client, + FSA9480_REG_MANSW1, usbsw->mansw); + } + } + + /* UART */ + if (val1 & DEV_T1_UART_MASK || val2 & DEV_T2_UART_MASK) { + if (pdata->uart_cb) + pdata->uart_cb(FSA9480_ATTACHED); + + if (!(ctrl & CON_MANUAL_SW)) { + fsa9480_write_reg(client, + FSA9480_REG_MANSW1, SW_UART); + } + } + + /* CHARGER */ + if (val1 & DEV_T1_CHARGER_MASK) { + if (pdata->charger_cb) + pdata->charger_cb(FSA9480_ATTACHED); + } + + /* JIG */ + if (val2 & DEV_T2_JIG_MASK) { + if (pdata->jig_cb) + pdata->jig_cb(FSA9480_ATTACHED); + } + } else if (intr & INT_DETACH) { /* Detached */ + /* USB */ + if (usbsw->dev1 & DEV_T1_USB_MASK || + usbsw->dev2 & DEV_T2_USB_MASK) { + if (pdata->usb_cb) + pdata->usb_cb(FSA9480_DETACHED); + } + + /* UART */ + if (usbsw->dev1 & DEV_T1_UART_MASK || + usbsw->dev2 & DEV_T2_UART_MASK) { + if (pdata->uart_cb) + pdata->uart_cb(FSA9480_DETACHED); + } + + /* CHARGER */ + if (usbsw->dev1 & DEV_T1_CHARGER_MASK) { + if (pdata->charger_cb) + pdata->charger_cb(FSA9480_DETACHED); + } + + /* JIG */ + if (usbsw->dev2 & DEV_T2_JIG_MASK) { + if (pdata->jig_cb) + pdata->jig_cb(FSA9480_DETACHED); + } + } + + usbsw->dev1 = val1; + usbsw->dev2 = val2; + +out: + ctrl &= ~CON_INT_MASK; + fsa9480_write_reg(client, FSA9480_REG_CTRL, ctrl); +} + +static irqreturn_t fsa9480_irq_handler(int irq, void *data) +{ + struct fsa9480_usbsw *usbsw = data; + struct i2c_client *client = usbsw->client; + int intr; + + /* clear interrupt */ + fsa9480_read_irq(client, &intr); + + /* device detection */ + fsa9480_detect_dev(usbsw, intr); + + return IRQ_HANDLED; +} + +static int fsa9480_irq_init(struct fsa9480_usbsw *usbsw) +{ + struct fsa9480_platform_data *pdata = usbsw->pdata; + struct i2c_client *client = usbsw->client; + int ret; + int intr; + unsigned int ctrl = CON_MASK; + + /* clear interrupt */ + fsa9480_read_irq(client, &intr); + + /* unmask interrupt (attach/detach only) */ + fsa9480_write_reg(client, FSA9480_REG_INT1_MASK, 0xfc); + fsa9480_write_reg(client, FSA9480_REG_INT2_MASK, 0x1f); + + usbsw->mansw = fsa9480_read_reg(client, FSA9480_REG_MANSW1); + + if (usbsw->mansw) + ctrl &= ~CON_MANUAL_SW; /* Manual Switching Mode */ + + fsa9480_write_reg(client, FSA9480_REG_CTRL, ctrl); + + if (pdata && pdata->cfg_gpio) + pdata->cfg_gpio(); + + if (client->irq) { + ret = request_threaded_irq(client->irq, NULL, + fsa9480_irq_handler, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + "fsa9480 micro USB", usbsw); + if (ret) { + dev_err(&client->dev, "failed to reqeust IRQ\n"); + return ret; + } + + device_init_wakeup(&client->dev, pdata->wakeup); + } + + return 0; +} + +static int __devinit fsa9480_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + struct fsa9480_usbsw *usbsw; + int ret = 0; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) + return -EIO; + + usbsw = kzalloc(sizeof(struct fsa9480_usbsw), GFP_KERNEL); + if (!usbsw) { + dev_err(&client->dev, "failed to allocate driver data\n"); + return -ENOMEM; + } + + usbsw->client = client; + usbsw->pdata = client->dev.platform_data; + + chip = usbsw; + + i2c_set_clientdata(client, usbsw); + + ret = fsa9480_irq_init(usbsw); + if (ret) + goto fail1; + + ret = sysfs_create_group(&client->dev.kobj, &fsa9480_group); + if (ret) { + dev_err(&client->dev, + "failed to create fsa9480 attribute group\n"); + goto fail2; + } + + /* ADC Detect Time: 500ms */ + fsa9480_write_reg(client, FSA9480_REG_TIMING1, 0x6); + + if (chip->pdata->reset_cb) + chip->pdata->reset_cb(); + + /* device detection */ + fsa9480_detect_dev(usbsw, INT_ATTACH); + + pm_runtime_set_active(&client->dev); + + return 0; + +fail2: + if (client->irq) + free_irq(client->irq, NULL); +fail1: + i2c_set_clientdata(client, NULL); + kfree(usbsw); + return ret; +} + +static int __devexit fsa9480_remove(struct i2c_client *client) +{ + struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client); + if (client->irq) + free_irq(client->irq, NULL); + i2c_set_clientdata(client, NULL); + + sysfs_remove_group(&client->dev.kobj, &fsa9480_group); + device_init_wakeup(&client->dev, 0); + kfree(usbsw); + return 0; +} + +#ifdef CONFIG_PM + +static int fsa9480_suspend(struct i2c_client *client, pm_message_t state) +{ + struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client); + struct fsa9480_platform_data *pdata = usbsw->pdata; + + if (device_may_wakeup(&client->dev) && client->irq) + enable_irq_wake(client->irq); + + if (pdata->usb_power) + pdata->usb_power(0); + + return 0; +} + +static int fsa9480_resume(struct i2c_client *client) +{ + struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client); + int dev1, dev2; + + if (device_may_wakeup(&client->dev) && client->irq) + disable_irq_wake(client->irq); + + /* + * Clear Pending interrupt. Note that detect_dev does what + * the interrupt handler does. So, we don't miss pending and + * we reenable interrupt if there is one. + */ + fsa9480_read_reg(client, FSA9480_REG_INT1); + fsa9480_read_reg(client, FSA9480_REG_INT2); + + dev1 = fsa9480_read_reg(client, FSA9480_REG_DEV_T1); + dev2 = fsa9480_read_reg(client, FSA9480_REG_DEV_T2); + + /* device detection */ + fsa9480_detect_dev(usbsw, (dev1 || dev2) ? INT_ATTACH : INT_DETACH); + + return 0; +} + +#else + +#define fsa9480_suspend NULL +#define fsa9480_resume NULL + +#endif /* CONFIG_PM */ + +static const struct i2c_device_id fsa9480_id[] = { + {"fsa9480", 0}, + {} +}; +MODULE_DEVICE_TABLE(i2c, fsa9480_id); + +static struct i2c_driver fsa9480_i2c_driver = { + .driver = { + .name = "fsa9480", + }, + .probe = fsa9480_probe, + .remove = __devexit_p(fsa9480_remove), + .resume = fsa9480_resume, + .suspend = fsa9480_suspend, + .id_table = fsa9480_id, +}; + +static int __init fsa9480_init(void) +{ + return i2c_add_driver(&fsa9480_i2c_driver); +} +module_init(fsa9480_init); + +static void __exit fsa9480_exit(void) +{ + i2c_del_driver(&fsa9480_i2c_driver); +} +module_exit(fsa9480_exit); + +MODULE_AUTHOR("Minkyu Kang "); +MODULE_DESCRIPTION("FSA9480 USB Switch driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/platform_data/fsa9480.h b/include/linux/platform_data/fsa9480.h new file mode 100644 index 0000000..72dddcb --- /dev/null +++ b/include/linux/platform_data/fsa9480.h @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2010 Samsung Electronics + * Minkyu Kang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _FSA9480_H_ +#define _FSA9480_H_ + +#define FSA9480_ATTACHED 1 +#define FSA9480_DETACHED 0 + +struct fsa9480_platform_data { + void (*cfg_gpio) (void); + void (*usb_cb) (u8 attached); + void (*uart_cb) (u8 attached); + void (*charger_cb) (u8 attached); + void (*jig_cb) (u8 attached); + void (*reset_cb) (void); + void (*usb_power) (u8 on); + int wakeup; +}; + +#endif /* _FSA9480_H_ */ -- cgit v0.10.2 From 80f1ff97d0a9d92f44d2b2dd9425afa950e58f2b Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 25 Jul 2011 17:13:08 -0700 Subject: notifiers: cpu: move cpu notifiers into cpu.h We presently define all kinds of notifiers in notifier.h. This is not necessary at all, since different subsystems use different notifiers, they are almost non-related with each other. This can also save much build time. Suppose I add a new netdevice event, really I don't have to recompile all the source, just network related. Without this patch, all the source will be recompiled. I move the notify events near to their subsystem notifier registers, so that they can be found more easily. This patch: It is not necessary to share the same notifier.h. Signed-off-by: WANG Cong Cc: David Miller Cc: "Rafael J. Wysocki" Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 5f09323..b1a635a 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -70,6 +70,39 @@ enum { CPU_PRI_WORKQUEUE = 5, }; +#define CPU_ONLINE 0x0002 /* CPU (unsigned)v is up */ +#define CPU_UP_PREPARE 0x0003 /* CPU (unsigned)v coming up */ +#define CPU_UP_CANCELED 0x0004 /* CPU (unsigned)v NOT coming up */ +#define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */ +#define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */ +#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ +#define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task, + * not handling interrupts, soon dead. + * Called on the dying cpu, interrupts + * are already disabled. Must not + * sleep, must not fail */ +#define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug + * lock is dropped */ +#define CPU_STARTING 0x000A /* CPU (unsigned)v soon running. + * Called on the new cpu, just before + * enabling interrupts. Must not sleep, + * must not fail */ + +/* Used for CPU hotplug events occurring while tasks are frozen due to a suspend + * operation in progress + */ +#define CPU_TASKS_FROZEN 0x0010 + +#define CPU_ONLINE_FROZEN (CPU_ONLINE | CPU_TASKS_FROZEN) +#define CPU_UP_PREPARE_FROZEN (CPU_UP_PREPARE | CPU_TASKS_FROZEN) +#define CPU_UP_CANCELED_FROZEN (CPU_UP_CANCELED | CPU_TASKS_FROZEN) +#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN) +#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN) +#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) +#define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN) +#define CPU_STARTING_FROZEN (CPU_STARTING | CPU_TASKS_FROZEN) + + #ifdef CONFIG_SMP /* Need to know about CPUs going up/down? */ #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) diff --git a/include/linux/notifier.h b/include/linux/notifier.h index c0688b0..9eb25fc 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -185,6 +185,8 @@ static inline int notifier_to_errno(int ret) * VC switch chains (for loadable kernel svgalib VC switch helpers) etc... */ +/* CPU notfiers are defined in include/linux/cpu.h. */ + /* netdevice notifier chain. Please remember to update the rtnetlink * notification exclusion list in rtnetlink_event() when adding new * types. @@ -220,38 +222,6 @@ static inline int notifier_to_errno(int ret) #define NETLINK_URELEASE 0x0001 /* Unicast netlink socket released */ -#define CPU_ONLINE 0x0002 /* CPU (unsigned)v is up */ -#define CPU_UP_PREPARE 0x0003 /* CPU (unsigned)v coming up */ -#define CPU_UP_CANCELED 0x0004 /* CPU (unsigned)v NOT coming up */ -#define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */ -#define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */ -#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ -#define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task, - * not handling interrupts, soon dead. - * Called on the dying cpu, interrupts - * are already disabled. Must not - * sleep, must not fail */ -#define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug - * lock is dropped */ -#define CPU_STARTING 0x000A /* CPU (unsigned)v soon running. - * Called on the new cpu, just before - * enabling interrupts. Must not sleep, - * must not fail */ - -/* Used for CPU hotplug events occurring while tasks are frozen due to a suspend - * operation in progress - */ -#define CPU_TASKS_FROZEN 0x0010 - -#define CPU_ONLINE_FROZEN (CPU_ONLINE | CPU_TASKS_FROZEN) -#define CPU_UP_PREPARE_FROZEN (CPU_UP_PREPARE | CPU_TASKS_FROZEN) -#define CPU_UP_CANCELED_FROZEN (CPU_UP_CANCELED | CPU_TASKS_FROZEN) -#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN) -#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN) -#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) -#define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN) -#define CPU_STARTING_FROZEN (CPU_STARTING | CPU_TASKS_FROZEN) - /* Hibernation and suspend events */ #define PM_HIBERNATION_PREPARE 0x0001 /* Going to hibernate */ #define PM_POST_HIBERNATION 0x0002 /* Hibernation finished */ diff --git a/net/rds/page.c b/net/rds/page.c index d8acdeb..b82d63e 100644 --- a/net/rds/page.c +++ b/net/rds/page.c @@ -32,6 +32,7 @@ */ #include #include +#include #include "rds.h" -- cgit v0.10.2 From dcfe1421c916345b068f43749263b94270324500 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 25 Jul 2011 17:13:09 -0700 Subject: notifiers: net: move netdevice notifiers into netdevice.h It is not necessary to share the same notifier.h. Signed-off-by: WANG Cong Acked-by: David Miller Cc: "Rafael J. Wysocki" Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 34f3abc..ea6f4aa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1521,6 +1521,39 @@ struct packet_type { #include +/* netdevice notifier chain. Please remember to update the rtnetlink + * notification exclusion list in rtnetlink_event() when adding new + * types. + */ +#define NETDEV_UP 0x0001 /* For now you can't veto a device up/down */ +#define NETDEV_DOWN 0x0002 +#define NETDEV_REBOOT 0x0003 /* Tell a protocol stack a network interface + detected a hardware crash and restarted + - we can use this eg to kick tcp sessions + once done */ +#define NETDEV_CHANGE 0x0004 /* Notify device state change */ +#define NETDEV_REGISTER 0x0005 +#define NETDEV_UNREGISTER 0x0006 +#define NETDEV_CHANGEMTU 0x0007 +#define NETDEV_CHANGEADDR 0x0008 +#define NETDEV_GOING_DOWN 0x0009 +#define NETDEV_CHANGENAME 0x000A +#define NETDEV_FEAT_CHANGE 0x000B +#define NETDEV_BONDING_FAILOVER 0x000C +#define NETDEV_PRE_UP 0x000D +#define NETDEV_PRE_TYPE_CHANGE 0x000E +#define NETDEV_POST_TYPE_CHANGE 0x000F +#define NETDEV_POST_INIT 0x0010 +#define NETDEV_UNREGISTER_BATCH 0x0011 +#define NETDEV_RELEASE 0x0012 +#define NETDEV_NOTIFY_PEERS 0x0013 +#define NETDEV_JOIN 0x0014 + +extern int register_netdevice_notifier(struct notifier_block *nb); +extern int unregister_netdevice_notifier(struct notifier_block *nb); +extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); + + extern rwlock_t dev_base_lock; /* Device list lock */ @@ -1603,12 +1636,9 @@ static inline void unregister_netdevice(struct net_device *dev) extern int netdev_refcnt_read(const struct net_device *dev); extern void free_netdev(struct net_device *dev); extern void synchronize_net(void); -extern int register_netdevice_notifier(struct notifier_block *nb); -extern int unregister_netdevice_notifier(struct notifier_block *nb); extern int init_dummy_netdev(struct net_device *dev); extern void netdev_resync_ops(struct net_device *dev); -extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); extern struct net_device *dev_get_by_index(struct net *net, int ifindex); extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); extern struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 9eb25fc..e8a858a 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -187,33 +187,7 @@ static inline int notifier_to_errno(int ret) /* CPU notfiers are defined in include/linux/cpu.h. */ -/* netdevice notifier chain. Please remember to update the rtnetlink - * notification exclusion list in rtnetlink_event() when adding new - * types. - */ -#define NETDEV_UP 0x0001 /* For now you can't veto a device up/down */ -#define NETDEV_DOWN 0x0002 -#define NETDEV_REBOOT 0x0003 /* Tell a protocol stack a network interface - detected a hardware crash and restarted - - we can use this eg to kick tcp sessions - once done */ -#define NETDEV_CHANGE 0x0004 /* Notify device state change */ -#define NETDEV_REGISTER 0x0005 -#define NETDEV_UNREGISTER 0x0006 -#define NETDEV_CHANGEMTU 0x0007 -#define NETDEV_CHANGEADDR 0x0008 -#define NETDEV_GOING_DOWN 0x0009 -#define NETDEV_CHANGENAME 0x000A -#define NETDEV_FEAT_CHANGE 0x000B -#define NETDEV_BONDING_FAILOVER 0x000C -#define NETDEV_PRE_UP 0x000D -#define NETDEV_PRE_TYPE_CHANGE 0x000E -#define NETDEV_POST_TYPE_CHANGE 0x000F -#define NETDEV_POST_INIT 0x0010 -#define NETDEV_UNREGISTER_BATCH 0x0011 -#define NETDEV_RELEASE 0x0012 -#define NETDEV_NOTIFY_PEERS 0x0013 -#define NETDEV_JOIN 0x0014 +/* netdevice notifiers are defined in include/linux/netdevice.h */ #define SYS_DOWN 0x0001 /* Notify of system down */ #define SYS_RESTART SYS_DOWN -- cgit v0.10.2 From c5f41752fd37979dbaec61dc59c7ece0606ddf7e Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 25 Jul 2011 17:13:10 -0700 Subject: notifiers: sys: move reboot notifiers into reboot.h It is not necessary to share the same notifier.h. This patch already moves register_reboot_notifier() and unregister_reboot_notifier() from kernel/notifier.c to kernel/sys.c. [amwang@redhat.com: make allyesconfig succeed on ppc64] Signed-off-by: WANG Cong Cc: David Miller Cc: "Rafael J. Wysocki" Cc: Greg KH Signed-off-by: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 271ff63..0e0ea94 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index bf5f5ce..e037c74 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/notifier.h b/include/linux/notifier.h index e8a858a..145c436 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -189,10 +189,7 @@ static inline int notifier_to_errno(int ret) /* netdevice notifiers are defined in include/linux/netdevice.h */ -#define SYS_DOWN 0x0001 /* Notify of system down */ -#define SYS_RESTART SYS_DOWN -#define SYS_HALT 0x0002 /* Notify of system halt */ -#define SYS_POWER_OFF 0x0003 /* Notify of system power off */ +/* reboot notifiers are defined in include/linux/reboot.h. */ #define NETLINK_URELEASE 0x0001 /* Unicast netlink socket released */ diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 3005d5a..e0879a7 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -39,6 +39,11 @@ #include +#define SYS_DOWN 0x0001 /* Notify of system down */ +#define SYS_RESTART SYS_DOWN +#define SYS_HALT 0x0002 /* Notify of system halt */ +#define SYS_POWER_OFF 0x0003 /* Notify of system power off */ + extern int register_reboot_notifier(struct notifier_block *); extern int unregister_reboot_notifier(struct notifier_block *); diff --git a/kernel/notifier.c b/kernel/notifier.c index 2488ba7..8d7b435 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -525,37 +525,6 @@ void srcu_init_notifier_head(struct srcu_notifier_head *nh) } EXPORT_SYMBOL_GPL(srcu_init_notifier_head); -/** - * register_reboot_notifier - Register function to be called at reboot time - * @nb: Info about notifier function to be called - * - * Registers a function with the list of functions - * to be called at reboot time. - * - * Currently always returns zero, as blocking_notifier_chain_register() - * always returns zero. - */ -int register_reboot_notifier(struct notifier_block *nb) -{ - return blocking_notifier_chain_register(&reboot_notifier_list, nb); -} -EXPORT_SYMBOL(register_reboot_notifier); - -/** - * unregister_reboot_notifier - Unregister previously registered reboot notifier - * @nb: Hook to be unregistered - * - * Unregisters a previously registered reboot - * notifier function. - * - * Returns zero on success, or %-ENOENT on failure. - */ -int unregister_reboot_notifier(struct notifier_block *nb) -{ - return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); -} -EXPORT_SYMBOL(unregister_reboot_notifier); - static ATOMIC_NOTIFIER_HEAD(die_chain); int notrace __kprobes notify_die(enum die_val val, const char *str, diff --git a/kernel/sys.c b/kernel/sys.c index e4128b2..a101ba3 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -320,6 +319,37 @@ void kernel_restart_prepare(char *cmd) } /** + * register_reboot_notifier - Register function to be called at reboot time + * @nb: Info about notifier function to be called + * + * Registers a function with the list of functions + * to be called at reboot time. + * + * Currently always returns zero, as blocking_notifier_chain_register() + * always returns zero. + */ +int register_reboot_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&reboot_notifier_list, nb); +} +EXPORT_SYMBOL(register_reboot_notifier); + +/** + * unregister_reboot_notifier - Unregister previously registered reboot notifier + * @nb: Hook to be unregistered + * + * Unregisters a previously registered reboot + * notifier function. + * + * Returns zero on success, or %-ENOENT on failure. + */ +int unregister_reboot_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); +} +EXPORT_SYMBOL(unregister_reboot_notifier); + +/** * kernel_restart - reboot the system * @cmd: pointer to buffer containing command to execute for restart * or %NULL -- cgit v0.10.2 From 35eb6db11ed9cbf9702ec90a28779a51fe4a21a9 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 25 Jul 2011 17:13:11 -0700 Subject: notifiers: pm: move pm notifiers into suspend.h It is not necessary to share the same notifier.h. Signed-off-by: WANG Cong Cc: David Miller Acked-by: "Rafael J. Wysocki" Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index f091b43..89bdeae 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 145c436..ae8f7d9 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -191,15 +191,9 @@ static inline int notifier_to_errno(int ret) /* reboot notifiers are defined in include/linux/reboot.h. */ -#define NETLINK_URELEASE 0x0001 /* Unicast netlink socket released */ +/* Hibernation and suspend events are defined in include/linux/suspend.h. */ -/* Hibernation and suspend events */ -#define PM_HIBERNATION_PREPARE 0x0001 /* Going to hibernate */ -#define PM_POST_HIBERNATION 0x0002 /* Hibernation finished */ -#define PM_SUSPEND_PREPARE 0x0003 /* Going to suspend the system */ -#define PM_POST_SUSPEND 0x0004 /* Suspend finished */ -#define PM_RESTORE_PREPARE 0x0005 /* Going to restore a saved image */ -#define PM_POST_RESTORE 0x0006 /* Restore failed */ +#define NETLINK_URELEASE 0x0001 /* Unicast netlink socket released */ /* Console keyboard events. * Note: KBD_KEYCODE is always sent before KBD_UNBOUND_KEYCODE, KBD_UNICODE and diff --git a/include/linux/suspend.h b/include/linux/suspend.h index e1e3742..6bbcef2 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -268,6 +268,14 @@ static inline int hibernate(void) { return -ENOSYS; } static inline bool system_entering_hibernation(void) { return false; } #endif /* CONFIG_HIBERNATION */ +/* Hibernation and suspend events */ +#define PM_HIBERNATION_PREPARE 0x0001 /* Going to hibernate */ +#define PM_POST_HIBERNATION 0x0002 /* Hibernation finished */ +#define PM_SUSPEND_PREPARE 0x0003 /* Going to suspend the system */ +#define PM_POST_SUSPEND 0x0004 /* Suspend finished */ +#define PM_RESTORE_PREPARE 0x0005 /* Going to restore a saved image */ +#define PM_POST_RESTORE 0x0006 /* Restore failed */ + #ifdef CONFIG_PM_SLEEP void save_processor_state(void); void restore_processor_state(void); -- cgit v0.10.2 From a376d3d6727b2f05ef4c6670cc74afbd8110df89 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 25 Jul 2011 17:13:12 -0700 Subject: notifiers: vt: move vt notifiers into vt.h It is not necessary to share the same notifier.h. Signed-off-by: WANG Cong Cc: David Miller Cc: "Rafael J. Wysocki" Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/notifier.h b/include/linux/notifier.h index ae8f7d9..d65746e 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -193,6 +193,8 @@ static inline int notifier_to_errno(int ret) /* Hibernation and suspend events are defined in include/linux/suspend.h. */ +/* Virtual Terminal events are defined in include/linux/vt.h. */ + #define NETLINK_URELEASE 0x0001 /* Unicast netlink socket released */ /* Console keyboard events. @@ -206,12 +208,5 @@ static inline int notifier_to_errno(int ret) extern struct blocking_notifier_head reboot_notifier_list; -/* Virtual Terminal events. */ -#define VT_ALLOCATE 0x0001 /* Console got allocated */ -#define VT_DEALLOCATE 0x0002 /* Console will be deallocated */ -#define VT_WRITE 0x0003 /* A char got output */ -#define VT_UPDATE 0x0004 /* A bigger update occurred */ -#define VT_PREWRITE 0x0005 /* A char is about to be written to the console */ - #endif /* __KERNEL__ */ #endif /* _LINUX_NOTIFIER_H */ diff --git a/include/linux/vt.h b/include/linux/vt.h index d5dd0bc..30a8dd9 100644 --- a/include/linux/vt.h +++ b/include/linux/vt.h @@ -86,6 +86,13 @@ struct vt_setactivate { #ifdef __KERNEL__ +/* Virtual Terminal events. */ +#define VT_ALLOCATE 0x0001 /* Console got allocated */ +#define VT_DEALLOCATE 0x0002 /* Console will be deallocated */ +#define VT_WRITE 0x0003 /* A char got output */ +#define VT_UPDATE 0x0004 /* A bigger update occurred */ +#define VT_PREWRITE 0x0005 /* A char is about to be written to the console */ + #ifdef CONFIG_VT_CONSOLE extern int vt_kmsg_redirect(int new); -- cgit v0.10.2 From 626a0312514a121a90b4478cbde111ffc6826ae2 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 25 Jul 2011 17:13:12 -0700 Subject: kernel/configs.c: include MODULE_*() when CONFIG_IKCONFIG_PROC=n If CONFIG_IKCONFIG=m but CONFIG_IKCONFIG_PROC=n we get a module that has no MODULE_LICENSE definition. Move the MODULE_*() definitions outside the CONFIG_IKCONFIG_PROC #ifdef to prevent this configuration from tainting the kernel. Signed-off-by: Stephen Boyd Acked-by: Randy Dunlap Acked-by: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/configs.c b/kernel/configs.c index b4066b4..42e8fa0 100644 --- a/kernel/configs.c +++ b/kernel/configs.c @@ -92,8 +92,8 @@ static void __exit ikconfig_cleanup(void) module_init(ikconfig_init); module_exit(ikconfig_cleanup); +#endif /* CONFIG_IKCONFIG_PROC */ + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Randy Dunlap"); MODULE_DESCRIPTION("Echo the kernel .config file used to build the kernel"); - -#endif /* CONFIG_IKCONFIG_PROC */ -- cgit v0.10.2 From 0334b3824e671c28737074ca9fb0723ef05d9b9e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 25 Jul 2011 17:13:13 -0700 Subject: get_maintainers.pl: improve .mailmap parsing Entries that used formats other than "Proper Name " were not parsed properly. Try to improve the parsing so that the entries in the forms of: Proper Name and Proper Name Commit Name are transformed correctly. Signed-off-by: Joe Perches Reviewed-by: Florian Mickler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index d29a8d7..eb2f1e6 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -328,7 +328,8 @@ sub read_mailmap { # name1 # name1 name2 # (see man git-shortlog) - if (/^(.+)<(.+)>$/) { + + if (/^([^<]+)<([^>]+)>$/) { my $real_name = $1; my $address = $2; @@ -336,13 +337,13 @@ sub read_mailmap { ($real_name, $address) = parse_email("$real_name <$address>"); $mailmap->{names}->{$address} = $real_name; - } elsif (/^<([^\s]+)>\s*<([^\s]+)>$/) { + } elsif (/^<([^>]+)>\s*<([^>]+)>$/) { my $real_address = $1; my $wrong_address = $2; $mailmap->{addresses}->{$wrong_address} = $real_address; - } elsif (/^(.+)<([^\s]+)>\s*<([^\s]+)>$/) { + } elsif (/^(.+)<([^>]+)>\s*<([^>]+)>$/) { my $real_name = $1; my $real_address = $2; my $wrong_address = $3; @@ -353,7 +354,7 @@ sub read_mailmap { $mailmap->{names}->{$wrong_address} = $real_name; $mailmap->{addresses}->{$wrong_address} = $real_address; - } elsif (/^(.+)<([^\s]+)>\s*([^\s].*)<([^\s]+)>$/) { + } elsif (/^(.+)<([^>]+)>\s*(.+)\s*<([^>]+)>$/) { my $real_name = $1; my $real_address = $2; my $wrong_name = $3; -- cgit v0.10.2 From 05ed8490812a2f2b93da5d8e29fd2a1a9a87deb0 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 25 Jul 2011 17:13:14 -0700 Subject: MAINTAINERS: update HIGH RESOLUTION TIMERS patterns clockchips.h was typoed as clockevents.h Signed-off-by: Joe Perches Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index 39d8822..d13bd15 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3012,7 +3012,7 @@ F: kernel/hrtimer.c F: kernel/time/clockevents.c F: kernel/time/tick*.* F: kernel/time/timer_*.c -F: include/linux/clockevents.h +F: include/linux/clockchips.h F: include/linux/hrtimer.h HIGH-SPEED SCC DRIVER FOR AX.25 -- cgit v0.10.2 From 19ab5cb8fb14a44a4eb0b532ddc3658b055d84f9 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 25 Jul 2011 17:13:15 -0700 Subject: drivers/leds/leds-lp5521.c: provide section tagging Tag the and remove() function as __devexit respectively. Signed-off-by: Linus Walleij Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c index cc1dc48..9fc122c 100644 --- a/drivers/leds/leds-lp5521.c +++ b/drivers/leds/leds-lp5521.c @@ -744,7 +744,7 @@ fail1: return ret; } -static int lp5521_remove(struct i2c_client *client) +static int __devexit lp5521_remove(struct i2c_client *client) { struct lp5521_chip *chip = i2c_get_clientdata(client); int i; @@ -775,7 +775,7 @@ static struct i2c_driver lp5521_driver = { .name = "lp5521", }, .probe = lp5521_probe, - .remove = lp5521_remove, + .remove = __devexit_p(lp5521_remove), .id_table = lp5521_id, }; -- cgit v0.10.2 From e179840ba88c342d215631356cbfe4e62c9f175b Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 25 Jul 2011 17:13:16 -0700 Subject: drivers/leds/leds-sunfire.c: fix sunfire_led_generic_probe() error handling - return -ENOMEM if kzalloc fails, rather than the current -EINVAL - fix a memory leak in the case of goto out_unregister_led_cdevs Signed-off-by: Axel Lin Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/leds/leds-sunfire.c b/drivers/leds/leds-sunfire.c index ab6d18f..1757396 100644 --- a/drivers/leds/leds-sunfire.c +++ b/drivers/leds/leds-sunfire.c @@ -127,17 +127,19 @@ static int __devinit sunfire_led_generic_probe(struct platform_device *pdev, struct led_type *types) { struct sunfire_drvdata *p; - int i, err = -EINVAL; + int i, err; if (pdev->num_resources != 1) { printk(KERN_ERR PFX "Wrong number of resources %d, should be 1\n", pdev->num_resources); + err = -EINVAL; goto out; } p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) { printk(KERN_ERR PFX "Could not allocate struct sunfire_drvdata\n"); + err = -ENOMEM; goto out; } @@ -160,14 +162,14 @@ static int __devinit sunfire_led_generic_probe(struct platform_device *pdev, dev_set_drvdata(&pdev->dev, p); - err = 0; -out: - return err; + return 0; out_unregister_led_cdevs: for (i--; i >= 0; i--) led_classdev_unregister(&p->leds[i].led_cdev); - goto out; + kfree(p); +out: + return err; } static int __devexit sunfire_led_generic_remove(struct platform_device *pdev) -- cgit v0.10.2 From 2504f6da50f4d9656cad353798bf07657ec12eea Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 25 Jul 2011 17:13:16 -0700 Subject: drivers/leds/leds-netxbig: make LEDS_NETXBIG depend on LEDS_CLASS We call led_classdev_register/led_classdev_unregister in create_netxbig_led/delete_netxbig_led, thus make LEDS_NETXBIG depend on LEDS_CLASS. This patch fixes below build error if LEDS_CLASS is not configured. LD .tmp_vmlinux1 drivers/built-in.o: In function `create_netxbig_led': drivers/leds/leds-netxbig.c:350: undefined reference to `led_classdev_register' drivers/leds/leds-netxbig.c:361: undefined reference to `led_classdev_unregister' drivers/built-in.o: In function `delete_netxbig_led': drivers/leds/leds-netxbig.c:313: undefined reference to `led_classdev_unregister' Signed-off-by: Axel Lin Cc: Richard Purdie Acked-by: Simon Guinot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 6c21c29..b591e72 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -365,6 +365,7 @@ config LEDS_NS2 config LEDS_NETXBIG tristate "LED support for Big Network series LEDs" depends on MACH_NET2BIG_V2 || MACH_NET5BIG_V2 + depends on LEDS_CLASS default y help This option enable support for LEDs found on the LaCie 2Big -- cgit v0.10.2 From 40b1445b1b6afc1ddac6ac31f4533277a0fb75dc Mon Sep 17 00:00:00 2001 From: Shreshtha Kumar Sahu Date: Mon, 25 Jul 2011 17:13:17 -0700 Subject: arch/arm/mach-ux500/board-u5500.c: calibrate ALS input voltage Provide the support for auto calibration of ALS Zone boundaries based on min/max ALS input voltage. Signed-off-by: Shreshtha Kumar Sahu Signed-off-by: Linus Walleij Cc: Richard Purdie Cc: Lee Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/leds/leds-lm3530.c b/drivers/leds/leds-lm3530.c index 4d7ce76..3dd7090 100644 --- a/drivers/leds/leds-lm3530.c +++ b/drivers/leds/leds-lm3530.c @@ -68,17 +68,16 @@ #define LM3530_ALS2_IMP_SHIFT (4) /* Zone Boundary Register defaults */ -#define LM3530_DEF_ZB_0 (0x33) -#define LM3530_DEF_ZB_1 (0x66) -#define LM3530_DEF_ZB_2 (0x99) -#define LM3530_DEF_ZB_3 (0xCC) +#define LM3530_ALS_ZB_MAX (4) +#define LM3530_ALS_WINDOW_mV (1000) +#define LM3530_ALS_OFFSET_mV (4) /* Zone Target Register defaults */ -#define LM3530_DEF_ZT_0 (0x19) -#define LM3530_DEF_ZT_1 (0x33) +#define LM3530_DEF_ZT_0 (0x7F) +#define LM3530_DEF_ZT_1 (0x66) #define LM3530_DEF_ZT_2 (0x4C) -#define LM3530_DEF_ZT_3 (0x66) -#define LM3530_DEF_ZT_4 (0x7F) +#define LM3530_DEF_ZT_3 (0x33) +#define LM3530_DEF_ZT_4 (0x19) struct lm3530_mode_map { const char *mode; @@ -150,6 +149,8 @@ static int lm3530_init_registers(struct lm3530_data *drvdata) u8 als_imp_sel = 0; u8 brightness; u8 reg_val[LM3530_REG_MAX]; + u8 zones[LM3530_ALS_ZB_MAX]; + u32 als_vmin, als_vmax, als_vstep; struct lm3530_platform_data *pltfm = drvdata->pdata; struct i2c_client *client = drvdata->client; @@ -161,6 +162,26 @@ static int lm3530_init_registers(struct lm3530_data *drvdata) gen_config |= (LM3530_ENABLE_I2C); if (drvdata->mode == LM3530_BL_MODE_ALS) { + if (pltfm->als_vmax == 0) { + pltfm->als_vmin = als_vmin = 0; + pltfm->als_vmin = als_vmax = LM3530_ALS_WINDOW_mV; + } + + als_vmin = pltfm->als_vmin; + als_vmax = pltfm->als_vmax; + + if ((als_vmax - als_vmin) > LM3530_ALS_WINDOW_mV) + pltfm->als_vmax = als_vmax = + als_vmin + LM3530_ALS_WINDOW_mV; + + /* n zone boundary makes n+1 zones */ + als_vstep = (als_vmax - als_vmin) / (LM3530_ALS_ZB_MAX + 1); + + for (i = 0; i < LM3530_ALS_ZB_MAX; i++) + zones[i] = (((als_vmin + LM3530_ALS_OFFSET_mV) + + als_vstep + (i * als_vstep)) * LED_FULL) + / 1000; + als_config = (pltfm->als_avrg_time << LM3530_ALS_AVG_TIME_SHIFT) | (LM3530_ENABLE_ALS) | @@ -169,6 +190,7 @@ static int lm3530_init_registers(struct lm3530_data *drvdata) als_imp_sel = (pltfm->als1_resistor_sel << LM3530_ALS1_IMP_SHIFT) | (pltfm->als2_resistor_sel << LM3530_ALS2_IMP_SHIFT); + } if (drvdata->mode == LM3530_BL_MODE_PWM) @@ -190,10 +212,10 @@ static int lm3530_init_registers(struct lm3530_data *drvdata) reg_val[3] = 0x00; /* LM3530_ALS_ZONE_REG */ reg_val[4] = als_imp_sel; /* LM3530_ALS_IMP_SELECT */ reg_val[5] = brightness; /* LM3530_BRT_CTRL_REG */ - reg_val[6] = LM3530_DEF_ZB_0; /* LM3530_ALS_ZB0_REG */ - reg_val[7] = LM3530_DEF_ZB_1; /* LM3530_ALS_ZB1_REG */ - reg_val[8] = LM3530_DEF_ZB_2; /* LM3530_ALS_ZB2_REG */ - reg_val[9] = LM3530_DEF_ZB_3; /* LM3530_ALS_ZB3_REG */ + reg_val[6] = zones[0]; /* LM3530_ALS_ZB0_REG */ + reg_val[7] = zones[1]; /* LM3530_ALS_ZB1_REG */ + reg_val[8] = zones[2]; /* LM3530_ALS_ZB2_REG */ + reg_val[9] = zones[3]; /* LM3530_ALS_ZB3_REG */ reg_val[10] = LM3530_DEF_ZT_0; /* LM3530_ALS_Z0T_REG */ reg_val[11] = LM3530_DEF_ZT_1; /* LM3530_ALS_Z1T_REG */ reg_val[12] = LM3530_DEF_ZT_2; /* LM3530_ALS_Z2T_REG */ @@ -265,6 +287,24 @@ static void lm3530_brightness_set(struct led_classdev *led_cdev, } } +static ssize_t lm3530_mode_get(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = container_of( + dev->parent, struct i2c_client, dev); + struct lm3530_data *drvdata = i2c_get_clientdata(client); + int i, len = 0; + + for (i = 0; i < ARRAY_SIZE(mode_map); i++) + if (drvdata->mode == mode_map[i].mode_val) + len += sprintf(buf + len, "[%s] ", mode_map[i].mode); + else + len += sprintf(buf + len, "%s ", mode_map[i].mode); + + len += sprintf(buf + len, "\n"); + + return len; +} static ssize_t lm3530_mode_set(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) @@ -298,8 +338,7 @@ static ssize_t lm3530_mode_set(struct device *dev, struct device_attribute return sizeof(drvdata->mode); } - -static DEVICE_ATTR(mode, 0644, NULL, lm3530_mode_set); +static DEVICE_ATTR(mode, 0644, lm3530_mode_get, lm3530_mode_set); static int __devinit lm3530_probe(struct i2c_client *client, const struct i2c_device_id *id) diff --git a/include/linux/led-lm3530.h b/include/linux/led-lm3530.h index 58592fa..8eb1235 100644 --- a/include/linux/led-lm3530.h +++ b/include/linux/led-lm3530.h @@ -84,6 +84,8 @@ enum lm3530_als_mode { * @brt_ramp_rise: rate of rise of led current * @als1_resistor_sel: internal resistance from ALS1 input to ground * @als2_resistor_sel: internal resistance from ALS2 input to ground + * @als_vmin: als input voltage calibrated for max brightness in mV + * @als_vmax: als input voltage calibrated for min brightness in mV * @brt_val: brightness value (0-255) */ struct lm3530_platform_data { @@ -101,6 +103,9 @@ struct lm3530_platform_data { u8 als1_resistor_sel; u8 als2_resistor_sel; + u32 als_vmin; + u32 als_vmax; + u8 brt_val; }; -- cgit v0.10.2 From b09628ef56352a77bcf38e314869a8205c21a756 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 25 Jul 2011 17:13:18 -0700 Subject: mach-ux500: add lm3530 ALS platform data for U5500 From: Shreshtha Kumar Sahu platform data for simple backlight driver for LM3530 in the u5500 platform Signed-off-by: Shreshtha Kumar Sahu Signed-off-by: Linus Walleij Cc: Richard Purdie Cc: Lee Jones Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/arm/mach-ux500/board-u5500.c b/arch/arm/mach-ux500/board-u5500.c index 44fd3b5..e58f0f5 100644 --- a/arch/arm/mach-ux500/board-u5500.c +++ b/arch/arm/mach-ux500/board-u5500.c @@ -10,16 +10,97 @@ #include #include #include +#include #include #include +#include +#include + #include #include #include +#include "pins-db5500.h" #include "devices-db5500.h" +#include + +/* + * GPIO + */ + +static pin_cfg_t u5500_pins[] = { + /* I2C */ + GPIO218_I2C2_SCL | PIN_INPUT_PULLUP, + GPIO219_I2C2_SDA | PIN_INPUT_PULLUP, + + /* DISPLAY_ENABLE */ + GPIO226_GPIO | PIN_OUTPUT_LOW, + + /* Backlight Enbale */ + GPIO224_GPIO | PIN_OUTPUT_HIGH, +}; +/* + * I2C + */ + +#define U5500_I2C_CONTROLLER(id, _slsu, _tft, _rft, clk, _sm) \ +static struct nmk_i2c_controller u5500_i2c##id##_data = { \ + /* \ + * slave data setup time, which is \ + * 250 ns,100ns,10ns which is 14,6,2 \ + * respectively for a 48 Mhz \ + * i2c clock \ + */ \ + .slsu = _slsu, \ + /* Tx FIFO threshold */ \ + .tft = _tft, \ + /* Rx FIFO threshold */ \ + .rft = _rft, \ + /* std. mode operation */ \ + .clk_freq = clk, \ + .sm = _sm, \ +} +/* + * The board uses TODO <3> i2c controllers, initialize all of + * them with slave data setup time of 250 ns, + * Tx & Rx FIFO threshold values as 1 and standard + * mode of operation + */ + +U5500_I2C_CONTROLLER(2, 0xe, 1, 1, 400000, I2C_FREQ_MODE_FAST); + +static struct lm3530_platform_data u5500_als_platform_data = { + .mode = LM3530_BL_MODE_MANUAL, + .als_input_mode = LM3530_INPUT_ALS1, + .max_current = LM3530_FS_CURR_26mA, + .pwm_pol_hi = true, + .als_avrg_time = LM3530_ALS_AVRG_TIME_512ms, + .brt_ramp_law = 1, /* Linear */ + .brt_ramp_fall = LM3530_RAMP_TIME_8s, + .brt_ramp_rise = LM3530_RAMP_TIME_8s, + .als1_resistor_sel = LM3530_ALS_IMPD_13_53kOhm, + .als2_resistor_sel = LM3530_ALS_IMPD_Z, + .als_vmin = 730, /* mV */ + .als_vmax = 1020, /* mV */ + .brt_val = 0x7F, /* Max brightness */ +}; + +static struct i2c_board_info __initdata u5500_i2c2_devices[] = { + { + /* Backlight */ + I2C_BOARD_INFO("lm3530-led", 0x36), + .platform_data = &u5500_als_platform_data, + }, +}; + +static void __init u5500_i2c_init(void) +{ + db5500_add_i2c2(&u5500_i2c2_data); + i2c_register_board_info(2, ARRAY_AND_SIZE(u5500_i2c2_devices)); +} static void __init u5500_uart_init(void) { db5500_add_uart0(NULL); @@ -30,7 +111,8 @@ static void __init u5500_uart_init(void) static void __init u5500_init_machine(void) { u5500_init_devices(); - + nmk_config_pins(u5500_pins, ARRAY_SIZE(u5500_pins)); + u5500_i2c_init(); u5500_sdi_init(); u5500_uart_init(); } -- cgit v0.10.2 From 72d39508e43689b9346dfc956fad5e94a8911886 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Mon, 25 Jul 2011 17:13:20 -0700 Subject: lib/lcm.c: quiet sparse noise The symbol 'lcm' is exported to the kernel (EXPORT_SYMBOL_GPL). Pick up it's definition in to quiet the sparse noise: warning: symbol 'lcm' was not declared. Should it be static? Signed-off-by: H Hartley Sweeten Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/lcm.c b/lib/lcm.c index 157cd88..10b5cfc 100644 --- a/lib/lcm.c +++ b/lib/lcm.c @@ -1,6 +1,7 @@ #include #include #include +#include /* Lowest common multiple */ unsigned long lcm(unsigned long a, unsigned long b) -- cgit v0.10.2 From 75fb8f269305fc066c4c6ec6e7232df0c92f434d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 25 Jul 2011 17:13:20 -0700 Subject: lib: make _tolower() public This function is required by *printf and kstrto* functions that are located in the different modules. This patch makes _tolower() public. However, it's good idea to not use the helper outside of mentioned functions. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Andy Shevchenko Acked-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/ctype.h b/include/linux/ctype.h index a3d6ee0..8acfe31 100644 --- a/include/linux/ctype.h +++ b/include/linux/ctype.h @@ -52,4 +52,13 @@ static inline unsigned char __toupper(unsigned char c) #define tolower(c) __tolower(c) #define toupper(c) __toupper(c) +/* + * Fast implementation of tolower() for internal usage. Do not use in your + * code. + */ +static inline char _tolower(const char c) +{ + return c | 0x20; +} + #endif diff --git a/lib/kstrtox.c b/lib/kstrtox.c index 2dbae88..5e06675 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -19,11 +19,6 @@ #include #include -static inline char _tolower(const char c) -{ - return c | 0x20; -} - static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) { unsigned long long acc; diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 4365df3..39e3e8f 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -31,13 +31,10 @@ #include #include /* for dereference_function_descriptor() */ -/* Works only for digits and letters, but small and fast */ -#define TOLOWER(x) ((x) | 0x20) - static unsigned int simple_guess_base(const char *cp) { if (cp[0] == '0') { - if (TOLOWER(cp[1]) == 'x' && isxdigit(cp[2])) + if (_tolower(cp[1]) == 'x' && isxdigit(cp[2])) return 16; else return 8; @@ -59,13 +56,13 @@ unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int bas if (!base) base = simple_guess_base(cp); - if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x') + if (base == 16 && cp[0] == '0' && _tolower(cp[1]) == 'x') cp += 2; while (isxdigit(*cp)) { unsigned int value; - value = isdigit(*cp) ? *cp - '0' : TOLOWER(*cp) - 'a' + 10; + value = isdigit(*cp) ? *cp - '0' : _tolower(*cp) - 'a' + 10; if (value >= base) break; result = result * base + value; @@ -1036,8 +1033,8 @@ precision: qualifier: /* get the conversion qualifier */ spec->qualifier = -1; - if (*fmt == 'h' || TOLOWER(*fmt) == 'l' || - TOLOWER(*fmt) == 'z' || *fmt == 't') { + if (*fmt == 'h' || _tolower(*fmt) == 'l' || + _tolower(*fmt) == 'z' || *fmt == 't') { spec->qualifier = *fmt++; if (unlikely(spec->qualifier == *fmt)) { if (spec->qualifier == 'l') { @@ -1104,7 +1101,7 @@ qualifier: spec->type = FORMAT_TYPE_LONG; else spec->type = FORMAT_TYPE_ULONG; - } else if (TOLOWER(spec->qualifier) == 'z') { + } else if (_tolower(spec->qualifier) == 'z') { spec->type = FORMAT_TYPE_SIZE_T; } else if (spec->qualifier == 't') { spec->type = FORMAT_TYPE_PTRDIFF; @@ -1263,7 +1260,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) if (qualifier == 'l') { long *ip = va_arg(args, long *); *ip = (str - buf); - } else if (TOLOWER(qualifier) == 'z') { + } else if (_tolower(qualifier) == 'z') { size_t *ip = va_arg(args, size_t *); *ip = (str - buf); } else { @@ -1550,7 +1547,7 @@ do { \ void *skip_arg; if (qualifier == 'l') skip_arg = va_arg(args, long *); - else if (TOLOWER(qualifier) == 'z') + else if (_tolower(qualifier) == 'z') skip_arg = va_arg(args, size_t *); else skip_arg = va_arg(args, int *); @@ -1856,8 +1853,8 @@ int vsscanf(const char *buf, const char *fmt, va_list args) /* get conversion qualifier */ qualifier = -1; - if (*fmt == 'h' || TOLOWER(*fmt) == 'l' || - TOLOWER(*fmt) == 'z') { + if (*fmt == 'h' || _tolower(*fmt) == 'l' || + _tolower(*fmt) == 'z') { qualifier = *fmt++; if (unlikely(qualifier == *fmt)) { if (qualifier == 'h') { -- cgit v0.10.2 From 27c46a2546c75c6814562e85b751e3d64c188ad5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 25 Jul 2011 17:13:21 -0700 Subject: drivers/firmware/sigma.c needs MODULE_LICENSE Fix module tainting message: sigma: module license 'unspecified' taints kernel. Signed-off-by: Randy Dunlap Acked-by: Mike Frysinger Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/firmware/sigma.c b/drivers/firmware/sigma.c index c19cd2c..f10fc52 100644 --- a/drivers/firmware/sigma.c +++ b/drivers/firmware/sigma.c @@ -11,6 +11,7 @@ #include #include #include +#include #include /* Return: 0==OK, <0==error, =1 ==no more actions */ @@ -113,3 +114,5 @@ int process_sigma_firmware(struct i2c_client *client, const char *name) return ret; } EXPORT_SYMBOL(process_sigma_firmware); + +MODULE_LICENSE("GPL"); -- cgit v0.10.2 From 7d2367af0b09f8028dc5c1b1919bb82d141c2afb Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 25 Jul 2011 17:13:22 -0700 Subject: checkpatch: suggest using min_t or max_t A common issue with min() or max() is using a cast on one or both of the arguments when using min_t/max_t could be better. Add cast detection to uses of min/max and suggest an appropriate use of min_t or max_t instead. Caveat: This only works for min() or max() on a single line. It does not find min() or max() split across multiple lines. This does find: min((u32)foo, bar); But it does not find: max((unsigned long)foo, bar); Suggested-by: Andrew Morton Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index b0aa2c6..5ba62d8 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -268,6 +268,20 @@ sub build_types { } build_types(); +our $match_balanced_parentheses = qr/(\((?:[^\(\)]+|(-1))*\))/; + +our $Typecast = qr{\s*(\(\s*$NonptrType\s*\)){0,1}\s*}; +our $LvalOrFunc = qr{($Lval)\s*($match_balanced_parentheses{0,1})\s*}; + +sub deparenthesize { + my ($string) = @_; + return "" if (!defined($string)); + $string =~ s@^\s*\(\s*@@g; + $string =~ s@\s*\)\s*$@@g; + $string =~ s@\s+@ @g; + return $string; +} + $chk_signoff = 0 if ($file); my @dep_includes = (); @@ -2290,6 +2304,27 @@ sub process { } } +# typecasts on min/max could be min_t/max_t + if ($line =~ /^\+(?:.*?)\b(min|max)\s*\($Typecast{0,1}($LvalOrFunc)\s*,\s*$Typecast{0,1}($LvalOrFunc)\s*\)/) { + if (defined $2 || defined $8) { + my $call = $1; + my $cast1 = deparenthesize($2); + my $arg1 = $3; + my $cast2 = deparenthesize($8); + my $arg2 = $9; + my $cast; + + if ($cast1 ne "" && $cast2 ne "") { + $cast = "$cast1 or $cast2"; + } elsif ($cast1 ne "") { + $cast = $cast1; + } else { + $cast = $cast2; + } + WARN("$call() should probably be ${call}_t($cast, $arg1, $arg2)\n" . $herecurr); + } + } + # Need a space before open parenthesis after if, while etc if ($line=~/\b(if|while|for|switch)\(/) { ERROR("space required before the open parenthesis '('\n" . $herecurr); -- cgit v0.10.2 From 165e72a6c374ed03c57d03c88406d32745e1add2 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 25 Jul 2011 17:13:23 -0700 Subject: checkpatch: add __rcu as a sparse modifier Fix "need consistent spacing around '*'" error after a __rcu sparse annotation which was caused by the missing __rcu entry in the checkpatch.pl internal list of sparse keywords. Signed-off-by: Sven Eckelmann Cc: Andy Whitcroft Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 5ba62d8..63cca81 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -145,7 +145,8 @@ our $Sparse = qr{ __must_check| __init_refok| __kprobes| - __ref + __ref| + __rcu }x; # Notes to $Attribute: -- cgit v0.10.2 From 2011247550c1b903a9ecd68f6eb3e9e7b7b07f52 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 25 Jul 2011 17:13:23 -0700 Subject: checkpatch: validate signature styles and To: and Cc: lines Signatures have many forms and can sometimes cause problems if not in the correct format when using git send-email or quilt. Try to verify the signature tags and email addresses to use the generally accepted "Signed-off-by: Full Name " form. Original idea by Anish Kumar Signed-off-by: Joe Perches Cc: Anish Kumar Cc: Nick Bowler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 63cca81..b14f830 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -217,6 +217,16 @@ our $logFunctions = qr{(?x: MODULE_[A-Z_]+ )}; +our $signature_tags = qr{(?xi: + Signed-off-by:| + Acked-by:| + Tested-by:| + Reviewed-by:| + Reported-by:| + To:| + Cc: +)}; + our @typeList = ( qr{void}, qr{(?:unsigned\s+)?char}, @@ -356,6 +366,76 @@ sub top_of_kernel_tree { return 1; } +sub parse_email { + my ($formatted_email) = @_; + + my $name = ""; + my $address = ""; + my $comment = ""; + + if ($formatted_email =~ /^(.*)<(\S+\@\S+)>(.*)$/) { + $name = $1; + $address = $2; + $comment = $3 if defined $3; + } elsif ($formatted_email =~ /^\s*<(\S+\@\S+)>(.*)$/) { + $address = $1; + $comment = $2 if defined $2; + } elsif ($formatted_email =~ /(\S+\@\S+)(.*)$/) { + $address = $1; + $comment = $2 if defined $2; + $formatted_email =~ s/$address.*$//; + $name = $formatted_email; + $name =~ s/^\s+|\s+$//g; + $name =~ s/^\"|\"$//g; + # If there's a name left after stripping spaces and + # leading quotes, and the address doesn't have both + # leading and trailing angle brackets, the address + # is invalid. ie: + # "joe smith joe@smith.com" bad + # "joe smith ]+>$/) { + $name = ""; + $address = ""; + $comment = ""; + } + } + + $name =~ s/^\s+|\s+$//g; + $name =~ s/^\"|\"$//g; + $address =~ s/^\s+|\s+$//g; + $address =~ s/^\<|\>$//g; + + if ($name =~ /[^\w \-]/i) { ##has "must quote" chars + $name =~ s/(?"; + } + + return $formatted_email; +} + sub expand_tabs { my ($str) = @_; @@ -1380,17 +1460,44 @@ sub process { } } -#check the patch for a signoff: +# Check the patch for a signoff: if ($line =~ /^\s*signed-off-by:/i) { - # This is a signoff, if ugly, so do not double report. $signoff++; - if (!($line =~ /^\s*Signed-off-by:/)) { - WARN("Signed-off-by: is the preferred form\n" . - $herecurr); + } + +# Check signature styles + if ($line =~ /^(\s*)($signature_tags)(\s*)(.*)/) { + my $space_before = $1; + my $sign_off = $2; + my $space_after = $3; + my $email = $4; + my $ucfirst_sign_off = ucfirst(lc($sign_off)); + + if (defined $space_before && $space_before ne "") { + WARN("Do not use whitespace before $ucfirst_sign_off\n" . $herecurr); } - if ($line =~ /^\s*signed-off-by:\S/i) { - WARN("space required after Signed-off-by:\n" . - $herecurr); + if ($sign_off =~ /-by:$/i && $sign_off ne $ucfirst_sign_off) { + WARN("'$ucfirst_sign_off' is the preferred signature form\n" . $herecurr); + } + if (!defined $space_after || $space_after ne " ") { + WARN("Use a single space after $ucfirst_sign_off\n" . $herecurr); + } + + my ($email_name, $email_address, $comment) = parse_email($email); + my $suggested_email = format_email(($email_name, $email_address)); + if ($suggested_email eq "") { + ERROR("Unrecognized email address: '$email'\n" . $herecurr); + } else { + my $dequoted = $suggested_email; + $dequoted =~ s/^"//; + $dequoted =~ s/" $comment" ne $email && + "$suggested_email$comment" ne $email) { + WARN("email address '$email' might be better as '$suggested_email$comment'\n" . $herecurr); + } } } -- cgit v0.10.2 From 39b7e2878e783af027ddd3530f7a0abec330905d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 25 Jul 2011 17:13:24 -0700 Subject: checkpatch: add a "prefer __aligned" check Prefer the use of __aligned(size) over __attribute__((__aligned___(size))) Link: http://lkml.kernel.org/r/20110609094526.1571774c.akpm@linux-foundation.org Suggested-by: Andrew Morton Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index b14f830..8f35f0e 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2891,6 +2891,11 @@ sub process { WARN("__packed is preferred over __attribute__((packed))\n" . $herecurr); } +# Check for __attribute__ aligned, prefer __aligned + if ($line =~ /\b__attribute__\s*\(\s*\(.*aligned/) { + WARN("__aligned(size) is preferred over __attribute__((aligned(size)))\n" . $herecurr); + } + # check for sizeof(&) if ($line =~ /\bsizeof\s*\(\s*\&/) { WARN("sizeof(& should be avoided\n" . $herecurr); -- cgit v0.10.2 From 000d1cc1829f938c87402fc2fd4bb5e8daed6b52 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 25 Jul 2011 17:13:25 -0700 Subject: checkpatch.pl: add ability to ignore various messages Some users would like the ability to not emit some of the messages that checkpatch produces. This can make it easier to use checkpatch in other projects and integrate into scm hook scripts. Add command line option to "--ignore" various message types. Add option --show-types to emit the "type" of each message. Categorize all ERROR, WARN and CHK messages with types. Add optional .checkpatch.conf file to store default options. 3 paths are searched for .checkpatch.conf . customized per-tree configurations $HOME user global configuration when per-tree configs don't exist ./scripts lk defaults to override script The .conf file can contain any valid command-line argument and the contents are prepended to any additional command line arguments. Multiple lines may be used, blank lines are ignored, # is a comment. Update "false positive" output for readability. Update version to 0.32 Signed-off-by: Joe Perches Acked-by: Mike Frysinger Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 8f35f0e..05777ab 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -10,7 +10,7 @@ use strict; my $P = $0; $P =~ s@.*/@@g; -my $V = '0.31'; +my $V = '0.32'; use Getopt::Long qw(:config no_auto_abbrev); @@ -26,9 +26,13 @@ my $check = 0; my $summary = 1; my $mailback = 0; my $summary_file = 0; +my $show_types = 0; my $root; my %debug; +my %ignore_type = (); +my @ignore = (); my $help = 0; +my $configuration_file = ".checkpatch.conf"; sub help { my ($exitcode) = @_; @@ -46,6 +50,8 @@ Options: --terse one line per report -f, --file treat FILE as regular source file --subjective, --strict enable more subjective tests + --ignore TYPE(,TYPE2...) ignore various comma separated message types + --show-types show the message "types" in the output --root=PATH PATH to the kernel tree root --no-summary suppress the per-file summary --mailback only produce a report in case of warnings/errors @@ -63,6 +69,32 @@ EOM exit($exitcode); } +my $conf = which_conf($configuration_file); +if (-f $conf) { + my @conf_args; + open(my $conffile, '<', "$conf") + or warn "$P: Can't find a readable $configuration_file file $!\n"; + + while (<$conffile>) { + my $line = $_; + + $line =~ s/\s*\n?$//g; + $line =~ s/^\s*//g; + $line =~ s/\s+/ /g; + + next if ($line =~ m/^\s*#/); + next if ($line =~ m/^\s*$/); + + my @words = split(" ", $line); + foreach my $word (@words) { + last if ($word =~ m/^#/); + push (@conf_args, $word); + } + } + close($conffile); + unshift(@ARGV, @conf_args) if @conf_args; +} + GetOptions( 'q|quiet+' => \$quiet, 'tree!' => \$tree, @@ -73,6 +105,8 @@ GetOptions( 'f|file!' => \$file, 'subjective!' => \$check, 'strict!' => \$check, + 'ignore=s' => \@ignore, + 'show-types!' => \$show_types, 'root=s' => \$root, 'summary!' => \$summary, 'mailback!' => \$mailback, @@ -93,6 +127,19 @@ if ($#ARGV < 0) { exit(1); } +@ignore = split(/,/, join(',',@ignore)); +foreach my $word (@ignore) { + $word =~ s/\s*\n?$//g; + $word =~ s/^\s*//g; + $word =~ s/\s+/ /g; + $word =~ tr/[a-z]/[A-Z]/; + + next if ($word =~ m/^\s*#/); + next if ($word =~ m/^\s*$/); + + $ignore_type{$word}++; +} + my $dbg_values = 0; my $dbg_possible = 0; my $dbg_type = 0; @@ -364,7 +411,7 @@ sub top_of_kernel_tree { } } return 1; -} + } sub parse_email { my ($formatted_email) = @_; @@ -436,6 +483,18 @@ sub format_email { return $formatted_email; } +sub which_conf { + my ($conf) = @_; + + foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) { + if (-e "$path/$conf") { + return "$path/$conf"; + } + } + + return ""; +} + sub expand_tabs { my ($str) = @_; @@ -1181,12 +1240,21 @@ sub possible { my $prefix = ''; +sub show_type { + return !defined $ignore_type{$_[0]}; +} + sub report { - if (defined $tst_only && $_[0] !~ /\Q$tst_only\E/) { + if (!show_type($_[1]) || + (defined $tst_only && $_[2] !~ /\Q$tst_only\E/)) { return 0; } - my $line = $prefix . $_[0]; - + my $line; + if ($show_types) { + $line = "$prefix$_[0]:$_[1]: $_[2]\n"; + } else { + $line = "$prefix$_[0]: $_[2]\n"; + } $line = (split('\n', $line))[0] . "\n" if ($terse); push(our @report, $line); @@ -1196,20 +1264,21 @@ sub report { sub report_dump { our @report; } + sub ERROR { - if (report("ERROR: $_[0]\n")) { + if (report("ERROR", $_[0], $_[1])) { our $clean = 0; our $cnt_error++; } } sub WARN { - if (report("WARNING: $_[0]\n")) { + if (report("WARNING", $_[0], $_[1])) { our $clean = 0; our $cnt_warn++; } } sub CHK { - if ($check && report("CHECK: $_[0]\n")) { + if ($check && report("CHECK", $_[0], $_[1])) { our $clean = 0; our $cnt_chk++; } @@ -1238,7 +1307,8 @@ sub check_absolute_file { ##print "prefix<$prefix>\n"; if ($prefix ne ".../") { - WARN("use relative pathname instead of absolute in changelog text\n" . $herecurr); + WARN("USE_RELATIVE_PATH", + "use relative pathname instead of absolute in changelog text\n" . $herecurr); } } @@ -1435,11 +1505,13 @@ sub process { $p1_prefix = $1; if (!$file && $tree && $p1_prefix ne '' && -e "$root/$p1_prefix") { - WARN("patch prefix '$p1_prefix' exists, appears to be a -p0 patch\n"); + WARN("PATCH_PREFIX", + "patch prefix '$p1_prefix' exists, appears to be a -p0 patch\n"); } if ($realfile =~ m@^include/asm/@) { - ERROR("do not modify files in include/asm, change architecture specific files in include/asm-\n" . "$here$rawline\n"); + ERROR("MODIFIED_INCLUDE_ASM", + "do not modify files in include/asm, change architecture specific files in include/asm-\n" . "$here$rawline\n"); } next; } @@ -1456,7 +1528,8 @@ sub process { if ($line =~ /^new (file )?mode.*[7531]\d{0,2}$/) { my $permhere = $here . "FILE: $realfile\n"; if ($realfile =~ /(Makefile|Kconfig|\.c|\.h|\.S|\.tmpl)$/) { - ERROR("do not set execute permissions for source files\n" . $permhere); + ERROR("EXECUTE_PERMISSIONS", + "do not set execute permissions for source files\n" . $permhere); } } @@ -1474,19 +1547,23 @@ sub process { my $ucfirst_sign_off = ucfirst(lc($sign_off)); if (defined $space_before && $space_before ne "") { - WARN("Do not use whitespace before $ucfirst_sign_off\n" . $herecurr); + WARN("BAD_SIGN_OFF", + "Do not use whitespace before $ucfirst_sign_off\n" . $herecurr); } if ($sign_off =~ /-by:$/i && $sign_off ne $ucfirst_sign_off) { - WARN("'$ucfirst_sign_off' is the preferred signature form\n" . $herecurr); + WARN("BAD_SIGN_OFF", + "'$ucfirst_sign_off' is the preferred signature form\n" . $herecurr); } if (!defined $space_after || $space_after ne " ") { - WARN("Use a single space after $ucfirst_sign_off\n" . $herecurr); + WARN("BAD_SIGN_OFF", + "Use a single space after $ucfirst_sign_off\n" . $herecurr); } my ($email_name, $email_address, $comment) = parse_email($email); my $suggested_email = format_email(($email_name, $email_address)); if ($suggested_email eq "") { - ERROR("Unrecognized email address: '$email'\n" . $herecurr); + ERROR("BAD_SIGN_OFF", + "Unrecognized email address: '$email'\n" . $herecurr); } else { my $dequoted = $suggested_email; $dequoted =~ s/^"//; @@ -1496,14 +1573,16 @@ sub process { if ("$dequoted$comment" ne $email && "<$email_address>$comment" ne $email && "$suggested_email$comment" ne $email) { - WARN("email address '$email' might be better as '$suggested_email$comment'\n" . $herecurr); + WARN("BAD_SIGN_OFF", + "email address '$email' might be better as '$suggested_email$comment'\n" . $herecurr); } } } # Check for wrappage within a valid hunk of the file if ($realcnt != 0 && $line !~ m{^(?:\+|-| |\\ No newline|$)}) { - ERROR("patch seems to be corrupt (line wrapped?)\n" . + ERROR("CORRUPTED_PATCH", + "patch seems to be corrupt (line wrapped?)\n" . $herecurr) if (!$emitted_corrupt++); } @@ -1530,7 +1609,8 @@ sub process { my $ptr = substr($blank, 0, length($utf8_prefix)) . "^"; my $hereptr = "$hereline$ptr\n"; - ERROR("Invalid UTF-8, patch and commit message should be encoded in UTF-8\n" . $hereptr); + ERROR("INVALID_UTF8", + "Invalid UTF-8, patch and commit message should be encoded in UTF-8\n" . $hereptr); } # ignore non-hunk lines and lines being removed @@ -1539,11 +1619,13 @@ sub process { #trailing whitespace if ($line =~ /^\+.*\015/) { my $herevet = "$here\n" . cat_vet($rawline) . "\n"; - ERROR("DOS line endings\n" . $herevet); + ERROR("DOS_LINE_ENDINGS", + "DOS line endings\n" . $herevet); } elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) { my $herevet = "$here\n" . cat_vet($rawline) . "\n"; - ERROR("trailing whitespace\n" . $herevet); + ERROR("TRAILING_WHITESPACE", + "trailing whitespace\n" . $herevet); $rpt_cleaners = 1; } @@ -1574,7 +1656,8 @@ sub process { } $length++; } - WARN("please write a paragraph that describes the config symbol fully\n" . $herecurr) if ($is_end && $length < 4); + WARN("CONFIG_DESCRIPTION", + "please write a paragraph that describes the config symbol fully\n" . $herecurr) if ($is_end && $length < 4); #print "is_end<$is_end> length<$length>\n"; } @@ -1588,28 +1671,33 @@ sub process { $line =~ /^\+\s*"[^"]*"\s*(?:\s*|,|\)\s*;)\s*$/) && $length > 80) { - WARN("line over 80 characters\n" . $herecurr); + WARN("LONG_LINE", + "line over 80 characters\n" . $herecurr); } # check for spaces before a quoted newline if ($rawline =~ /^.*\".*\s\\n/) { - WARN("unnecessary whitespace before a quoted newline\n" . $herecurr); + WARN("QUOTED_WHITESPACE_BEFORE_NEWLINE", + "unnecessary whitespace before a quoted newline\n" . $herecurr); } # check for adding lines without a newline. if ($line =~ /^\+/ && defined $lines[$linenr] && $lines[$linenr] =~ /^\\ No newline at end of file/) { - WARN("adding a line without newline at end of file\n" . $herecurr); + WARN("MISSING_EOF_NEWLINE", + "adding a line without newline at end of file\n" . $herecurr); } # Blackfin: use hi/lo macros if ($realfile =~ m@arch/blackfin/.*\.S$@) { if ($line =~ /\.[lL][[:space:]]*=.*&[[:space:]]*0x[fF][fF][fF][fF]/) { my $herevet = "$here\n" . cat_vet($line) . "\n"; - ERROR("use the LO() macro, not (... & 0xFFFF)\n" . $herevet); + ERROR("LO_MACRO", + "use the LO() macro, not (... & 0xFFFF)\n" . $herevet); } if ($line =~ /\.[hH][[:space:]]*=.*>>[[:space:]]*16/) { my $herevet = "$here\n" . cat_vet($line) . "\n"; - ERROR("use the HI() macro, not (... >> 16)\n" . $herevet); + ERROR("HI_MACRO", + "use the HI() macro, not (... >> 16)\n" . $herevet); } } @@ -1621,14 +1709,16 @@ sub process { if ($rawline =~ /^\+\s* \t\s*\S/ || $rawline =~ /^\+\s* \s*/) { my $herevet = "$here\n" . cat_vet($rawline) . "\n"; - ERROR("code indent should use tabs where possible\n" . $herevet); + ERROR("CODE_INDENT", + "code indent should use tabs where possible\n" . $herevet); $rpt_cleaners = 1; } # check for space before tabs. if ($rawline =~ /^\+/ && $rawline =~ / \t/) { my $herevet = "$here\n" . cat_vet($rawline) . "\n"; - WARN("please, no space before tabs\n" . $herevet); + WARN("SPACE_BEFORE_TAB", + "please, no space before tabs\n" . $herevet); } # check for spaces at the beginning of a line. @@ -1638,7 +1728,8 @@ sub process { # 3) hanging labels if ($rawline =~ /^\+ / && $line !~ /\+ *(?:$;|#|$Ident:)/) { my $herevet = "$here\n" . cat_vet($rawline) . "\n"; - WARN("please, no spaces at the start of a line\n" . $herevet); + WARN("LEADING_SPACE", + "please, no spaces at the start of a line\n" . $herevet); } # check we are in a valid C source file if not then ignore this hunk @@ -1646,17 +1737,20 @@ sub process { # check for RCS/CVS revision markers if ($rawline =~ /^\+.*\$(Revision|Log|Id)(?:\$|)/) { - WARN("CVS style keyword markers, these will _not_ be updated\n". $herecurr); + WARN("CVS_KEYWORD", + "CVS style keyword markers, these will _not_ be updated\n". $herecurr); } # Blackfin: don't use __builtin_bfin_[cs]sync if ($line =~ /__builtin_bfin_csync/) { my $herevet = "$here\n" . cat_vet($line) . "\n"; - ERROR("use the CSYNC() macro in asm/blackfin.h\n" . $herevet); + ERROR("CSYNC", + "use the CSYNC() macro in asm/blackfin.h\n" . $herevet); } if ($line =~ /__builtin_bfin_ssync/) { my $herevet = "$here\n" . cat_vet($line) . "\n"; - ERROR("use the SSYNC() macro in asm/blackfin.h\n" . $herevet); + ERROR("SSYNC", + "use the SSYNC() macro in asm/blackfin.h\n" . $herevet); } # Check for potential 'bare' types @@ -1745,7 +1839,8 @@ sub process { } } if ($err ne '') { - ERROR("switch and case should be at the same indent\n$hereline$err"); + ERROR("SWITCH_CASE_INDENT_LEVEL", + "switch and case should be at the same indent\n$hereline$err"); } } @@ -1773,7 +1868,8 @@ sub process { #print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n"; if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln -1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) { - ERROR("that open brace { should be on the previous line\n" . + ERROR("OPEN_BRACE", + "that open brace { should be on the previous line\n" . "$here\n$ctx\n$rawlines[$ctx_ln - 1]\n"); } if ($level == 0 && $pre_ctx !~ /}\s*while\s*\($/ && @@ -1782,7 +1878,8 @@ sub process { { my ($nlength, $nindent) = line_stats($lines[$ctx_ln - 1]); if ($nindent > $indent) { - WARN("trailing semicolon indicates no statements, indent implies otherwise\n" . + WARN("TRAILING_SEMICOLON", + "trailing semicolon indicates no statements, indent implies otherwise\n" . "$here\n$ctx\n$rawlines[$ctx_ln - 1]\n"); } } @@ -1870,7 +1967,8 @@ sub process { if ($check && (($sindent % 8) != 0 || ($sindent <= $indent && $s ne ''))) { - WARN("suspect code indent for conditional statements ($indent, $sindent)\n" . $herecurr . "$stat_real\n"); + WARN("SUSPECT_CODE_INDENT", + "suspect code indent for conditional statements ($indent, $sindent)\n" . $herecurr . "$stat_real\n"); } } @@ -1893,18 +1991,22 @@ sub process { # TEST: allow direct testing of the type matcher. if ($dbg_type) { if ($line =~ /^.\s*$Declare\s*$/) { - ERROR("TEST: is type\n" . $herecurr); + ERROR("TEST_TYPE", + "TEST: is type\n" . $herecurr); } elsif ($dbg_type > 1 && $line =~ /^.+($Declare)/) { - ERROR("TEST: is not type ($1 is)\n". $herecurr); + ERROR("TEST_NOT_TYPE", + "TEST: is not type ($1 is)\n". $herecurr); } next; } # TEST: allow direct testing of the attribute matcher. if ($dbg_attr) { if ($line =~ /^.\s*$Modifier\s*$/) { - ERROR("TEST: is attr\n" . $herecurr); + ERROR("TEST_ATTR", + "TEST: is attr\n" . $herecurr); } elsif ($dbg_attr > 1 && $line =~ /^.+($Modifier)/) { - ERROR("TEST: is not attr ($1 is)\n". $herecurr); + ERROR("TEST_NOT_ATTR", + "TEST: is not attr ($1 is)\n". $herecurr); } next; } @@ -1912,7 +2014,8 @@ sub process { # check for initialisation to aggregates open brace on the next line if ($line =~ /^.\s*{/ && $prevline =~ /(?:^|[^=])=\s*$/) { - ERROR("that open brace { should be on the previous line\n" . $hereprev); + ERROR("OPEN_BRACE", + "that open brace { should be on the previous line\n" . $hereprev); } # @@ -1923,14 +2026,16 @@ sub process { if ($rawline =~ m{^.\s*\#\s*include\s+[<"](.*)[">]}) { my $path = $1; if ($path =~ m{//}) { - ERROR("malformed #include filename\n" . + ERROR("MALFORMED_INCLUDE", + "malformed #include filename\n" . $herecurr); } } # no C99 // comments if ($line =~ m{//}) { - ERROR("do not use C99 // comments\n" . $herecurr); + ERROR("C99_COMMENTS", + "do not use C99 // comments\n" . $herecurr); } # Remove C99 comments. $line =~ s@//.*@@; @@ -1977,35 +2082,41 @@ sub process { } if (defined $suppress_export{$linenr} && $suppress_export{$linenr} == 2) { - WARN("EXPORT_SYMBOL(foo); should immediately follow its function/variable\n" . $herecurr); + WARN("EXPORT_SYMBOL", + "EXPORT_SYMBOL(foo); should immediately follow its function/variable\n" . $herecurr); } # check for global initialisers. if ($line =~ /^.$Type\s*$Ident\s*(?:\s+$Modifier)*\s*=\s*(0|NULL|false)\s*;/) { - ERROR("do not initialise globals to 0 or NULL\n" . + ERROR("GLOBAL_INITIALISERS", + "do not initialise globals to 0 or NULL\n" . $herecurr); } # check for static initialisers. if ($line =~ /\bstatic\s.*=\s*(0|NULL|false)\s*;/) { - ERROR("do not initialise statics to 0 or NULL\n" . + ERROR("INITIALISED_STATIC", + "do not initialise statics to 0 or NULL\n" . $herecurr); } # check for static const char * arrays. if ($line =~ /\bstatic\s+const\s+char\s*\*\s*(\w+)\s*\[\s*\]\s*=\s*/) { - WARN("static const char * array should probably be static const char * const\n" . + WARN("STATIC_CONST_CHAR_ARRAY", + "static const char * array should probably be static const char * const\n" . $herecurr); } # check for static char foo[] = "bar" declarations. if ($line =~ /\bstatic\s+char\s+(\w+)\s*\[\s*\]\s*=\s*"/) { - WARN("static char array declaration should probably be static const char\n" . + WARN("STATIC_CONST_CHAR_ARRAY", + "static char array declaration should probably be static const char\n" . $herecurr); } # check for declarations of struct pci_device_id if ($line =~ /\bstruct\s+pci_device_id\s+\w+\s*\[\s*\]\s*\=\s*\{/) { - WARN("Use DEFINE_PCI_DEVICE_TABLE for struct pci_device_id\n" . $herecurr); + WARN("DEFINE_PCI_DEVICE_TABLE", + "Use DEFINE_PCI_DEVICE_TABLE for struct pci_device_id\n" . $herecurr); } # check for new typedefs, only function parameters and sparse annotations @@ -2015,7 +2126,8 @@ sub process { $line !~ /\btypedef\s+$Type\s+$Ident\s*\(/ && $line !~ /\b$typeTypedefs\b/ && $line !~ /\b__bitwise(?:__|)\b/) { - WARN("do not add new typedefs\n" . $herecurr); + WARN("NEW_TYPEDEFS", + "do not add new typedefs\n" . $herecurr); } # * goes on variable not on type @@ -2033,7 +2145,8 @@ sub process { #print "from<$from> to<$to>\n"; if ($from ne $to) { - ERROR("\"(foo$from)\" should be \"(foo$to)\"\n" . $herecurr); + ERROR("POINTER_LOCATION", + "\"(foo$from)\" should be \"(foo$to)\"\n" . $herecurr); } } elsif ($line =~ m{\b$NonptrType(\s*(?:$Modifier\b\s*|\*\s*)+)($Ident)}) { my ($from, $to, $ident) = ($1, $1, $2); @@ -2050,7 +2163,8 @@ sub process { #print "from<$from> to<$to> ident<$ident>\n"; if ($from ne $to && $ident !~ /^$Modifier$/) { - ERROR("\"foo${from}bar\" should be \"foo${to}bar\"\n" . $herecurr); + ERROR("POINTER_LOCATION", + "\"foo${from}bar\" should be \"foo${to}bar\"\n" . $herecurr); } } @@ -2062,12 +2176,14 @@ sub process { # } if ($line =~ /\bLINUX_VERSION_CODE\b/) { - WARN("LINUX_VERSION_CODE should be avoided, code should be for the version to which it is merged\n" . $herecurr); + WARN("LINUX_VERSION_CODE", + "LINUX_VERSION_CODE should be avoided, code should be for the version to which it is merged\n" . $herecurr); } # check for uses of printk_ratelimit if ($line =~ /\bprintk_ratelimit\s*\(/) { - WARN("Prefer printk_ratelimited or pr__ratelimited to printk_ratelimit\n" . $herecurr); + WARN("PRINTK_RATELIMITED", +"Prefer printk_ratelimited or pr__ratelimited to printk_ratelimit\n" . $herecurr); } # printk should use KERN_* levels. Note that follow on printk's on the @@ -2089,7 +2205,8 @@ sub process { } } if ($ok == 0) { - WARN("printk() should include KERN_ facility level\n" . $herecurr); + WARN("PRINTK_WITHOUT_KERN_LEVEL", + "printk() should include KERN_ facility level\n" . $herecurr); } } @@ -2097,18 +2214,21 @@ sub process { # or if closed on same line if (($line=~/$Type\s*$Ident\(.*\).*\s{/) and !($line=~/\#\s*define.*do\s{/) and !($line=~/}/)) { - ERROR("open brace '{' following function declarations go on the next line\n" . $herecurr); + ERROR("OPEN_BRACE", + "open brace '{' following function declarations go on the next line\n" . $herecurr); } # open braces for enum, union and struct go on the same line. if ($line =~ /^.\s*{/ && $prevline =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?\s*$/) { - ERROR("open brace '{' following $1 go on the same line\n" . $hereprev); + ERROR("OPEN_BRACE", + "open brace '{' following $1 go on the same line\n" . $hereprev); } # missing space after union, struct or enum definition if ($line =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?(?:\s+$Ident)?[=\{]/) { - WARN("missing space after $1 definition\n" . $herecurr); + WARN("SPACING", + "missing space after $1 definition\n" . $herecurr); } # check for spacing round square brackets; allowed: @@ -2120,7 +2240,8 @@ sub process { if ($prefix !~ /$Type\s+$/ && ($where != 0 || $prefix !~ /^.\s+$/) && $prefix !~ /{\s+$/) { - ERROR("space prohibited before open square bracket '['\n" . $herecurr); + ERROR("BRACKET_SPACE", + "space prohibited before open square bracket '['\n" . $herecurr); } } @@ -2151,7 +2272,8 @@ sub process { } elsif ($ctx =~ /$Type$/) { } else { - WARN("space prohibited between function name and open parenthesis '('\n" . $herecurr); + WARN("SPACING", + "space prohibited between function name and open parenthesis '('\n" . $herecurr); } } # Check operator spacing. @@ -2225,7 +2347,8 @@ sub process { } elsif ($op eq ';') { if ($ctx !~ /.x[WEBC]/ && $cc !~ /^\\/ && $cc !~ /^;/) { - ERROR("space required after that '$op' $at\n" . $hereptr); + ERROR("SPACING", + "space required after that '$op' $at\n" . $hereptr); } # // is a comment @@ -2236,13 +2359,15 @@ sub process { # : when part of a bitfield } elsif ($op eq '->' || $opv eq ':B') { if ($ctx =~ /Wx.|.xW/) { - ERROR("spaces prohibited around that '$op' $at\n" . $hereptr); + ERROR("SPACING", + "spaces prohibited around that '$op' $at\n" . $hereptr); } # , must have a space on the right. } elsif ($op eq ',') { if ($ctx !~ /.x[WEC]/ && $cc !~ /^}/) { - ERROR("space required after that '$op' $at\n" . $hereptr); + ERROR("SPACING", + "space required after that '$op' $at\n" . $hereptr); } # '*' as part of a type definition -- reported already. @@ -2256,26 +2381,31 @@ sub process { $opv eq '*U' || $opv eq '-U' || $opv eq '&U' || $opv eq '&&U') { if ($ctx !~ /[WEBC]x./ && $ca !~ /(?:\)|!|~|\*|-|\&|\||\+\+|\-\-|\{)$/) { - ERROR("space required before that '$op' $at\n" . $hereptr); + ERROR("SPACING", + "space required before that '$op' $at\n" . $hereptr); } if ($op eq '*' && $cc =~/\s*$Modifier\b/) { # A unary '*' may be const } elsif ($ctx =~ /.xW/) { - ERROR("space prohibited after that '$op' $at\n" . $hereptr); + ERROR("SPACING", + "space prohibited after that '$op' $at\n" . $hereptr); } # unary ++ and unary -- are allowed no space on one side. } elsif ($op eq '++' or $op eq '--') { if ($ctx !~ /[WEOBC]x[^W]/ && $ctx !~ /[^W]x[WOBEC]/) { - ERROR("space required one side of that '$op' $at\n" . $hereptr); + ERROR("SPACING", + "space required one side of that '$op' $at\n" . $hereptr); } if ($ctx =~ /Wx[BE]/ || ($ctx =~ /Wx./ && $cc =~ /^;/)) { - ERROR("space prohibited before that '$op' $at\n" . $hereptr); + ERROR("SPACING", + "space prohibited before that '$op' $at\n" . $hereptr); } if ($ctx =~ /ExW/) { - ERROR("space prohibited after that '$op' $at\n" . $hereptr); + ERROR("SPACING", + "space prohibited after that '$op' $at\n" . $hereptr); } @@ -2287,7 +2417,8 @@ sub process { $op eq '%') { if ($ctx =~ /Wx[^WCE]|[^WCE]xW/) { - ERROR("need consistent spacing around '$op' $at\n" . + ERROR("SPACING", + "need consistent spacing around '$op' $at\n" . $hereptr); } @@ -2295,7 +2426,8 @@ sub process { # terminating a case value or a label. } elsif ($opv eq ':C' || $opv eq ':L') { if ($ctx =~ /Wx./) { - ERROR("space prohibited before that '$op' $at\n" . $hereptr); + ERROR("SPACING", + "space prohibited before that '$op' $at\n" . $hereptr); } # All the others need spaces both sides. @@ -2318,7 +2450,8 @@ sub process { } if ($ok == 0) { - ERROR("spaces required around that '$op' $at\n" . $hereptr); + ERROR("SPACING", + "spaces required around that '$op' $at\n" . $hereptr); } } $off += length($elements[$n + 1]); @@ -2327,7 +2460,8 @@ sub process { # check for multiple assignments if ($line =~ /^.\s*$Lval\s*=\s*$Lval\s*=(?!=)/) { - CHK("multiple assignments should be avoided\n" . $herecurr); + CHK("MULTIPLE_ASSIGNMENTS", + "multiple assignments should be avoided\n" . $herecurr); } ## # check for multiple declarations, allowing for a function declaration @@ -2341,45 +2475,53 @@ sub process { ## while ($ln =~ s/\([^\(\)]*\)//g) { ## } ## if ($ln =~ /,/) { -## WARN("declaring multiple variables together should be avoided\n" . $herecurr); +## WARN("MULTIPLE_DECLARATION", +## "declaring multiple variables together should be avoided\n" . $herecurr); ## } ## } #need space before brace following if, while, etc if (($line =~ /\(.*\){/ && $line !~ /\($Type\){/) || $line =~ /do{/) { - ERROR("space required before the open brace '{'\n" . $herecurr); + ERROR("SPACING", + "space required before the open brace '{'\n" . $herecurr); } # closing brace should have a space following it when it has anything # on the line if ($line =~ /}(?!(?:,|;|\)))\S/) { - ERROR("space required after that close brace '}'\n" . $herecurr); + ERROR("SPACING", + "space required after that close brace '}'\n" . $herecurr); } # check spacing on square brackets if ($line =~ /\[\s/ && $line !~ /\[\s*$/) { - ERROR("space prohibited after that open square bracket '['\n" . $herecurr); + ERROR("SPACING", + "space prohibited after that open square bracket '['\n" . $herecurr); } if ($line =~ /\s\]/) { - ERROR("space prohibited before that close square bracket ']'\n" . $herecurr); + ERROR("SPACING", + "space prohibited before that close square bracket ']'\n" . $herecurr); } # check spacing on parentheses if ($line =~ /\(\s/ && $line !~ /\(\s*(?:\\)?$/ && $line !~ /for\s*\(\s+;/) { - ERROR("space prohibited after that open parenthesis '('\n" . $herecurr); + ERROR("SPACING", + "space prohibited after that open parenthesis '('\n" . $herecurr); } if ($line =~ /(\s+)\)/ && $line !~ /^.\s*\)/ && $line !~ /for\s*\(.*;\s+\)/ && $line !~ /:\s+\)/) { - ERROR("space prohibited before that close parenthesis ')'\n" . $herecurr); + ERROR("SPACING", + "space prohibited before that close parenthesis ')'\n" . $herecurr); } #goto labels aren't indented, allow a single space however if ($line=~/^.\s+[A-Za-z\d_]+:(?![0-9]+)/ and !($line=~/^. [A-Za-z\d_]+:/) and !($line=~/^.\s+default:/)) { - WARN("labels should not be indented\n" . $herecurr); + WARN("INDENTED_LABEL", + "labels should not be indented\n" . $herecurr); } # Return is not a function. @@ -2398,17 +2540,20 @@ sub process { } #print "value<$value>\n"; if ($value =~ /^\s*(?:$Ident|-?$Constant)\s*$/) { - ERROR("return is not a function, parentheses are not required\n" . $herecurr); + ERROR("RETURN_PARENTHESES", + "return is not a function, parentheses are not required\n" . $herecurr); } elsif ($spacing !~ /\s+/) { - ERROR("space required before the open parenthesis '('\n" . $herecurr); + ERROR("SPACING", + "space required before the open parenthesis '('\n" . $herecurr); } } # Return of what appears to be an errno should normally be -'ve if ($line =~ /^.\s*return\s*(E[A-Z]*)\s*;/) { my $name = $1; if ($name ne 'EOF' && $name ne 'ERROR') { - WARN("return of an errno should typically be -ve (return -$1)\n" . $herecurr); + WARN("USE_NEGATIVE_ERRNO", + "return of an errno should typically be -ve (return -$1)\n" . $herecurr); } } @@ -2435,7 +2580,7 @@ sub process { # Need a space before open parenthesis after if, while etc if ($line=~/\b(if|while|for|switch)\(/) { - ERROR("space required before the open parenthesis '('\n" . $herecurr); + ERROR("SPACING", "space required before the open parenthesis '('\n" . $herecurr); } # Check for illegal assignment in if conditional -- and check for trailing @@ -2463,7 +2608,8 @@ sub process { my ($s, $c) = ($stat, $cond); if ($c =~ /\bif\s*\(.*[^<>!=]=[^=].*/s) { - ERROR("do not use assignment in if condition\n" . $herecurr); + ERROR("ASSIGN_IN_IF", + "do not use assignment in if condition\n" . $herecurr); } # Find out what is on the end of the line after the @@ -2485,7 +2631,8 @@ sub process { $stat_real = "[...]\n$stat_real"; } - ERROR("trailing statements should be on next line\n" . $herecurr . $stat_real); + ERROR("TRAILING_STATEMENTS", + "trailing statements should be on next line\n" . $herecurr . $stat_real); } } @@ -2501,7 +2648,8 @@ sub process { (?:\&\&|\|\||\)|\]) )/x) { - WARN("boolean test with hexadecimal, perhaps just 1 \& or \|?\n" . $herecurr); + WARN("HEXADECIMAL_BOOLEAN_TEST", + "boolean test with hexadecimal, perhaps just 1 \& or \|?\n" . $herecurr); } # if and else should not have general statements after it @@ -2509,12 +2657,14 @@ sub process { my $s = $1; $s =~ s/$;//g; # Remove any comments if ($s !~ /^\s*(?:\sif|(?:{|)\s*\\?\s*$)/) { - ERROR("trailing statements should be on next line\n" . $herecurr); + ERROR("TRAILING_STATEMENTS", + "trailing statements should be on next line\n" . $herecurr); } } # if should not continue a brace if ($line =~ /}\s*if\b/) { - ERROR("trailing statements should be on next line\n" . + ERROR("TRAILING_STATEMENTS", + "trailing statements should be on next line\n" . $herecurr); } # case and default should not have general statements after them @@ -2524,14 +2674,16 @@ sub process { \s*return\s+ )/xg) { - ERROR("trailing statements should be on next line\n" . $herecurr); + ERROR("TRAILING_STATEMENTS", + "trailing statements should be on next line\n" . $herecurr); } # Check for }else {, these must be at the same # indent level to be relevant to each other. if ($prevline=~/}\s*$/ and $line=~/^.\s*else\s*/ and $previndent == $indent) { - ERROR("else should follow close brace '}'\n" . $hereprev); + ERROR("ELSE_AFTER_BRACE", + "else should follow close brace '}'\n" . $hereprev); } if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ and @@ -2544,7 +2696,8 @@ sub process { $s =~ s/\n.*//g; if ($s =~ /^\s*;/) { - ERROR("while should follow close brace '}'\n" . $hereprev); + ERROR("WHILE_AFTER_BRACE", + "while should follow close brace '}'\n" . $hereprev); } } @@ -2557,7 +2710,8 @@ sub process { #no spaces allowed after \ in define if ($line=~/\#\s*define.*\\\s$/) { - WARN("Whitepspace after \\ makes next lines useless\n" . $herecurr); + WARN("WHITESPACE_AFTER_LINE_CONTINUATION", + "Whitepspace after \\ makes next lines useless\n" . $herecurr); } #warn if is #included and is available (uses RAW line) @@ -2569,9 +2723,11 @@ sub process { $1 !~ /$allowed_asm_includes/) { if ($realfile =~ m{^arch/}) { - CHK("Consider using #include instead of \n" . $herecurr); + CHK("ARCH_INCLUDE_LINUX", + "Consider using #include instead of \n" . $herecurr); } else { - WARN("Use #include instead of \n" . $herecurr); + WARN("INCLUDE_LINUX", + "Use #include instead of \n" . $herecurr); } } } @@ -2655,7 +2811,8 @@ sub process { if ($rest !~ /while\s*\(/ && $dstat !~ /$exceptions/) { - ERROR("Macros with multiple statements should be enclosed in a do - while loop\n" . "$here\n$ctx\n"); + ERROR("MULTISTATEMENT_MACRO_USE_DO_WHILE", + "Macros with multiple statements should be enclosed in a do - while loop\n" . "$here\n$ctx\n"); } } elsif ($ctx !~ /;/) { @@ -2665,7 +2822,8 @@ sub process { $dstat !~ /^\.$Ident\s*=/ && $dstat =~ /$Operators/) { - ERROR("Macros with complex values should be enclosed in parenthesis\n" . "$here\n$ctx\n"); + ERROR("COMPLEX_MACRO", + "Macros with complex values should be enclosed in parenthesis\n" . "$here\n$ctx\n"); } } } @@ -2676,7 +2834,8 @@ sub process { # ALIGN(...) # VMLINUX_SYMBOL(...) if ($realfile eq 'vmlinux.lds.h' && $line =~ /(?:(?:^|\s)$Ident\s*=|=\s*$Ident(?:\s|$))/) { - WARN("vmlinux.lds.h needs VMLINUX_SYMBOL() around C-visible symbols\n" . $herecurr); + WARN("MISSING_VMLINUX_SYMBOL", + "vmlinux.lds.h needs VMLINUX_SYMBOL() around C-visible symbols\n" . $herecurr); } # check for redundant bracing round if etc @@ -2724,7 +2883,8 @@ sub process { } } if ($seen && !$allowed) { - WARN("braces {} are not necessary for any arm of this statement\n" . $herectx); + WARN("BRACES", + "braces {} are not necessary for any arm of this statement\n" . $herectx); } } } @@ -2778,33 +2938,38 @@ sub process { $herectx .= raw_line($linenr, $n) . "\n";; } - WARN("braces {} are not necessary for single statement blocks\n" . $herectx); + WARN("BRACES", + "braces {} are not necessary for single statement blocks\n" . $herectx); } } # don't include deprecated include files (uses RAW line) for my $inc (@dep_includes) { if ($rawline =~ m@^.\s*\#\s*include\s*\<$inc>@) { - ERROR("Don't use <$inc>: see Documentation/feature-removal-schedule.txt\n" . $herecurr); + ERROR("DEPRECATED_INCLUDE", + "Don't use <$inc>: see Documentation/feature-removal-schedule.txt\n" . $herecurr); } } # don't use deprecated functions for my $func (@dep_functions) { if ($line =~ /\b$func\b/) { - ERROR("Don't use $func(): see Documentation/feature-removal-schedule.txt\n" . $herecurr); + ERROR("DEPRECATED_FUNCTION", + "Don't use $func(): see Documentation/feature-removal-schedule.txt\n" . $herecurr); } } # no volatiles please my $asm_volatile = qr{\b(__asm__|asm)\s+(__volatile__|volatile)\b}; if ($line =~ /\bvolatile\b/ && $line !~ /$asm_volatile/) { - WARN("Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt\n" . $herecurr); + WARN("VOLATILE", + "Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt\n" . $herecurr); } # warn about #if 0 if ($line =~ /^.\s*\#\s*if\s+0\b/) { - CHK("if this code is redundant consider removing it\n" . + CHK("REDUNDANT_CODE", + "if this code is redundant consider removing it\n" . $herecurr); } @@ -2812,14 +2977,16 @@ sub process { if ($prevline =~ /\bif\s*\(([^\)]*)\)/) { my $expr = $1; if ($line =~ /\bkfree\(\Q$expr\E\);/) { - WARN("kfree(NULL) is safe this check is probably not required\n" . $hereprev); + WARN("NEEDLESS_KFREE", + "kfree(NULL) is safe this check is probably not required\n" . $hereprev); } } # check for needless usb_free_urb() checks if ($prevline =~ /\bif\s*\(([^\)]*)\)/) { my $expr = $1; if ($line =~ /\busb_free_urb\(\Q$expr\E\);/) { - WARN("usb_free_urb(NULL) is safe this check is probably not required\n" . $hereprev); + WARN("NEEDLESS_USB_FREE_URB", + "usb_free_urb(NULL) is safe this check is probably not required\n" . $hereprev); } } @@ -2827,14 +2994,16 @@ sub process { if ($line =~ /\budelay\s*\(\s*(\w+)\s*\)/) { # ignore udelay's < 10, however if (! (($1 =~ /(\d+)/) && ($1 < 10)) ) { - CHK("usleep_range is preferred over udelay; see Documentation/timers/timers-howto.txt\n" . $line); + CHK("USLEEP_RANGE", + "usleep_range is preferred over udelay; see Documentation/timers/timers-howto.txt\n" . $line); } } # warn about unexpectedly long msleep's if ($line =~ /\bmsleep\s*\((\d+)\);/) { if ($1 < 20) { - WARN("msleep < 20ms can sleep for up to 20ms; see Documentation/timers/timers-howto.txt\n" . $line); + WARN("MSLEEP", + "msleep < 20ms can sleep for up to 20ms; see Documentation/timers/timers-howto.txt\n" . $line); } } @@ -2847,7 +3016,8 @@ sub process { # warn about spacing in #ifdefs if ($line =~ /^.\s*\#\s*(ifdef|ifndef|elif)\s\s+/) { - ERROR("exactly one space required after that #$1\n" . $herecurr); + ERROR("SPACING", + "exactly one space required after that #$1\n" . $herecurr); } # check for spinlock_t definitions without a comment. @@ -2855,55 +3025,65 @@ sub process { $line =~ /^.\s*(DEFINE_MUTEX)\s*\(/) { my $which = $1; if (!ctx_has_comment($first_line, $linenr)) { - CHK("$1 definition without comment\n" . $herecurr); + CHK("UNCOMMENTED_DEFINITION", + "$1 definition without comment\n" . $herecurr); } } # check for memory barriers without a comment. if ($line =~ /\b(mb|rmb|wmb|read_barrier_depends|smp_mb|smp_rmb|smp_wmb|smp_read_barrier_depends)\(/) { if (!ctx_has_comment($first_line, $linenr)) { - CHK("memory barrier without comment\n" . $herecurr); + CHK("MEMORY_BARRIER", + "memory barrier without comment\n" . $herecurr); } } # check of hardware specific defines if ($line =~ m@^.\s*\#\s*if.*\b(__i386__|__powerpc64__|__sun__|__s390x__)\b@ && $realfile !~ m@include/asm-@) { - CHK("architecture specific defines should be avoided\n" . $herecurr); + CHK("ARCH_DEFINES", + "architecture specific defines should be avoided\n" . $herecurr); } # Check that the storage class is at the beginning of a declaration if ($line =~ /\b$Storage\b/ && $line !~ /^.\s*$Storage\b/) { - WARN("storage class should be at the beginning of the declaration\n" . $herecurr) + WARN("STORAGE_CLASS", + "storage class should be at the beginning of the declaration\n" . $herecurr) } # check the location of the inline attribute, that it is between # storage class and type. if ($line =~ /\b$Type\s+$Inline\b/ || $line =~ /\b$Inline\s+$Storage\b/) { - ERROR("inline keyword should sit between storage class and type\n" . $herecurr); + ERROR("INLINE_LOCATION", + "inline keyword should sit between storage class and type\n" . $herecurr); } # Check for __inline__ and __inline, prefer inline if ($line =~ /\b(__inline__|__inline)\b/) { - WARN("plain inline is preferred over $1\n" . $herecurr); + WARN("INLINE", + "plain inline is preferred over $1\n" . $herecurr); } # Check for __attribute__ packed, prefer __packed if ($line =~ /\b__attribute__\s*\(\s*\(.*\bpacked\b/) { - WARN("__packed is preferred over __attribute__((packed))\n" . $herecurr); + WARN("PREFER_PACKED", + "__packed is preferred over __attribute__((packed))\n" . $herecurr); } # Check for __attribute__ aligned, prefer __aligned if ($line =~ /\b__attribute__\s*\(\s*\(.*aligned/) { - WARN("__aligned(size) is preferred over __attribute__((aligned(size)))\n" . $herecurr); + WARN("PREFER_ALIGNED", + "__aligned(size) is preferred over __attribute__((aligned(size)))\n" . $herecurr); } # check for sizeof(&) if ($line =~ /\bsizeof\s*\(\s*\&/) { - WARN("sizeof(& should be avoided\n" . $herecurr); + WARN("SIZEOF_ADDRESS", + "sizeof(& should be avoided\n" . $herecurr); } # check for line continuations in quoted strings with odd counts of " if ($rawline =~ /\\$/ && $rawline =~ tr/"/"/ % 2) { - WARN("Avoid line continuations in quoted strings\n" . $herecurr); + WARN("LINE_CONTINUATIONS", + "Avoid line continuations in quoted strings\n" . $herecurr); } # check for new externs in .c files. @@ -2920,17 +3100,20 @@ sub process { if ($s =~ /^\s*;/ && $function_name ne 'uninitialized_var') { - WARN("externs should be avoided in .c files\n" . $herecurr); + WARN("AVOID_EXTERNS", + "externs should be avoided in .c files\n" . $herecurr); } if ($paren_space =~ /\n/) { - WARN("arguments for function declarations should follow identifier\n" . $herecurr); + WARN("FUNCTION_ARGUMENTS", + "arguments for function declarations should follow identifier\n" . $herecurr); } } elsif ($realfile =~ /\.c$/ && defined $stat && $stat =~ /^.\s*extern\s+/) { - WARN("externs should be avoided in .c files\n" . $herecurr); + WARN("AVOID_EXTERNS", + "externs should be avoided in .c files\n" . $herecurr); } # checks for new __setup's @@ -2938,37 +3121,44 @@ sub process { my $name = $1; if (!grep(/$name/, @setup_docs)) { - CHK("__setup appears un-documented -- check Documentation/kernel-parameters.txt\n" . $herecurr); + CHK("UNDOCUMENTED_SETUP", + "__setup appears un-documented -- check Documentation/kernel-parameters.txt\n" . $herecurr); } } # check for pointless casting of kmalloc return if ($line =~ /\*\s*\)\s*[kv][czm]alloc(_node){0,1}\b/) { - WARN("unnecessary cast may hide bugs, see http://c-faq.com/malloc/mallocnocast.html\n" . $herecurr); + WARN("UNNECESSARY_CASTS", + "unnecessary cast may hide bugs, see http://c-faq.com/malloc/mallocnocast.html\n" . $herecurr); } # check for multiple semicolons if ($line =~ /;\s*;\s*$/) { - WARN("Statements terminations use 1 semicolon\n" . $herecurr); + WARN("ONE_SEMICOLON", + "Statements terminations use 1 semicolon\n" . $herecurr); } # check for gcc specific __FUNCTION__ if ($line =~ /__FUNCTION__/) { - WARN("__func__ should be used instead of gcc specific __FUNCTION__\n" . $herecurr); + WARN("USE_FUNC", + "__func__ should be used instead of gcc specific __FUNCTION__\n" . $herecurr); } # check for semaphores initialized locked if ($line =~ /^.\s*sema_init.+,\W?0\W?\)/) { - WARN("consider using a completion\n" . $herecurr); + WARN("CONSIDER_COMPLETION", + "consider using a completion\n" . $herecurr); } # recommend kstrto* over simple_strto* if ($line =~ /\bsimple_(strto.*?)\s*\(/) { - WARN("consider using kstrto* in preference to simple_$1\n" . $herecurr); + WARN("CONSIDER_KSTRTO", + "consider using kstrto* in preference to simple_$1\n" . $herecurr); } # check for __initcall(), use device_initcall() explicitly please if ($line =~ /^.\s*__initcall\s*\(/) { - WARN("please use device_initcall() instead of __initcall()\n" . $herecurr); + WARN("USE_DEVICE_INITCALL", + "please use device_initcall() instead of __initcall()\n" . $herecurr); } # check for various ops structs, ensure they are const. my $struct_ops = qr{acpi_dock_ops| @@ -3010,7 +3200,8 @@ sub process { wd_ops}x; if ($line !~ /\bconst\b/ && $line =~ /\bstruct\s+($struct_ops)\b/) { - WARN("struct $1 should normally be const\n" . + WARN("CONST_STRUCT", + "struct $1 should normally be const\n" . $herecurr); } @@ -3023,7 +3214,8 @@ sub process { $line !~ /\[[^\]]*\.\.\.[^\]]*NR_CPUS[^\]]*\]/ && $line !~ /\[[^\]]*NR_CPUS[^\]]*\.\.\.[^\]]*\]/) { - WARN("usage of NR_CPUS is often wrong - consider using cpu_possible(), num_possible_cpus(), for_each_possible_cpu(), etc\n" . $herecurr); + WARN("NR_CPUS", + "usage of NR_CPUS is often wrong - consider using cpu_possible(), num_possible_cpus(), for_each_possible_cpu(), etc\n" . $herecurr); } # check for %L{u,d,i} in strings @@ -3032,7 +3224,8 @@ sub process { $string = substr($rawline, $-[1], $+[1] - $-[1]); $string =~ s/%%/__/g; if ($string =~ /(?mutex.\n" . $herecurr); + ERROR("LOCKDEP", + "lockdep_no_validate class is reserved for device->mutex.\n" . $herecurr); } } if ($line =~ /debugfs_create_file.*S_IWUGO/ || $line =~ /DEVICE_ATTR.*S_IWUGO/ ) { - WARN("Exporting world writable files is usually an error. Consider more restrictive permissions.\n" . $herecurr); + WARN("EXPORTED_WORLD_WRITABLE", + "Exporting world writable files is usually an error. Consider more restrictive permissions.\n" . $herecurr); } # Check for memset with swapped arguments if ($line =~ /memset.*\,(\ |)(0x|)0(\ |0|)\);/) { - ERROR("memset size is 3rd argument, not the second.\n" . $herecurr); + ERROR("MEMSET", + "memset size is 3rd argument, not the second.\n" . $herecurr); } } @@ -3086,10 +3284,12 @@ sub process { } if (!$is_patch) { - ERROR("Does not appear to be a unified-diff format patch\n"); + ERROR("NOT_UNIFIED_DIFF", + "Does not appear to be a unified-diff format patch\n"); } if ($is_patch && $chk_signoff && $signoff == 0) { - ERROR("Missing Signed-off-by: line(s)\n"); + ERROR("MISSING_SIGN_OFF", + "Missing Signed-off-by: line(s)\n"); } print report_dump(); @@ -3111,13 +3311,25 @@ sub process { } } + if (keys %ignore_type) { + print "NOTE: Ignored message types:"; + foreach my $ignore (sort keys %ignore_type) { + print " $ignore"; + } + print "\n"; + print "\n" if ($quiet == 0); + } + if ($clean == 1 && $quiet == 0) { print "$vname has no obvious style problems and is ready for submission.\n" } if ($clean == 0 && $quiet == 0) { - print "$vname has style problems, please review. If any of these errors\n"; - print "are false positives report them to the maintainer, see\n"; - print "CHECKPATCH in MAINTAINERS.\n"; + print << "EOM"; +$vname has style problems, please review. + +If any of these errors are false positives, please report +them to the maintainer, see CHECKPATCH in MAINTAINERS. +EOM } return $clean; -- cgit v0.10.2 From 34d99219726ff4d3425b360d1e8d081627a73a00 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 25 Jul 2011 17:13:26 -0700 Subject: checkpatch: make utf-8 test --strict Some patches are sent in using ISO-8859 or even Windows codepage 1252. Make checkpatch accept these by default and only emit the "Invalid UTF-8" message when using --strict. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 05777ab..8dd720a 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -1609,8 +1609,8 @@ sub process { my $ptr = substr($blank, 0, length($utf8_prefix)) . "^"; my $hereptr = "$hereline$ptr\n"; - ERROR("INVALID_UTF8", - "Invalid UTF-8, patch and commit message should be encoded in UTF-8\n" . $hereptr); + CHK("INVALID_UTF8", + "Invalid UTF-8, patch and commit message should be encoded in UTF-8\n" . $hereptr); } # ignore non-hunk lines and lines being removed -- cgit v0.10.2 From 6e60c02e9d9427f59842192bdb123cbeaf8bc9a0 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 25 Jul 2011 17:13:27 -0700 Subject: checkpatch.pl: update $logFunctions Previous behavior allowed only alphabetic prefixes like pr_info to exceed the 80 column line length limit. ath6kl wants to add a digit into the prefix, so allow numbers as well as digits in the _ printks. __ratelimited and __once and WARN_RATELIMIT and WARN_ONCE may now exceed 80 cols. Add missing _printk type for completeness. Signed-off-by: Joe Perches Cc: Andy Whitcroft Cc: Kalle Valo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 8dd720a..9d761c9 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -257,9 +257,9 @@ our $typeTypedefs = qr{(?x: )}; our $logFunctions = qr{(?x: - printk| - [a-z]+_(emerg|alert|crit|err|warning|warn|notice|info|debug|dbg|vdbg|devel|cont|WARN)| - WARN| + printk(?:_ratelimited|_once|)| + [a-z0-9]+_(?:printk|emerg|alert|crit|err|warning|warn|notice|info|debug|dbg|vdbg|devel|cont|WARN)(?:_ratelimited|_once|)| + WARN(?:_RATELIMIT|_ONCE|)| panic| MODULE_[A-Z_]+ )}; -- cgit v0.10.2 From 06b4501e88ad10f02849a3f9d7408ed6ae15a53f Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Mon, 25 Jul 2011 17:13:27 -0700 Subject: misc/eeprom: add driver for microwire 93xx46 EEPROMs Add EEPROM driver for 93xx46 chips. It can also be used with spi_gpio driver to access 93xx46 EEPROMs connected over GPIO lines. This driver supports read/write/erase access to the EEPROM chips over sysfs files. [rdunlap@xenotime.net: fix printk format] Signed-off-by: Anatolij Gustschin Cc: Jonathan Cameron Cc: Grant Likely Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig index 9118613..620de28 100644 --- a/drivers/misc/eeprom/Kconfig +++ b/drivers/misc/eeprom/Kconfig @@ -70,4 +70,17 @@ config EEPROM_93CX6 If unsure, say N. +config EEPROM_93XX46 + tristate "Microwire EEPROM 93XX46 support" + depends on SPI && SYSFS + help + Driver for the microwire EEPROM chipsets 93xx46x. The driver + supports both read and write commands and also the command to + erase the whole EEPROM. + + This driver can also be built as a module. If so, the module + will be called eeprom_93xx46. + + If unsure, say N. + endmenu diff --git a/drivers/misc/eeprom/Makefile b/drivers/misc/eeprom/Makefile index df3d68f..ec55aad 100644 --- a/drivers/misc/eeprom/Makefile +++ b/drivers/misc/eeprom/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_EEPROM_AT25) += at25.o obj-$(CONFIG_EEPROM_LEGACY) += eeprom.o obj-$(CONFIG_EEPROM_MAX6875) += max6875.o obj-$(CONFIG_EEPROM_93CX6) += eeprom_93cx6.o +obj-$(CONFIG_EEPROM_93XX46) += eeprom_93xx46.o diff --git a/drivers/misc/eeprom/eeprom_93xx46.c b/drivers/misc/eeprom/eeprom_93xx46.c new file mode 100644 index 0000000..0c7ebb1 --- /dev/null +++ b/drivers/misc/eeprom/eeprom_93xx46.c @@ -0,0 +1,410 @@ +/* + * Driver for 93xx46 EEPROMs + * + * (C) 2011 DENX Software Engineering, Anatolij Gustschin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define OP_START 0x4 +#define OP_WRITE (OP_START | 0x1) +#define OP_READ (OP_START | 0x2) +#define ADDR_EWDS 0x00 +#define ADDR_ERAL 0x20 +#define ADDR_EWEN 0x30 + +struct eeprom_93xx46_dev { + struct spi_device *spi; + struct eeprom_93xx46_platform_data *pdata; + struct bin_attribute bin; + struct mutex lock; + int addrlen; +}; + +static ssize_t +eeprom_93xx46_bin_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct eeprom_93xx46_dev *edev; + struct device *dev; + struct spi_message m; + struct spi_transfer t[2]; + int bits, ret; + u16 cmd_addr; + + dev = container_of(kobj, struct device, kobj); + edev = dev_get_drvdata(dev); + + if (unlikely(off >= edev->bin.size)) + return 0; + if ((off + count) > edev->bin.size) + count = edev->bin.size - off; + if (unlikely(!count)) + return count; + + cmd_addr = OP_READ << edev->addrlen; + + if (edev->addrlen == 7) { + cmd_addr |= off & 0x7f; + bits = 10; + } else { + cmd_addr |= off & 0x3f; + bits = 9; + } + + dev_dbg(&edev->spi->dev, "read cmd 0x%x, %d Hz\n", + cmd_addr, edev->spi->max_speed_hz); + + spi_message_init(&m); + memset(t, 0, sizeof(t)); + + t[0].tx_buf = (char *)&cmd_addr; + t[0].len = 2; + t[0].bits_per_word = bits; + spi_message_add_tail(&t[0], &m); + + t[1].rx_buf = buf; + t[1].len = count; + t[1].bits_per_word = 8; + spi_message_add_tail(&t[1], &m); + + mutex_lock(&edev->lock); + + if (edev->pdata->prepare) + edev->pdata->prepare(edev); + + ret = spi_sync(edev->spi, &m); + /* have to wait at least Tcsl ns */ + ndelay(250); + if (ret) { + dev_err(&edev->spi->dev, "read %zu bytes at %d: err. %d\n", + count, (int)off, ret); + } + + if (edev->pdata->finish) + edev->pdata->finish(edev); + + mutex_unlock(&edev->lock); + return ret ? : count; +} + +static int eeprom_93xx46_ew(struct eeprom_93xx46_dev *edev, int is_on) +{ + struct spi_message m; + struct spi_transfer t; + int bits, ret; + u16 cmd_addr; + + cmd_addr = OP_START << edev->addrlen; + if (edev->addrlen == 7) { + cmd_addr |= (is_on ? ADDR_EWEN : ADDR_EWDS) << 1; + bits = 10; + } else { + cmd_addr |= (is_on ? ADDR_EWEN : ADDR_EWDS); + bits = 9; + } + + dev_dbg(&edev->spi->dev, "ew cmd 0x%04x\n", cmd_addr); + + spi_message_init(&m); + memset(&t, 0, sizeof(t)); + + t.tx_buf = &cmd_addr; + t.len = 2; + t.bits_per_word = bits; + spi_message_add_tail(&t, &m); + + mutex_lock(&edev->lock); + + if (edev->pdata->prepare) + edev->pdata->prepare(edev); + + ret = spi_sync(edev->spi, &m); + /* have to wait at least Tcsl ns */ + ndelay(250); + if (ret) + dev_err(&edev->spi->dev, "erase/write %sable error %d\n", + is_on ? "en" : "dis", ret); + + if (edev->pdata->finish) + edev->pdata->finish(edev); + + mutex_unlock(&edev->lock); + return ret; +} + +static ssize_t +eeprom_93xx46_write_word(struct eeprom_93xx46_dev *edev, + const char *buf, unsigned off) +{ + struct spi_message m; + struct spi_transfer t[2]; + int bits, data_len, ret; + u16 cmd_addr; + + cmd_addr = OP_WRITE << edev->addrlen; + + if (edev->addrlen == 7) { + cmd_addr |= off & 0x7f; + bits = 10; + data_len = 1; + } else { + cmd_addr |= off & 0x3f; + bits = 9; + data_len = 2; + } + + dev_dbg(&edev->spi->dev, "write cmd 0x%x\n", cmd_addr); + + spi_message_init(&m); + memset(t, 0, sizeof(t)); + + t[0].tx_buf = (char *)&cmd_addr; + t[0].len = 2; + t[0].bits_per_word = bits; + spi_message_add_tail(&t[0], &m); + + t[1].tx_buf = buf; + t[1].len = data_len; + t[1].bits_per_word = 8; + spi_message_add_tail(&t[1], &m); + + ret = spi_sync(edev->spi, &m); + /* have to wait program cycle time Twc ms */ + mdelay(6); + return ret; +} + +static ssize_t +eeprom_93xx46_bin_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct eeprom_93xx46_dev *edev; + struct device *dev; + int i, ret, step = 1; + + dev = container_of(kobj, struct device, kobj); + edev = dev_get_drvdata(dev); + + if (unlikely(off >= edev->bin.size)) + return 0; + if ((off + count) > edev->bin.size) + count = edev->bin.size - off; + if (unlikely(!count)) + return count; + + /* only write even number of bytes on 16-bit devices */ + if (edev->addrlen == 6) { + step = 2; + count &= ~1; + } + + /* erase/write enable */ + ret = eeprom_93xx46_ew(edev, 1); + if (ret) + return ret; + + mutex_lock(&edev->lock); + + if (edev->pdata->prepare) + edev->pdata->prepare(edev); + + for (i = 0; i < count; i += step) { + ret = eeprom_93xx46_write_word(edev, &buf[i], off + i); + if (ret) { + dev_err(&edev->spi->dev, "write failed at %d: %d\n", + (int)off + i, ret); + break; + } + } + + if (edev->pdata->finish) + edev->pdata->finish(edev); + + mutex_unlock(&edev->lock); + + /* erase/write disable */ + eeprom_93xx46_ew(edev, 0); + return ret ? : count; +} + +static int eeprom_93xx46_eral(struct eeprom_93xx46_dev *edev) +{ + struct eeprom_93xx46_platform_data *pd = edev->pdata; + struct spi_message m; + struct spi_transfer t; + int bits, ret; + u16 cmd_addr; + + cmd_addr = OP_START << edev->addrlen; + if (edev->addrlen == 7) { + cmd_addr |= ADDR_ERAL << 1; + bits = 10; + } else { + cmd_addr |= ADDR_ERAL; + bits = 9; + } + + spi_message_init(&m); + memset(&t, 0, sizeof(t)); + + t.tx_buf = &cmd_addr; + t.len = 2; + t.bits_per_word = bits; + spi_message_add_tail(&t, &m); + + mutex_lock(&edev->lock); + + if (edev->pdata->prepare) + edev->pdata->prepare(edev); + + ret = spi_sync(edev->spi, &m); + if (ret) + dev_err(&edev->spi->dev, "erase error %d\n", ret); + /* have to wait erase cycle time Tec ms */ + mdelay(6); + + if (pd->finish) + pd->finish(edev); + + mutex_unlock(&edev->lock); + return ret; +} + +static ssize_t eeprom_93xx46_store_erase(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct eeprom_93xx46_dev *edev = dev_get_drvdata(dev); + int erase = 0, ret; + + sscanf(buf, "%d", &erase); + if (erase) { + ret = eeprom_93xx46_ew(edev, 1); + if (ret) + return ret; + ret = eeprom_93xx46_eral(edev); + if (ret) + return ret; + ret = eeprom_93xx46_ew(edev, 0); + if (ret) + return ret; + } + return count; +} +static DEVICE_ATTR(erase, S_IWUSR, NULL, eeprom_93xx46_store_erase); + +static int __devinit eeprom_93xx46_probe(struct spi_device *spi) +{ + struct eeprom_93xx46_platform_data *pd; + struct eeprom_93xx46_dev *edev; + int err; + + pd = spi->dev.platform_data; + if (!pd) { + dev_err(&spi->dev, "missing platform data\n"); + return -ENODEV; + } + + edev = kzalloc(sizeof(*edev), GFP_KERNEL); + if (!edev) + return -ENOMEM; + + if (pd->flags & EE_ADDR8) + edev->addrlen = 7; + else if (pd->flags & EE_ADDR16) + edev->addrlen = 6; + else { + dev_err(&spi->dev, "unspecified address type\n"); + err = -EINVAL; + goto fail; + } + + mutex_init(&edev->lock); + + edev->spi = spi_dev_get(spi); + edev->pdata = pd; + + sysfs_bin_attr_init(&edev->bin); + edev->bin.attr.name = "eeprom"; + edev->bin.attr.mode = S_IRUSR; + edev->bin.read = eeprom_93xx46_bin_read; + edev->bin.size = 128; + if (!(pd->flags & EE_READONLY)) { + edev->bin.write = eeprom_93xx46_bin_write; + edev->bin.attr.mode |= S_IWUSR; + } + + err = sysfs_create_bin_file(&spi->dev.kobj, &edev->bin); + if (err) + goto fail; + + dev_info(&spi->dev, "%d-bit eeprom %s\n", + (pd->flags & EE_ADDR8) ? 8 : 16, + (pd->flags & EE_READONLY) ? "(readonly)" : ""); + + if (!(pd->flags & EE_READONLY)) { + if (device_create_file(&spi->dev, &dev_attr_erase)) + dev_err(&spi->dev, "can't create erase interface\n"); + } + + dev_set_drvdata(&spi->dev, edev); + return 0; +fail: + kfree(edev); + return err; +} + +static int __devexit eeprom_93xx46_remove(struct spi_device *spi) +{ + struct eeprom_93xx46_dev *edev = dev_get_drvdata(&spi->dev); + + if (!(edev->pdata->flags & EE_READONLY)) + device_remove_file(&spi->dev, &dev_attr_erase); + + sysfs_remove_bin_file(&spi->dev.kobj, &edev->bin); + dev_set_drvdata(&spi->dev, NULL); + kfree(edev); + return 0; +} + +static struct spi_driver eeprom_93xx46_driver = { + .driver = { + .name = "93xx46", + .owner = THIS_MODULE, + }, + .probe = eeprom_93xx46_probe, + .remove = __devexit_p(eeprom_93xx46_remove), +}; + +static int __init eeprom_93xx46_init(void) +{ + return spi_register_driver(&eeprom_93xx46_driver); +} +module_init(eeprom_93xx46_init); + +static void __exit eeprom_93xx46_exit(void) +{ + spi_unregister_driver(&eeprom_93xx46_driver); +} +module_exit(eeprom_93xx46_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Driver for 93xx46 EEPROMs"); +MODULE_AUTHOR("Anatolij Gustschin "); +MODULE_ALIAS("spi:93xx46"); diff --git a/include/linux/eeprom_93xx46.h b/include/linux/eeprom_93xx46.h new file mode 100644 index 0000000..0679181 --- /dev/null +++ b/include/linux/eeprom_93xx46.h @@ -0,0 +1,18 @@ +/* + * Module: eeprom_93xx46 + * platform description for 93xx46 EEPROMs. + */ + +struct eeprom_93xx46_platform_data { + unsigned char flags; +#define EE_ADDR8 0x01 /* 8 bit addr. cfg */ +#define EE_ADDR16 0x02 /* 16 bit addr. cfg */ +#define EE_READONLY 0x08 /* forbid writing */ + + /* + * optional hooks to control additional logic + * before and after spi transfer. + */ + void (*prepare)(void *); + void (*finish)(void *); +}; -- cgit v0.10.2 From 469dded1839105cfbfc265376e23e24dbc48d2a7 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Mon, 25 Jul 2011 17:13:29 -0700 Subject: misc/eeprom: add eeprom access driver for digsy_mtc board Both displays on digsy_mtc board obtain their configuration from microwire EEPROMs which are connected to the SoC over GPIO lines. We need an easy way to access the EEPROMs to write the needed display configuration or to read out the currently programmed configuration. The generic eeprom_93xx46 SPI driver added by previous patch allows EEPROM access over sysfs. Using the simple driver added by this patch we provide used GPIO interface and access control description on the board for generic eeprom_93xx46 driver and spi_gpio driver. Signed-off-by: Anatolij Gustschin Cc: Jonathan Cameron Cc: Grant Likely Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig index 620de28..26cf12c 100644 --- a/drivers/misc/eeprom/Kconfig +++ b/drivers/misc/eeprom/Kconfig @@ -83,4 +83,16 @@ config EEPROM_93XX46 If unsure, say N. +config EEPROM_DIGSY_MTC_CFG + bool "DigsyMTC display configuration EEPROMs device" + depends on PPC_MPC5200_GPIO && GPIOLIB && SPI_GPIO + help + This option enables access to display configuration EEPROMs + on digsy_mtc board. You have to additionally select Microwire + EEPROM 93XX46 driver. sysfs entries will be created for that + EEPROM allowing to read/write the configuration data or to + erase the whole EEPROM. + + If unsure, say N. + endmenu diff --git a/drivers/misc/eeprom/Makefile b/drivers/misc/eeprom/Makefile index ec55aad..fc1e81d 100644 --- a/drivers/misc/eeprom/Makefile +++ b/drivers/misc/eeprom/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_EEPROM_LEGACY) += eeprom.o obj-$(CONFIG_EEPROM_MAX6875) += max6875.o obj-$(CONFIG_EEPROM_93CX6) += eeprom_93cx6.o obj-$(CONFIG_EEPROM_93XX46) += eeprom_93xx46.o +obj-$(CONFIG_EEPROM_DIGSY_MTC_CFG) += digsy_mtc_eeprom.o diff --git a/drivers/misc/eeprom/digsy_mtc_eeprom.c b/drivers/misc/eeprom/digsy_mtc_eeprom.c new file mode 100644 index 0000000..66d9e1b --- /dev/null +++ b/drivers/misc/eeprom/digsy_mtc_eeprom.c @@ -0,0 +1,85 @@ +/* + * EEPROMs access control driver for display configuration EEPROMs + * on DigsyMTC board. + * + * (C) 2011 DENX Software Engineering, Anatolij Gustschin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#define GPIO_EEPROM_CLK 216 +#define GPIO_EEPROM_CS 210 +#define GPIO_EEPROM_DI 217 +#define GPIO_EEPROM_DO 249 +#define GPIO_EEPROM_OE 255 +#define EE_SPI_BUS_NUM 1 + +static void digsy_mtc_op_prepare(void *p) +{ + /* enable */ + gpio_set_value(GPIO_EEPROM_OE, 0); +} + +static void digsy_mtc_op_finish(void *p) +{ + /* disable */ + gpio_set_value(GPIO_EEPROM_OE, 1); +} + +struct eeprom_93xx46_platform_data digsy_mtc_eeprom_data = { + .flags = EE_ADDR8, + .prepare = digsy_mtc_op_prepare, + .finish = digsy_mtc_op_finish, +}; + +static struct spi_gpio_platform_data eeprom_spi_gpio_data = { + .sck = GPIO_EEPROM_CLK, + .mosi = GPIO_EEPROM_DI, + .miso = GPIO_EEPROM_DO, + .num_chipselect = 1, +}; + +static struct platform_device digsy_mtc_eeprom = { + .name = "spi_gpio", + .id = EE_SPI_BUS_NUM, + .dev = { + .platform_data = &eeprom_spi_gpio_data, + }, +}; + +static struct spi_board_info digsy_mtc_eeprom_info[] __initdata = { + { + .modalias = "93xx46", + .max_speed_hz = 1000000, + .bus_num = EE_SPI_BUS_NUM, + .chip_select = 0, + .mode = SPI_MODE_0, + .controller_data = (void *)GPIO_EEPROM_CS, + .platform_data = &digsy_mtc_eeprom_data, + }, +}; + +static int __init digsy_mtc_eeprom_devices_init(void) +{ + int ret; + + ret = gpio_request_one(GPIO_EEPROM_OE, GPIOF_OUT_INIT_HIGH, + "93xx46 EEPROMs OE"); + if (ret) { + pr_err("can't request gpio %d\n", GPIO_EEPROM_OE); + return ret; + } + spi_register_board_info(digsy_mtc_eeprom_info, + ARRAY_SIZE(digsy_mtc_eeprom_info)); + return platform_device_register(&digsy_mtc_eeprom); +} +device_initcall(digsy_mtc_eeprom_devices_init); -- cgit v0.10.2 From 7afe1845dd1e7c90828c942daed7e57ffa7c38d6 Mon Sep 17 00:00:00 2001 From: Sameer Nanda Date: Mon, 25 Jul 2011 17:13:29 -0700 Subject: init: skip calibration delay if previously done For each CPU, do the calibration delay only once. For subsequent calls, use the cached per-CPU value of loops_per_jiffy. This saves about 200ms of resume time on dual core Intel Atom N5xx based systems. This helps bring down the kernel resume time on such systems from about 500ms to about 300ms. [akpm@linux-foundation.org: make cpu_loops_per_jiffy static] [akpm@linux-foundation.org: clean up message text] [akpm@linux-foundation.org: fix things up after upstream rmk changes] Signed-off-by: Sameer Nanda Cc: Phil Carmody Cc: Andrew Worsley Cc: David Daney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/init/calibrate.c b/init/calibrate.c index aae2f40..24df797 100644 --- a/init/calibrate.c +++ b/init/calibrate.c @@ -9,6 +9,7 @@ #include #include #include +#include unsigned long lpj_fine; unsigned long preset_lpj; @@ -243,12 +244,19 @@ recalibrate: return lpj; } +static DEFINE_PER_CPU(unsigned long, cpu_loops_per_jiffy) = { 0 }; + void __cpuinit calibrate_delay(void) { unsigned long lpj; static bool printed; + int this_cpu = smp_processor_id(); - if (preset_lpj) { + if (per_cpu(cpu_loops_per_jiffy, this_cpu)) { + lpj = per_cpu(cpu_loops_per_jiffy, this_cpu); + pr_info("Calibrating delay loop (skipped) " + "already calibrated this CPU"); + } else if (preset_lpj) { lpj = preset_lpj; if (!printed) pr_info("Calibrating delay loop (skipped) " @@ -266,6 +274,7 @@ void __cpuinit calibrate_delay(void) pr_info("Calibrating delay loop... "); lpj = calibrate_delay_converge(); } + per_cpu(cpu_loops_per_jiffy, this_cpu) = lpj; if (!printed) pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n", lpj/(500000/HZ), -- cgit v0.10.2 From 955dbea3c7133d3ccfaa79c7eba1244c1de42865 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Mon, 25 Jul 2011 17:13:30 -0700 Subject: drivers/rtc/rtc-mpc5121.c: add support for RTC on MPC5200 MPC5200B contains a limited version of RTC from MPC5121. Add support for the RTC on that CPU. Signed-off-by: Dmitry Eremin-Solenikov Cc: Alessandro Zummo Cc: Anatolij Gustschin Cc: Grant Likely Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index dcb61e2..55affcd 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1006,10 +1006,10 @@ config RTC_DRV_MC13XXX config RTC_DRV_MPC5121 tristate "Freescale MPC5121 built-in RTC" - depends on PPC_MPC512x && RTC_CLASS + depends on PPC_MPC512x || PPC_MPC52xx help If you say yes here you will get support for the - built-in RTC MPC5121. + built-in RTC on MPC5121 or on MPC5200. This driver can also be built as a module. If so, the module will be called rtc-mpc5121. diff --git a/drivers/rtc/rtc-mpc5121.c b/drivers/rtc/rtc-mpc5121.c index 09ccd8d..da60915 100644 --- a/drivers/rtc/rtc-mpc5121.c +++ b/drivers/rtc/rtc-mpc5121.c @@ -3,6 +3,7 @@ * * Copyright 2007, Domen Puncer * Copyright 2008, Freescale Semiconductor, Inc. All rights reserved. + * Copyright 2011, Dmitry Eremin-Solenikov * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -145,6 +146,55 @@ static int mpc5121_rtc_set_time(struct device *dev, struct rtc_time *tm) return 0; } +static int mpc5200_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + struct mpc5121_rtc_data *rtc = dev_get_drvdata(dev); + struct mpc5121_rtc_regs __iomem *regs = rtc->regs; + int tmp; + + tm->tm_sec = in_8(®s->second); + tm->tm_min = in_8(®s->minute); + + /* 12 hour format? */ + if (in_8(®s->hour) & 0x20) + tm->tm_hour = (in_8(®s->hour) >> 1) + + (in_8(®s->hour) & 1 ? 12 : 0); + else + tm->tm_hour = in_8(®s->hour); + + tmp = in_8(®s->wday_mday); + tm->tm_mday = tmp & 0x1f; + tm->tm_mon = in_8(®s->month) - 1; + tm->tm_year = in_be16(®s->year) - 1900; + tm->tm_wday = (tmp >> 5) % 7; + tm->tm_yday = rtc_year_days(tm->tm_mday, tm->tm_mon, tm->tm_year); + tm->tm_isdst = 0; + + return 0; +} + +static int mpc5200_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + struct mpc5121_rtc_data *rtc = dev_get_drvdata(dev); + struct mpc5121_rtc_regs __iomem *regs = rtc->regs; + + mpc5121_rtc_update_smh(regs, tm); + + /* date */ + out_8(®s->month_set, tm->tm_mon + 1); + out_8(®s->weekday_set, tm->tm_wday ? tm->tm_wday : 7); + out_8(®s->date_set, tm->tm_mday); + out_be16(®s->year_set, tm->tm_year + 1900); + + /* set date sequence */ + out_8(®s->set_date, 0x1); + out_8(®s->set_date, 0x3); + out_8(®s->set_date, 0x1); + out_8(®s->set_date, 0x0); + + return 0; +} + static int mpc5121_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) { struct mpc5121_rtc_data *rtc = dev_get_drvdata(dev); @@ -248,11 +298,18 @@ static const struct rtc_class_ops mpc5121_rtc_ops = { .alarm_irq_enable = mpc5121_rtc_alarm_irq_enable, }; +static const struct rtc_class_ops mpc5200_rtc_ops = { + .read_time = mpc5200_rtc_read_time, + .set_time = mpc5200_rtc_set_time, + .read_alarm = mpc5121_rtc_read_alarm, + .set_alarm = mpc5121_rtc_set_alarm, + .alarm_irq_enable = mpc5121_rtc_alarm_irq_enable, +}; + static int __devinit mpc5121_rtc_probe(struct platform_device *op) { struct mpc5121_rtc_data *rtc; int err = 0; - u32 ka; rtc = kzalloc(sizeof(*rtc), GFP_KERNEL); if (!rtc) @@ -287,15 +344,22 @@ static int __devinit mpc5121_rtc_probe(struct platform_device *op) goto out_dispose2; } - ka = in_be32(&rtc->regs->keep_alive); - if (ka & 0x02) { - dev_warn(&op->dev, - "mpc5121-rtc: Battery or oscillator failure!\n"); - out_be32(&rtc->regs->keep_alive, ka); + if (of_device_is_compatible(op->dev.of_node, "fsl,mpc5121-rtc")) { + u32 ka; + ka = in_be32(&rtc->regs->keep_alive); + if (ka & 0x02) { + dev_warn(&op->dev, + "mpc5121-rtc: Battery or oscillator failure!\n"); + out_be32(&rtc->regs->keep_alive, ka); + } + + rtc->rtc = rtc_device_register("mpc5121-rtc", &op->dev, + &mpc5121_rtc_ops, THIS_MODULE); + } else { + rtc->rtc = rtc_device_register("mpc5200-rtc", &op->dev, + &mpc5200_rtc_ops, THIS_MODULE); } - rtc->rtc = rtc_device_register("mpc5121-rtc", &op->dev, - &mpc5121_rtc_ops, THIS_MODULE); if (IS_ERR(rtc->rtc)) { err = PTR_ERR(rtc->rtc); goto out_free_irq; @@ -340,6 +404,7 @@ static int __devexit mpc5121_rtc_remove(struct platform_device *op) static struct of_device_id mpc5121_rtc_match[] __devinitdata = { { .compatible = "fsl,mpc5121-rtc", }, + { .compatible = "fsl,mpc5200-rtc", }, {}, }; -- cgit v0.10.2 From cefe4fbbaab8e20a7581a187db82d33c1e3320c0 Mon Sep 17 00:00:00 2001 From: Donggeun Kim Date: Mon, 25 Jul 2011 17:13:32 -0700 Subject: drivers/rtc/rtc-s3c.c: support clock gating Add support for clock gating. Power consumption can be reduced by setting rtc_clk disabled state except for when RTC related registers are accessed. Signed-off-by: Donggeun Kim Signed-off-by: MyungJoo Ham Signed-off-by: KyungMin Park Cc: Alessandro Zummo Cc: Ben Dooks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 16512ec..53c99b1 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -57,11 +57,13 @@ static irqreturn_t s3c_rtc_alarmirq(int irq, void *id) { struct rtc_device *rdev = id; + clk_enable(rtc_clk); rtc_update_irq(rdev, 1, RTC_AF | RTC_IRQF); if (s3c_rtc_cpu_type == TYPE_S3C64XX) writeb(S3C2410_INTP_ALM, s3c_rtc_base + S3C2410_INTP); + clk_disable(rtc_clk); return IRQ_HANDLED; } @@ -69,11 +71,13 @@ static irqreturn_t s3c_rtc_tickirq(int irq, void *id) { struct rtc_device *rdev = id; + clk_enable(rtc_clk); rtc_update_irq(rdev, 1, RTC_PF | RTC_IRQF); if (s3c_rtc_cpu_type == TYPE_S3C64XX) writeb(S3C2410_INTP_TIC, s3c_rtc_base + S3C2410_INTP); + clk_disable(rtc_clk); return IRQ_HANDLED; } @@ -84,12 +88,14 @@ static int s3c_rtc_setaie(struct device *dev, unsigned int enabled) pr_debug("%s: aie=%d\n", __func__, enabled); + clk_enable(rtc_clk); tmp = readb(s3c_rtc_base + S3C2410_RTCALM) & ~S3C2410_RTCALM_ALMEN; if (enabled) tmp |= S3C2410_RTCALM_ALMEN; writeb(tmp, s3c_rtc_base + S3C2410_RTCALM); + clk_disable(rtc_clk); return 0; } @@ -103,6 +109,7 @@ static int s3c_rtc_setfreq(struct device *dev, int freq) if (!is_power_of_2(freq)) return -EINVAL; + clk_enable(rtc_clk); spin_lock_irq(&s3c_rtc_pie_lock); if (s3c_rtc_cpu_type == TYPE_S3C2410) { @@ -114,6 +121,7 @@ static int s3c_rtc_setfreq(struct device *dev, int freq) writel(tmp, s3c_rtc_base + S3C2410_TICNT); spin_unlock_irq(&s3c_rtc_pie_lock); + clk_disable(rtc_clk); return 0; } @@ -125,6 +133,7 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) unsigned int have_retried = 0; void __iomem *base = s3c_rtc_base; + clk_enable(rtc_clk); retry_get_time: rtc_tm->tm_min = readb(base + S3C2410_RTCMIN); rtc_tm->tm_hour = readb(base + S3C2410_RTCHOUR); @@ -157,6 +166,7 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) rtc_tm->tm_year += 100; rtc_tm->tm_mon -= 1; + clk_disable(rtc_clk); return rtc_valid_tm(rtc_tm); } @@ -165,6 +175,7 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm) void __iomem *base = s3c_rtc_base; int year = tm->tm_year - 100; + clk_enable(rtc_clk); pr_debug("set time %04d.%02d.%02d %02d:%02d:%02d\n", 1900 + tm->tm_year, tm->tm_mon, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec); @@ -182,6 +193,7 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm) writeb(bin2bcd(tm->tm_mday), base + S3C2410_RTCDATE); writeb(bin2bcd(tm->tm_mon + 1), base + S3C2410_RTCMON); writeb(bin2bcd(year), base + S3C2410_RTCYEAR); + clk_disable(rtc_clk); return 0; } @@ -192,6 +204,7 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm) void __iomem *base = s3c_rtc_base; unsigned int alm_en; + clk_enable(rtc_clk); alm_tm->tm_sec = readb(base + S3C2410_ALMSEC); alm_tm->tm_min = readb(base + S3C2410_ALMMIN); alm_tm->tm_hour = readb(base + S3C2410_ALMHOUR); @@ -243,6 +256,7 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm) else alm_tm->tm_year = -1; + clk_disable(rtc_clk); return 0; } @@ -252,6 +266,7 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) void __iomem *base = s3c_rtc_base; unsigned int alrm_en; + clk_enable(rtc_clk); pr_debug("s3c_rtc_setalarm: %d, %04d.%02d.%02d %02d:%02d:%02d\n", alrm->enabled, 1900 + tm->tm_year, tm->tm_mon, tm->tm_mday, @@ -282,6 +297,7 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) s3c_rtc_setaie(dev, alrm->enabled); + clk_disable(rtc_clk); return 0; } @@ -289,6 +305,7 @@ static int s3c_rtc_proc(struct device *dev, struct seq_file *seq) { unsigned int ticnt; + clk_enable(rtc_clk); if (s3c_rtc_cpu_type == TYPE_S3C64XX) { ticnt = readw(s3c_rtc_base + S3C2410_RTCCON); ticnt &= S3C64XX_RTCCON_TICEN; @@ -298,6 +315,7 @@ static int s3c_rtc_proc(struct device *dev, struct seq_file *seq) } seq_printf(seq, "periodic_IRQ\t: %s\n", ticnt ? "yes" : "no"); + clk_disable(rtc_clk); return 0; } @@ -360,6 +378,7 @@ static void s3c_rtc_enable(struct platform_device *pdev, int en) if (s3c_rtc_base == NULL) return; + clk_enable(rtc_clk); if (!en) { tmp = readw(base + S3C2410_RTCCON); if (s3c_rtc_cpu_type == TYPE_S3C64XX) @@ -399,6 +418,7 @@ static void s3c_rtc_enable(struct platform_device *pdev, int en) base + S3C2410_RTCCON); } } + clk_disable(rtc_clk); } static int __devexit s3c_rtc_remove(struct platform_device *dev) @@ -410,7 +430,6 @@ static int __devexit s3c_rtc_remove(struct platform_device *dev) s3c_rtc_setaie(&dev->dev, 0); - clk_disable(rtc_clk); clk_put(rtc_clk); rtc_clk = NULL; @@ -530,6 +549,8 @@ static int __devinit s3c_rtc_probe(struct platform_device *pdev) s3c_rtc_setfreq(&pdev->dev, 1); + clk_disable(rtc_clk); + return 0; err_nortc: @@ -555,6 +576,7 @@ static int ticnt_save, ticnt_en_save; static int s3c_rtc_suspend(struct platform_device *pdev, pm_message_t state) { + clk_enable(rtc_clk); /* save TICNT for anyone using periodic interrupts */ ticnt_save = readb(s3c_rtc_base + S3C2410_TICNT); if (s3c_rtc_cpu_type == TYPE_S3C64XX) { @@ -569,6 +591,7 @@ static int s3c_rtc_suspend(struct platform_device *pdev, pm_message_t state) else dev_err(&pdev->dev, "enable_irq_wake failed\n"); } + clk_disable(rtc_clk); return 0; } @@ -577,6 +600,7 @@ static int s3c_rtc_resume(struct platform_device *pdev) { unsigned int tmp; + clk_enable(rtc_clk); s3c_rtc_enable(pdev, 1); writeb(ticnt_save, s3c_rtc_base + S3C2410_TICNT); if (s3c_rtc_cpu_type == TYPE_S3C64XX && ticnt_en_save) { @@ -588,6 +612,7 @@ static int s3c_rtc_resume(struct platform_device *pdev) disable_irq_wake(s3c_rtc_alarmno); wake_en = false; } + clk_disable(rtc_clk); return 0; } -- cgit v0.10.2 From 9a9a54ad7aa2c7420c96c6fd33538f55d81775cb Mon Sep 17 00:00:00 2001 From: Anirudh Ghayal Date: Mon, 25 Jul 2011 17:13:33 -0700 Subject: drivers/rtc: add support for Qualcomm PMIC8xxx RTC Add support for PMIC8xxx based RTC. PMIC8xxx is Qualcomm's power management IC that internally houses an RTC module. This driver communicates with the PMIC module over SSBI bus. [akpm@linux-foundation.org: cosmetic tweaks] Acked-by: Wan ZongShun Reviewed-by: Stephen Boyd Signed-off-by: Anirudh Ghayal Signed-off-by: Ashay Jaiswal Cc: Samuel Ortiz Cc: Wan ZongShun Cc: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 55affcd..5a538fc 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1034,6 +1034,16 @@ config RTC_DRV_LPC32XX This driver can also be buillt as a module. If so, the module will be called rtc-lpc32xx. +config RTC_DRV_PM8XXX + tristate "Qualcomm PMIC8XXX RTC" + depends on MFD_PM8XXX + help + If you say yes here you get support for the + Qualcomm PMIC8XXX RTC. + + To compile this driver as a module, choose M here: the + module will be called rtc-pm8xxx. + config RTC_DRV_TEGRA tristate "NVIDIA Tegra Internal RTC driver" depends on RTC_CLASS && ARCH_TEGRA diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 0ffefe8..6e69823 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -77,6 +77,7 @@ obj-$(CONFIG_RTC_DRV_PCF2123) += rtc-pcf2123.o obj-$(CONFIG_RTC_DRV_PCF50633) += rtc-pcf50633.o obj-$(CONFIG_RTC_DRV_PL030) += rtc-pl030.o obj-$(CONFIG_RTC_DRV_PL031) += rtc-pl031.o +obj-$(CONFIG_RTC_DRV_PM8XXX) += rtc-pm8xxx.o obj-$(CONFIG_RTC_DRV_PS3) += rtc-ps3.o obj-$(CONFIG_RTC_DRV_PUV3) += rtc-puv3.o obj-$(CONFIG_RTC_DRV_PXA) += rtc-pxa.o diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c new file mode 100644 index 0000000..d420e9d --- /dev/null +++ b/drivers/rtc/rtc-pm8xxx.c @@ -0,0 +1,550 @@ +/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + + +/* RTC Register offsets from RTC CTRL REG */ +#define PM8XXX_ALARM_CTRL_OFFSET 0x01 +#define PM8XXX_RTC_WRITE_OFFSET 0x02 +#define PM8XXX_RTC_READ_OFFSET 0x06 +#define PM8XXX_ALARM_RW_OFFSET 0x0A + +/* RTC_CTRL register bit fields */ +#define PM8xxx_RTC_ENABLE BIT(7) +#define PM8xxx_RTC_ALARM_ENABLE BIT(1) +#define PM8xxx_RTC_ALARM_CLEAR BIT(0) + +#define NUM_8_BIT_RTC_REGS 0x4 + +/** + * struct pm8xxx_rtc - rtc driver internal structure + * @rtc: rtc device for this driver. + * @rtc_alarm_irq: rtc alarm irq number. + * @rtc_base: address of rtc control register. + * @rtc_read_base: base address of read registers. + * @rtc_write_base: base address of write registers. + * @alarm_rw_base: base address of alarm registers. + * @ctrl_reg: rtc control register. + * @rtc_dev: device structure. + * @ctrl_reg_lock: spinlock protecting access to ctrl_reg. + */ +struct pm8xxx_rtc { + struct rtc_device *rtc; + int rtc_alarm_irq; + int rtc_base; + int rtc_read_base; + int rtc_write_base; + int alarm_rw_base; + u8 ctrl_reg; + struct device *rtc_dev; + spinlock_t ctrl_reg_lock; +}; + +/* + * The RTC registers need to be read/written one byte at a time. This is a + * hardware limitation. + */ +static int pm8xxx_read_wrapper(struct pm8xxx_rtc *rtc_dd, u8 *rtc_val, + int base, int count) +{ + int i, rc; + struct device *parent = rtc_dd->rtc_dev->parent; + + for (i = 0; i < count; i++) { + rc = pm8xxx_readb(parent, base + i, &rtc_val[i]); + if (rc < 0) { + dev_err(rtc_dd->rtc_dev, "PMIC read failed\n"); + return rc; + } + } + + return 0; +} + +static int pm8xxx_write_wrapper(struct pm8xxx_rtc *rtc_dd, u8 *rtc_val, + int base, int count) +{ + int i, rc; + struct device *parent = rtc_dd->rtc_dev->parent; + + for (i = 0; i < count; i++) { + rc = pm8xxx_writeb(parent, base + i, rtc_val[i]); + if (rc < 0) { + dev_err(rtc_dd->rtc_dev, "PMIC write failed\n"); + return rc; + } + } + + return 0; +} + +/* + * Steps to write the RTC registers. + * 1. Disable alarm if enabled. + * 2. Write 0x00 to LSB. + * 3. Write Byte[1], Byte[2], Byte[3] then Byte[0]. + * 4. Enable alarm if disabled in step 1. + */ +static int pm8xxx_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + int rc, i; + unsigned long secs, irq_flags; + u8 value[NUM_8_BIT_RTC_REGS], reg = 0, alarm_enabled = 0, ctrl_reg; + struct pm8xxx_rtc *rtc_dd = dev_get_drvdata(dev); + + rtc_tm_to_time(tm, &secs); + + for (i = 0; i < NUM_8_BIT_RTC_REGS; i++) { + value[i] = secs & 0xFF; + secs >>= 8; + } + + dev_dbg(dev, "Seconds value to be written to RTC = %lu\n", secs); + + spin_lock_irqsave(&rtc_dd->ctrl_reg_lock, irq_flags); + ctrl_reg = rtc_dd->ctrl_reg; + + if (ctrl_reg & PM8xxx_RTC_ALARM_ENABLE) { + alarm_enabled = 1; + ctrl_reg &= ~PM8xxx_RTC_ALARM_ENABLE; + rc = pm8xxx_write_wrapper(rtc_dd, &ctrl_reg, rtc_dd->rtc_base, + 1); + if (rc < 0) { + dev_err(dev, "Write to RTC control register " + "failed\n"); + goto rtc_rw_fail; + } + rtc_dd->ctrl_reg = ctrl_reg; + } else + spin_unlock_irqrestore(&rtc_dd->ctrl_reg_lock, irq_flags); + + /* Write 0 to Byte[0] */ + reg = 0; + rc = pm8xxx_write_wrapper(rtc_dd, ®, rtc_dd->rtc_write_base, 1); + if (rc < 0) { + dev_err(dev, "Write to RTC write data register failed\n"); + goto rtc_rw_fail; + } + + /* Write Byte[1], Byte[2], Byte[3] */ + rc = pm8xxx_write_wrapper(rtc_dd, value + 1, + rtc_dd->rtc_write_base + 1, 3); + if (rc < 0) { + dev_err(dev, "Write to RTC write data register failed\n"); + goto rtc_rw_fail; + } + + /* Write Byte[0] */ + rc = pm8xxx_write_wrapper(rtc_dd, value, rtc_dd->rtc_write_base, 1); + if (rc < 0) { + dev_err(dev, "Write to RTC write data register failed\n"); + goto rtc_rw_fail; + } + + if (alarm_enabled) { + ctrl_reg |= PM8xxx_RTC_ALARM_ENABLE; + rc = pm8xxx_write_wrapper(rtc_dd, &ctrl_reg, rtc_dd->rtc_base, + 1); + if (rc < 0) { + dev_err(dev, "Write to RTC control register " + "failed\n"); + goto rtc_rw_fail; + } + rtc_dd->ctrl_reg = ctrl_reg; + } + +rtc_rw_fail: + if (alarm_enabled) + spin_unlock_irqrestore(&rtc_dd->ctrl_reg_lock, irq_flags); + + return rc; +} + +static int pm8xxx_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + int rc; + u8 value[NUM_8_BIT_RTC_REGS], reg; + unsigned long secs; + struct pm8xxx_rtc *rtc_dd = dev_get_drvdata(dev); + + rc = pm8xxx_read_wrapper(rtc_dd, value, rtc_dd->rtc_read_base, + NUM_8_BIT_RTC_REGS); + if (rc < 0) { + dev_err(dev, "RTC read data register failed\n"); + return rc; + } + + /* + * Read the LSB again and check if there has been a carry over. + * If there is, redo the read operation. + */ + rc = pm8xxx_read_wrapper(rtc_dd, ®, rtc_dd->rtc_read_base, 1); + if (rc < 0) { + dev_err(dev, "RTC read data register failed\n"); + return rc; + } + + if (unlikely(reg < value[0])) { + rc = pm8xxx_read_wrapper(rtc_dd, value, + rtc_dd->rtc_read_base, NUM_8_BIT_RTC_REGS); + if (rc < 0) { + dev_err(dev, "RTC read data register failed\n"); + return rc; + } + } + + secs = value[0] | (value[1] << 8) | (value[2] << 16) | (value[3] << 24); + + rtc_time_to_tm(secs, tm); + + rc = rtc_valid_tm(tm); + if (rc < 0) { + dev_err(dev, "Invalid time read from RTC\n"); + return rc; + } + + dev_dbg(dev, "secs = %lu, h:m:s == %d:%d:%d, d/m/y = %d/%d/%d\n", + secs, tm->tm_hour, tm->tm_min, tm->tm_sec, + tm->tm_mday, tm->tm_mon, tm->tm_year); + + return 0; +} + +static int pm8xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) +{ + int rc, i; + u8 value[NUM_8_BIT_RTC_REGS], ctrl_reg; + unsigned long secs, irq_flags; + struct pm8xxx_rtc *rtc_dd = dev_get_drvdata(dev); + + rtc_tm_to_time(&alarm->time, &secs); + + for (i = 0; i < NUM_8_BIT_RTC_REGS; i++) { + value[i] = secs & 0xFF; + secs >>= 8; + } + + spin_lock_irqsave(&rtc_dd->ctrl_reg_lock, irq_flags); + + rc = pm8xxx_write_wrapper(rtc_dd, value, rtc_dd->alarm_rw_base, + NUM_8_BIT_RTC_REGS); + if (rc < 0) { + dev_err(dev, "Write to RTC ALARM register failed\n"); + goto rtc_rw_fail; + } + + ctrl_reg = rtc_dd->ctrl_reg; + ctrl_reg = alarm->enabled ? (ctrl_reg | PM8xxx_RTC_ALARM_ENABLE) : + (ctrl_reg & ~PM8xxx_RTC_ALARM_ENABLE); + + rc = pm8xxx_write_wrapper(rtc_dd, &ctrl_reg, rtc_dd->rtc_base, 1); + if (rc < 0) { + dev_err(dev, "Write to RTC control register failed\n"); + goto rtc_rw_fail; + } + + rtc_dd->ctrl_reg = ctrl_reg; + + dev_dbg(dev, "Alarm Set for h:r:s=%d:%d:%d, d/m/y=%d/%d/%d\n", + alarm->time.tm_hour, alarm->time.tm_min, + alarm->time.tm_sec, alarm->time.tm_mday, + alarm->time.tm_mon, alarm->time.tm_year); +rtc_rw_fail: + spin_unlock_irqrestore(&rtc_dd->ctrl_reg_lock, irq_flags); + return rc; +} + +static int pm8xxx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) +{ + int rc; + u8 value[NUM_8_BIT_RTC_REGS]; + unsigned long secs; + struct pm8xxx_rtc *rtc_dd = dev_get_drvdata(dev); + + rc = pm8xxx_read_wrapper(rtc_dd, value, rtc_dd->alarm_rw_base, + NUM_8_BIT_RTC_REGS); + if (rc < 0) { + dev_err(dev, "RTC alarm time read failed\n"); + return rc; + } + + secs = value[0] | (value[1] << 8) | (value[2] << 16) | (value[3] << 24); + + rtc_time_to_tm(secs, &alarm->time); + + rc = rtc_valid_tm(&alarm->time); + if (rc < 0) { + dev_err(dev, "Invalid alarm time read from RTC\n"); + return rc; + } + + dev_dbg(dev, "Alarm set for - h:r:s=%d:%d:%d, d/m/y=%d/%d/%d\n", + alarm->time.tm_hour, alarm->time.tm_min, + alarm->time.tm_sec, alarm->time.tm_mday, + alarm->time.tm_mon, alarm->time.tm_year); + + return 0; +} + +static int pm8xxx_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) +{ + int rc; + unsigned long irq_flags; + struct pm8xxx_rtc *rtc_dd = dev_get_drvdata(dev); + u8 ctrl_reg; + + spin_lock_irqsave(&rtc_dd->ctrl_reg_lock, irq_flags); + ctrl_reg = rtc_dd->ctrl_reg; + ctrl_reg = (enable) ? (ctrl_reg | PM8xxx_RTC_ALARM_ENABLE) : + (ctrl_reg & ~PM8xxx_RTC_ALARM_ENABLE); + + rc = pm8xxx_write_wrapper(rtc_dd, &ctrl_reg, rtc_dd->rtc_base, 1); + if (rc < 0) { + dev_err(dev, "Write to RTC control register failed\n"); + goto rtc_rw_fail; + } + + rtc_dd->ctrl_reg = ctrl_reg; + +rtc_rw_fail: + spin_unlock_irqrestore(&rtc_dd->ctrl_reg_lock, irq_flags); + return rc; +} + +static struct rtc_class_ops pm8xxx_rtc_ops = { + .read_time = pm8xxx_rtc_read_time, + .set_alarm = pm8xxx_rtc_set_alarm, + .read_alarm = pm8xxx_rtc_read_alarm, + .alarm_irq_enable = pm8xxx_rtc_alarm_irq_enable, +}; + +static irqreturn_t pm8xxx_alarm_trigger(int irq, void *dev_id) +{ + struct pm8xxx_rtc *rtc_dd = dev_id; + u8 ctrl_reg; + int rc; + unsigned long irq_flags; + + rtc_update_irq(rtc_dd->rtc, 1, RTC_IRQF | RTC_AF); + + spin_lock_irqsave(&rtc_dd->ctrl_reg_lock, irq_flags); + + /* Clear the alarm enable bit */ + ctrl_reg = rtc_dd->ctrl_reg; + ctrl_reg &= ~PM8xxx_RTC_ALARM_ENABLE; + + rc = pm8xxx_write_wrapper(rtc_dd, &ctrl_reg, rtc_dd->rtc_base, 1); + if (rc < 0) { + spin_unlock_irqrestore(&rtc_dd->ctrl_reg_lock, irq_flags); + dev_err(rtc_dd->rtc_dev, "Write to RTC control register " + "failed\n"); + goto rtc_alarm_handled; + } + + rtc_dd->ctrl_reg = ctrl_reg; + spin_unlock_irqrestore(&rtc_dd->ctrl_reg_lock, irq_flags); + + /* Clear RTC alarm register */ + rc = pm8xxx_read_wrapper(rtc_dd, &ctrl_reg, rtc_dd->rtc_base + + PM8XXX_ALARM_CTRL_OFFSET, 1); + if (rc < 0) { + dev_err(rtc_dd->rtc_dev, "RTC Alarm control register read " + "failed\n"); + goto rtc_alarm_handled; + } + + ctrl_reg &= ~PM8xxx_RTC_ALARM_CLEAR; + rc = pm8xxx_write_wrapper(rtc_dd, &ctrl_reg, rtc_dd->rtc_base + + PM8XXX_ALARM_CTRL_OFFSET, 1); + if (rc < 0) + dev_err(rtc_dd->rtc_dev, "Write to RTC Alarm control register" + " failed\n"); + +rtc_alarm_handled: + return IRQ_HANDLED; +} + +static int __devinit pm8xxx_rtc_probe(struct platform_device *pdev) +{ + int rc; + u8 ctrl_reg; + bool rtc_write_enable = false; + struct pm8xxx_rtc *rtc_dd; + struct resource *rtc_resource; + const struct pm8xxx_rtc_platform_data *pdata = + dev_get_platdata(&pdev->dev); + + if (pdata != NULL) + rtc_write_enable = pdata->rtc_write_enable; + + rtc_dd = kzalloc(sizeof(*rtc_dd), GFP_KERNEL); + if (rtc_dd == NULL) { + dev_err(&pdev->dev, "Unable to allocate memory!\n"); + return -ENOMEM; + } + + /* Initialise spinlock to protect RTC control register */ + spin_lock_init(&rtc_dd->ctrl_reg_lock); + + rtc_dd->rtc_alarm_irq = platform_get_irq(pdev, 0); + if (rtc_dd->rtc_alarm_irq < 0) { + dev_err(&pdev->dev, "Alarm IRQ resource absent!\n"); + rc = -ENXIO; + goto fail_rtc_enable; + } + + rtc_resource = platform_get_resource_byname(pdev, IORESOURCE_IO, + "pmic_rtc_base"); + if (!(rtc_resource && rtc_resource->start)) { + dev_err(&pdev->dev, "RTC IO resource absent!\n"); + rc = -ENXIO; + goto fail_rtc_enable; + } + + rtc_dd->rtc_base = rtc_resource->start; + + /* Setup RTC register addresses */ + rtc_dd->rtc_write_base = rtc_dd->rtc_base + PM8XXX_RTC_WRITE_OFFSET; + rtc_dd->rtc_read_base = rtc_dd->rtc_base + PM8XXX_RTC_READ_OFFSET; + rtc_dd->alarm_rw_base = rtc_dd->rtc_base + PM8XXX_ALARM_RW_OFFSET; + + rtc_dd->rtc_dev = &pdev->dev; + + /* Check if the RTC is on, else turn it on */ + rc = pm8xxx_read_wrapper(rtc_dd, &ctrl_reg, rtc_dd->rtc_base, 1); + if (rc < 0) { + dev_err(&pdev->dev, "RTC control register read failed!\n"); + goto fail_rtc_enable; + } + + if (!(ctrl_reg & PM8xxx_RTC_ENABLE)) { + ctrl_reg |= PM8xxx_RTC_ENABLE; + rc = pm8xxx_write_wrapper(rtc_dd, &ctrl_reg, rtc_dd->rtc_base, + 1); + if (rc < 0) { + dev_err(&pdev->dev, "Write to RTC control register " + "failed\n"); + goto fail_rtc_enable; + } + } + + rtc_dd->ctrl_reg = ctrl_reg; + if (rtc_write_enable == true) + pm8xxx_rtc_ops.set_time = pm8xxx_rtc_set_time; + + platform_set_drvdata(pdev, rtc_dd); + + /* Register the RTC device */ + rtc_dd->rtc = rtc_device_register("pm8xxx_rtc", &pdev->dev, + &pm8xxx_rtc_ops, THIS_MODULE); + if (IS_ERR(rtc_dd->rtc)) { + dev_err(&pdev->dev, "%s: RTC registration failed (%ld)\n", + __func__, PTR_ERR(rtc_dd->rtc)); + rc = PTR_ERR(rtc_dd->rtc); + goto fail_rtc_enable; + } + + /* Request the alarm IRQ */ + rc = request_any_context_irq(rtc_dd->rtc_alarm_irq, + pm8xxx_alarm_trigger, IRQF_TRIGGER_RISING, + "pm8xxx_rtc_alarm", rtc_dd); + if (rc < 0) { + dev_err(&pdev->dev, "Request IRQ failed (%d)\n", rc); + goto fail_req_irq; + } + + device_init_wakeup(&pdev->dev, 1); + + dev_dbg(&pdev->dev, "Probe success !!\n"); + + return 0; + +fail_req_irq: + rtc_device_unregister(rtc_dd->rtc); +fail_rtc_enable: + platform_set_drvdata(pdev, NULL); + kfree(rtc_dd); + return rc; +} + +static int __devexit pm8xxx_rtc_remove(struct platform_device *pdev) +{ + struct pm8xxx_rtc *rtc_dd = platform_get_drvdata(pdev); + + device_init_wakeup(&pdev->dev, 0); + free_irq(rtc_dd->rtc_alarm_irq, rtc_dd); + rtc_device_unregister(rtc_dd->rtc); + platform_set_drvdata(pdev, NULL); + kfree(rtc_dd); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int pm8xxx_rtc_resume(struct device *dev) +{ + struct pm8xxx_rtc *rtc_dd = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) + disable_irq_wake(rtc_dd->rtc_alarm_irq); + + return 0; +} + +static int pm8xxx_rtc_suspend(struct device *dev) +{ + struct pm8xxx_rtc *rtc_dd = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) + enable_irq_wake(rtc_dd->rtc_alarm_irq); + + return 0; +} +#endif + +SIMPLE_DEV_PM_OPS(pm8xxx_rtc_pm_ops, pm8xxx_rtc_suspend, pm8xxx_rtc_resume); + +static struct platform_driver pm8xxx_rtc_driver = { + .probe = pm8xxx_rtc_probe, + .remove = __devexit_p(pm8xxx_rtc_remove), + .driver = { + .name = PM8XXX_RTC_DEV_NAME, + .owner = THIS_MODULE, + .pm = &pm8xxx_rtc_pm_ops, + }, +}; + +static int __init pm8xxx_rtc_init(void) +{ + return platform_driver_register(&pm8xxx_rtc_driver); +} +module_init(pm8xxx_rtc_init); + +static void __exit pm8xxx_rtc_exit(void) +{ + platform_driver_unregister(&pm8xxx_rtc_driver); +} +module_exit(pm8xxx_rtc_exit); + +MODULE_ALIAS("platform:rtc-pm8xxx"); +MODULE_DESCRIPTION("PMIC8xxx RTC driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Anirudh Ghayal "); diff --git a/include/linux/mfd/pm8xxx/rtc.h b/include/linux/mfd/pm8xxx/rtc.h new file mode 100644 index 0000000..14f1983 --- /dev/null +++ b/include/linux/mfd/pm8xxx/rtc.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __RTC_PM8XXX_H__ +#define __RTC_PM8XXX_H__ + +#define PM8XXX_RTC_DEV_NAME "rtc-pm8xxx" +/** + * struct pm8xxx_rtc_pdata - RTC driver platform data + * @rtc_write_enable: variable stating RTC write capability + */ +struct pm8xxx_rtc_platform_data { + bool rtc_write_enable; +}; + +#endif /* __RTC_PM8XXX_H__ */ -- cgit v0.10.2 From 8f6b0dd369868559b384a66aa17512ae5aae2d9b Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Mon, 25 Jul 2011 17:13:34 -0700 Subject: drivers/rtc/rtc-twl.c: check return value of twl_rtc_write_u8() in twl_rtc_set_time() We forget to save the return value of the call to twl_rtc_write_u8(save_control, REG_RTC_CTRL_REG); in 'ret', making the test of 'ret < 0' dead code since 'ret' then couldn't possibly have changed since the last test just a few lines above. It also makes us not detect failures from that specific twl_rtc_write_u8() call. Signed-off-by: Jesper Juhl Cc: Alessandro Zummo Cc: Alexandre Rusev Cc: "George G. Davis" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c index f9a2799..9a81f77 100644 --- a/drivers/rtc/rtc-twl.c +++ b/drivers/rtc/rtc-twl.c @@ -275,7 +275,7 @@ static int twl_rtc_set_time(struct device *dev, struct rtc_time *tm) goto out; save_control &= ~BIT_RTC_CTRL_REG_STOP_RTC_M; - twl_rtc_write_u8(save_control, REG_RTC_CTRL_REG); + ret = twl_rtc_write_u8(save_control, REG_RTC_CTRL_REG); if (ret < 0) goto out; -- cgit v0.10.2 From e57ee01750c4954fd0b5e3c6109cd4b870880eb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 25 Jul 2011 17:13:34 -0700 Subject: drivers/rtc/rtc-tegra.c: properly initialize spinlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using __SPIN_LOCK_UNLOCKED for a dynamically allocated lock is wrong and breaks the build with PREEMPT_RT_FULL. Signed-off-by: Uwe Kleine-König Cc: Andrew Chew Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-tegra.c b/drivers/rtc/rtc-tegra.c index 2fc31aa..75259fe 100644 --- a/drivers/rtc/rtc-tegra.c +++ b/drivers/rtc/rtc-tegra.c @@ -343,7 +343,7 @@ static int __devinit tegra_rtc_probe(struct platform_device *pdev) /* set context info. */ info->pdev = pdev; - info->tegra_rtc_lock = __SPIN_LOCK_UNLOCKED(info->tegra_rtc_lock); + spin_lock_init(&info->tegra_rtc_lock); platform_set_drvdata(pdev, info); -- cgit v0.10.2 From 29df8d8f8702f0f53c1375015f09f04bc8d023c1 Mon Sep 17 00:00:00 2001 From: Witold Szczeponik Date: Mon, 25 Jul 2011 17:13:36 -0700 Subject: pnpacpi: register disabled resources When parsing PnP ACPI resource structures, it may happen that some of the resources are disabled (in which case "the size" of the resource equals zero). The current solution is to skip these resources completely - with the unfortunate side effect that they are not registered despite the fact that they exist, after all. (The downside of this approach is that these resources cannot be used as templates for setting the actual device's resources because they are missing from the template.) The kernel's APM implementation does not suffer from this problem and registers all resources regardless of "their size". This patch fixes a problem with (at least) the vintage IBM ThinkPad 600E (and most likely also with the 600, 600X, and 770X which have a very similar layout) where some of its PnP devices support options where either an IRQ, a DMA, or an IO port is disabled. Without this patch, the devices can not be configured using the "/sys/bus/pnp/devices/*/resources" interface. The manipulation of these resources is important because the 600E has very demanding requirements. For instance, the number of IRQs is not sufficient to support all devices of the 600E. Fortunately, some of the devices, like the sound card's MPU-401 UART, can be configured to not use any IRQ, hence freeing an IRQ for a device that requires one. (Still, the device's "ResourceTemplate" requires an IRQ resource descriptor which cannot be created if the resource has not been registered in the first place.) As an example, the dependent sets of the 600E's CSC0103 device (the MPU-401 UART) are listed, with the patch applied, as: Dependent: 00 - Priority preferred port 0x300-0x330, align 0xf, size 0x4, 16-bit address decoding irq High-Edge Dependent: 01 - Priority acceptable port 0x300-0x330, align 0xf, size 0x4, 16-bit address decoding irq 5,7,2/9,10,11,15 High-Edge (The same result is obtained when PNPBIOS is used instead of PnP ACPI.) Without the patch, the IRQ resource in the preferred option is not listed at all: Dependent: 00 - Priority preferred port 0x300-0x330, align 0xf, size 0x4, 16-bit address decoding Dependent: 01 - Priority acceptable port 0x300-0x330, align 0xf, size 0x4, 16-bit address decoding irq 5,7,2/9,10,11,15 High-Edge And in fact, the 600E's DSDT lists the disabled IRQ as an option, as can be seen from the following excerpt from the DSDT: Name (_PRS, ResourceTemplate () { StartDependentFn (0x00, 0x00) { IO (Decode16, 0x0300, 0x0330, 0x10, 0x04) IRQNoFlags () {} } StartDependentFn (0x01, 0x00) { IO (Decode16, 0x0300, 0x0330, 0x10, 0x04) IRQNoFlags () {5,7,9,10,11,15} } EndDependentFn () }) With this patch applied, a user space program - or maybe even the kernel - can allocate all devices' resources optimally. For the 600E, this means to find optimal resources for (at least) the serial port, the parallel port, the infrared port, the MWAVE modem, the sound card, and the MPU-401 UART. The patch applies the idea to register disabled resources to all types of resources, not just to IRQs, DMAs, and IO ports. At the same time, it mimics the behavior of the "pnp_assign_xxx" functions from "drivers/pnp/manager.c" where resources with "no size" are considered disabled. No regressions were observed on hardware that does not require this patch. The patch is applied against 2.6.39. NB: The kernel's current PnP interface does not allow for disabling individual resources using the "/sys/bus/pnp/devices/$device/resources" file. Assuming this could be done, a device could be configured to use a disabled resource using a simple series of calls: echo disable > /sys/bus/pnp/devices/$device/resources echo clear > /sys/bus/pnp/devices/$device/resources echo set irq disabled > /sys/bus/pnp/devices/$device/resources echo fill > /sys/bus/pnp/devices/$device/resources echo activate > /sys/bus/pnp/devices/$device/resources This patch addresses only the parsing of PnP ACPI devices. ChangeLog (v1 -> v2): - extend patch description - fix typo in patch itself Signed-off-by: Witold Szczeponik Cc: Len Brown Cc: Adam Belay Cc: Bjorn Helgaas Cc: Bjorn Helgaas Cc: Henrique de Moraes Holschuh Cc: Matthew Garrett Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index 100e4d9..4a07cd9 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -509,15 +509,15 @@ static __init void pnpacpi_parse_dma_option(struct pnp_dev *dev, struct acpi_resource_dma *p) { int i; - unsigned char map = 0, flags; + unsigned char map = 0, flags = 0; if (p->channel_count == 0) - return; + flags |= IORESOURCE_DISABLED; for (i = 0; i < p->channel_count; i++) map |= 1 << p->channels[i]; - flags = dma_flags(dev, p->type, p->bus_master, p->transfer); + flags |= dma_flags(dev, p->type, p->bus_master, p->transfer); pnp_register_dma_resource(dev, option_flags, map, flags); } @@ -527,17 +527,17 @@ static __init void pnpacpi_parse_irq_option(struct pnp_dev *dev, { int i; pnp_irq_mask_t map; - unsigned char flags; + unsigned char flags = 0; if (p->interrupt_count == 0) - return; + flags |= IORESOURCE_DISABLED; bitmap_zero(map.bits, PNP_IRQ_NR); for (i = 0; i < p->interrupt_count; i++) if (p->interrupts[i]) __set_bit(p->interrupts[i], map.bits); - flags = irq_flags(p->triggering, p->polarity, p->sharable); + flags |= irq_flags(p->triggering, p->polarity, p->sharable); pnp_register_irq_resource(dev, option_flags, &map, flags); } @@ -547,10 +547,10 @@ static __init void pnpacpi_parse_ext_irq_option(struct pnp_dev *dev, { int i; pnp_irq_mask_t map; - unsigned char flags; + unsigned char flags = 0; if (p->interrupt_count == 0) - return; + flags |= IORESOURCE_DISABLED; bitmap_zero(map.bits, PNP_IRQ_NR); for (i = 0; i < p->interrupt_count; i++) { @@ -564,7 +564,7 @@ static __init void pnpacpi_parse_ext_irq_option(struct pnp_dev *dev, } } - flags = irq_flags(p->triggering, p->polarity, p->sharable); + flags |= irq_flags(p->triggering, p->polarity, p->sharable); pnp_register_irq_resource(dev, option_flags, &map, flags); } @@ -575,10 +575,10 @@ static __init void pnpacpi_parse_port_option(struct pnp_dev *dev, unsigned char flags = 0; if (io->address_length == 0) - return; + flags |= IORESOURCE_DISABLED; if (io->io_decode == ACPI_DECODE_16) - flags = IORESOURCE_IO_16BIT_ADDR; + flags |= IORESOURCE_IO_16BIT_ADDR; pnp_register_port_resource(dev, option_flags, io->minimum, io->maximum, io->alignment, io->address_length, flags); } @@ -587,11 +587,13 @@ static __init void pnpacpi_parse_fixed_port_option(struct pnp_dev *dev, unsigned int option_flags, struct acpi_resource_fixed_io *io) { + unsigned char flags = 0; + if (io->address_length == 0) - return; + flags |= IORESOURCE_DISABLED; pnp_register_port_resource(dev, option_flags, io->address, io->address, - 0, io->address_length, IORESOURCE_IO_FIXED); + 0, io->address_length, flags | IORESOURCE_IO_FIXED); } static __init void pnpacpi_parse_mem24_option(struct pnp_dev *dev, @@ -601,10 +603,10 @@ static __init void pnpacpi_parse_mem24_option(struct pnp_dev *dev, unsigned char flags = 0; if (p->address_length == 0) - return; + flags |= IORESOURCE_DISABLED; if (p->write_protect == ACPI_READ_WRITE_MEMORY) - flags = IORESOURCE_MEM_WRITEABLE; + flags |= IORESOURCE_MEM_WRITEABLE; pnp_register_mem_resource(dev, option_flags, p->minimum, p->maximum, p->alignment, p->address_length, flags); } @@ -616,10 +618,10 @@ static __init void pnpacpi_parse_mem32_option(struct pnp_dev *dev, unsigned char flags = 0; if (p->address_length == 0) - return; + flags |= IORESOURCE_DISABLED; if (p->write_protect == ACPI_READ_WRITE_MEMORY) - flags = IORESOURCE_MEM_WRITEABLE; + flags |= IORESOURCE_MEM_WRITEABLE; pnp_register_mem_resource(dev, option_flags, p->minimum, p->maximum, p->alignment, p->address_length, flags); } @@ -631,10 +633,10 @@ static __init void pnpacpi_parse_fixed_mem32_option(struct pnp_dev *dev, unsigned char flags = 0; if (p->address_length == 0) - return; + flags |= IORESOURCE_DISABLED; if (p->write_protect == ACPI_READ_WRITE_MEMORY) - flags = IORESOURCE_MEM_WRITEABLE; + flags |= IORESOURCE_MEM_WRITEABLE; pnp_register_mem_resource(dev, option_flags, p->address, p->address, 0, p->address_length, flags); } @@ -655,18 +657,18 @@ static __init void pnpacpi_parse_address_option(struct pnp_dev *dev, } if (p->address_length == 0) - return; + flags |= IORESOURCE_DISABLED; if (p->resource_type == ACPI_MEMORY_RANGE) { if (p->info.mem.write_protect == ACPI_READ_WRITE_MEMORY) - flags = IORESOURCE_MEM_WRITEABLE; + flags |= IORESOURCE_MEM_WRITEABLE; pnp_register_mem_resource(dev, option_flags, p->minimum, p->minimum, 0, p->address_length, flags); } else if (p->resource_type == ACPI_IO_RANGE) pnp_register_port_resource(dev, option_flags, p->minimum, p->minimum, 0, p->address_length, - IORESOURCE_IO_FIXED); + flags | IORESOURCE_IO_FIXED); } static __init void pnpacpi_parse_ext_address_option(struct pnp_dev *dev, @@ -677,18 +679,18 @@ static __init void pnpacpi_parse_ext_address_option(struct pnp_dev *dev, unsigned char flags = 0; if (p->address_length == 0) - return; + flags |= IORESOURCE_DISABLED; if (p->resource_type == ACPI_MEMORY_RANGE) { if (p->info.mem.write_protect == ACPI_READ_WRITE_MEMORY) - flags = IORESOURCE_MEM_WRITEABLE; + flags |= IORESOURCE_MEM_WRITEABLE; pnp_register_mem_resource(dev, option_flags, p->minimum, p->minimum, 0, p->address_length, flags); } else if (p->resource_type == ACPI_IO_RANGE) pnp_register_port_resource(dev, option_flags, p->minimum, p->minimum, 0, p->address_length, - IORESOURCE_IO_FIXED); + flags | IORESOURCE_IO_FIXED); } struct acpipnp_parse_option_s { -- cgit v0.10.2 From 0c2fd1bfb155947a821fdaeb3c46aa1cfa20ad64 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 25 Jul 2011 17:13:37 -0700 Subject: reiserfs: use proper little-endian bitops Using __test_and_{set,clear}_bit_le() with ignoring its return value can be replaced with __{set,clear}_bit_le(). This introduces reiserfs_{set,clear}_le_bit for __{set,clear}_bit_le and does the above change with them. Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 483442e..567385a 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -214,7 +214,7 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th, } /* otherwise we clear all bit were set ... */ while (--i >= *beg) - reiserfs_test_and_clear_le_bit + reiserfs_clear_le_bit (i, bh->b_data); reiserfs_restore_prepared_buffer(s, bh); *beg = org; diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index b3a94d2..b6b9b1f 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c @@ -136,7 +136,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) return -EIO; } memset(bh->b_data, 0, sb_blocksize(sb)); - reiserfs_test_and_set_le_bit(0, bh->b_data); + reiserfs_set_le_bit(0, bh->b_data); reiserfs_cache_bitmap_metadata(s, bh, bitmap + i); set_buffer_uptodate(bh); @@ -172,7 +172,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) reiserfs_prepare_for_journal(s, bh, 1); for (i = block_r; i < s->s_blocksize * 8; i++) - reiserfs_test_and_clear_le_bit(i, bh->b_data); + reiserfs_clear_le_bit(i, bh->b_data); info->free_count += s->s_blocksize * 8 - block_r; journal_mark_dirty(&th, s, bh); @@ -190,7 +190,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) reiserfs_prepare_for_journal(s, bh, 1); for (i = block_r_new; i < s->s_blocksize * 8; i++) - reiserfs_test_and_set_le_bit(i, bh->b_data); + reiserfs_set_le_bit(i, bh->b_data); journal_mark_dirty(&th, s, bh); brelse(bh); diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index eca75df..96d465f 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -2332,7 +2332,9 @@ __u32 keyed_hash(const signed char *msg, int len); __u32 yura_hash(const signed char *msg, int len); __u32 r5_hash(const signed char *msg, int len); +#define reiserfs_set_le_bit __set_bit_le #define reiserfs_test_and_set_le_bit __test_and_set_bit_le +#define reiserfs_clear_le_bit __clear_bit_le #define reiserfs_test_and_clear_le_bit __test_and_clear_bit_le #define reiserfs_test_le_bit test_bit_le #define reiserfs_find_next_zero_le_bit find_next_zero_bit_le -- cgit v0.10.2 From 9d6bf5aa177ee7ffdcee2a590ef8a1bf9e8ade87 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 25 Jul 2011 17:13:38 -0700 Subject: reiserfs: use hweight_long() Use hweight_long() to count free bits in the bitmap. Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 567385a..d1aca1d 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -1222,15 +1222,11 @@ void reiserfs_cache_bitmap_metadata(struct super_block *sb, info->free_count = 0; while (--cur >= (unsigned long *)bh->b_data) { - int i; - /* 0 and ~0 are special, we can optimize for them */ if (*cur == 0) info->free_count += BITS_PER_LONG; else if (*cur != ~0L) /* A mix, investigate */ - for (i = BITS_PER_LONG - 1; i >= 0; i--) - if (!reiserfs_test_le_bit(i, cur)) - info->free_count++; + info->free_count += BITS_PER_LONG - hweight_long(*cur); } } -- cgit v0.10.2 From df2694d80a84fb0b61ce9fa31353eb9c5f36ad36 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 25 Jul 2011 17:13:38 -0700 Subject: Documentation/SubmitChecklist: add RCU debug config options There have been persistent lockdep RCU splats, indicating that submitters are not testing with CONFIG_PROVE_RCU. Add this config option to the list in Documentation/SubmitChecklist. Also add CONFIG_DEBUG_OBJECTS_RCU_HEAD for good measure. Signed-off-by: Paul E. McKenney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist index 7b13be4..dc0e332 100644 --- a/Documentation/SubmitChecklist +++ b/Documentation/SubmitChecklist @@ -53,8 +53,8 @@ kernel patches. 12: Has been tested with CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT, CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, - CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_ATOMIC_SLEEP all simultaneously - enabled. + CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_ATOMIC_SLEEP, CONFIG_PROVE_RCU + and CONFIG_DEBUG_OBJECTS_RCU_HEAD all simultaneously enabled. 13: Has been build- and runtime tested with and without CONFIG_SMP and CONFIG_PREEMPT. -- cgit v0.10.2 From f19da2ce8ef5e49b8b8ea199c3601dd45d71b262 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 25 Jul 2011 17:13:39 -0700 Subject: drivers/connector/cn_proc.c: remove unused local Fix the warning drivers/connector/cn_proc.c: In function 'proc_ptrace_connector': drivers/connector/cn_proc.c:176: warning: unused variable 'tracer' Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 281902d..0debc17 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -173,7 +173,6 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id) struct proc_event *ev; struct timespec ts; __u8 buffer[CN_PROC_MSG_SIZE]; - struct task_struct *tracer; if (atomic_read(&proc_event_num_listeners) < 1) return; -- cgit v0.10.2