From 07a723097cd31e9c2def69ac4d42834a16f79219 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 31 Oct 2011 17:06:28 -0700 Subject: dma-mapping: fix sync_single_range_* DMA debugging Commit 5fd75a7850b5 (dma-mapping: remove unnecessary sync_single_range_* in dma_map_ops) unified not only the dma_map_ops but also the corresponding debug_dma_sync_* calls. This led to spurious WARN()ings like the following because the DMA debug code was no longer able to detect the DMA buffer base address without the separate offset parameter: WARNING: at lib/dma-debug.c:911 check_sync+0xce/0x446() firewire_ohci 0000:04:00.0: DMA-API: device driver tries to sync DMA memory it has not allocated [device address=0x00000000cedaa400] [size=1024 bytes] Call Trace: ... [] check_sync+0xce/0x446 [] debug_dma_sync_single_for_device+0x39/0x3b [] ohci_queue_iso+0x4f3/0x77d [firewire_ohci] ... To fix this, unshare the sync_single_* and sync_single_range_* implementations so that we are able to call the correct debug_dma_sync_* functions. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Clemens Ladisch Cc: FUJITA Tomonori Cc: Konrad Rzeszutek Wilk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-generic/dma-mapping-common.h b/include/asm-generic/dma-mapping-common.h index 0c80bb3..9fa3f96 100644 --- a/include/asm-generic/dma-mapping-common.h +++ b/include/asm-generic/dma-mapping-common.h @@ -123,7 +123,12 @@ static inline void dma_sync_single_range_for_cpu(struct device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_cpu(dev, addr + offset, size, dir); + const struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (ops->sync_single_for_cpu) + ops->sync_single_for_cpu(dev, addr + offset, size, dir); + debug_dma_sync_single_range_for_cpu(dev, addr, offset, size, dir); } static inline void dma_sync_single_range_for_device(struct device *dev, @@ -132,7 +137,12 @@ static inline void dma_sync_single_range_for_device(struct device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_device(dev, addr + offset, size, dir); + const struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (ops->sync_single_for_device) + ops->sync_single_for_device(dev, addr + offset, size, dir); + debug_dma_sync_single_range_for_device(dev, addr, offset, size, dir); } static inline void -- cgit v0.10.2 From 6eea69dd8befeabd3d0f217400f54b157dd91fe9 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 31 Oct 2011 17:06:29 -0700 Subject: include/linux/dmar.h: forward-declare struct acpi_dmar_header x86_64 allnoconfig: In file included from arch/x86/kernel/pci-dma.c:3: include/linux/dmar.h:248: warning: 'struct acpi_dmar_header' declared inside parameter list include/linux/dmar.h:248: warning: its scope is only this definition or declaration, which is probably not what you want Cc: Suresh Siddha Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/dmar.h b/include/linux/dmar.h index a8b1a84..731a609 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -26,6 +26,8 @@ #include #include +struct acpi_dmar_header; + /* DMAR Flags */ #define DMAR_INTR_REMAP 0x1 #define DMAR_X2APIC_OPT_OUT 0x2 -- cgit v0.10.2 From fc360bd9cdcf875639a77f07fafec26699c546f3 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 31 Oct 2011 17:06:32 -0700 Subject: /proc/self/numa_maps: restore "huge" tag for hugetlb vmas The display of the "huge" tag was accidentally removed in 29ea2f698 ("mm: use walk_page_range() instead of custom page table walking code"). Reported-by: Stephen Hemminger Tested-by: Stephen Hemminger Reviewed-by: Stephen Wilson Cc: KOSAKI Motohiro Cc: Hugh Dickins Acked-by: David Rientjes Cc: Lee Schermerhorn Cc: Alexey Dobriyan Cc: Christoph Lameter Cc: Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 5afaa58..c7d4ee6 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1039,6 +1039,9 @@ static int show_numa_map(struct seq_file *m, void *v) seq_printf(m, " stack"); } + if (is_vm_hugetlb_page(vma)) + seq_printf(m, " huge"); + walk_page_range(vma->vm_start, vma->vm_end, &walk); if (!md->pages) -- cgit v0.10.2 From 32ea845d5bafc37b7406bea1aee3005407cb0900 Mon Sep 17 00:00:00 2001 From: Wanlong Gao Date: Mon, 31 Oct 2011 17:06:35 -0700 Subject: ipc/mqueue.c: fix wrong use of schedule_hrtimeout_range_clock() Fix the wrong use of schedule_hrtimeout_range_clock() in wq_sleep(), although it is harmless for the syscall mq_timed* now. It was introduced by 9ca7d8e ("mqueue: Convert message queue timeout to use hrtimers"). Signed-off-by: Wanlong Gao Cc: Carsten Emde Cc: Thomas Gleixner Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/ipc/mqueue.c b/ipc/mqueue.c index ed049ea..2e0ecfc 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -449,8 +449,8 @@ static int wq_sleep(struct mqueue_inode_info *info, int sr, set_current_state(TASK_INTERRUPTIBLE); spin_unlock(&info->lock); - time = schedule_hrtimeout_range_clock(timeout, - HRTIMER_MODE_ABS, 0, CLOCK_REALTIME); + time = schedule_hrtimeout_range_clock(timeout, 0, + HRTIMER_MODE_ABS, CLOCK_REALTIME); while (ewp->state == STATE_PENDING) cpu_relax(); -- cgit v0.10.2 From fcf634098c00dd9cd247447368495f0b79be12d1 Mon Sep 17 00:00:00 2001 From: Christopher Yeoh Date: Mon, 31 Oct 2011 17:06:39 -0700 Subject: Cross Memory Attach The basic idea behind cross memory attach is to allow MPI programs doing intra-node communication to do a single copy of the message rather than a double copy of the message via shared memory. The following patch attempts to achieve this by allowing a destination process, given an address and size from a source process, to copy memory directly from the source process into its own address space via a system call. There is also a symmetrical ability to copy from the current process's address space into a destination process's address space. - Use of /proc/pid/mem has been considered, but there are issues with using it: - Does not allow for specifying iovecs for both src and dest, assuming preadv or pwritev was implemented either the area read from or written to would need to be contiguous. - Currently mem_read allows only processes who are currently ptrace'ing the target and are still able to ptrace the target to read from the target. This check could possibly be moved to the open call, but its not clear exactly what race this restriction is stopping (reason appears to have been lost) - Having to send the fd of /proc/self/mem via SCM_RIGHTS on unix domain socket is a bit ugly from a userspace point of view, especially when you may have hundreds if not (eventually) thousands of processes that all need to do this with each other - Doesn't allow for some future use of the interface we would like to consider adding in the future (see below) - Interestingly reading from /proc/pid/mem currently actually involves two copies! (But this could be fixed pretty easily) As mentioned previously use of vmsplice instead was considered, but has problems. Since you need the reader and writer working co-operatively if the pipe is not drained then you block. Which requires some wrapping to do non blocking on the send side or polling on the receive. In all to all communication it requires ordering otherwise you can deadlock. And in the example of many MPI tasks writing to one MPI task vmsplice serialises the copying. There are some cases of MPI collectives where even a single copy interface does not get us the performance gain we could. For example in an MPI_Reduce rather than copy the data from the source we would like to instead use it directly in a mathops (say the reduce is doing a sum) as this would save us doing a copy. We don't need to keep a copy of the data from the source. I haven't implemented this, but I think this interface could in the future do all this through the use of the flags - eg could specify the math operation and type and the kernel rather than just copying the data would apply the specified operation between the source and destination and store it in the destination. Although we don't have a "second user" of the interface (though I've had some nibbles from people who may be interested in using it for intra process messaging which is not MPI). This interface is something which hardware vendors are already doing for their custom drivers to implement fast local communication. And so in addition to this being useful for OpenMPI it would mean the driver maintainers don't have to fix things up when the mm changes. There was some discussion about how much faster a true zero copy would go. Here's a link back to the email with some testing I did on that: http://marc.info/?l=linux-mm&m=130105930902915&w=2 There is a basic man page for the proposed interface here: http://ozlabs.org/~cyeoh/cma/process_vm_readv.txt This has been implemented for x86 and powerpc, other architecture should mainly (I think) just need to add syscall numbers for the process_vm_readv and process_vm_writev. There are 32 bit compatibility versions for 64-bit kernels. For arch maintainers there are some simple tests to be able to quickly verify that the syscalls are working correctly here: http://ozlabs.org/~cyeoh/cma/cma-test-20110718.tgz Signed-off-by: Chris Yeoh Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Arnd Bergmann Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: David Howells Cc: James Morris Cc: Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index fa0d27a..559ae1e 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -354,3 +354,5 @@ COMPAT_SYS_SPU(clock_adjtime) SYSCALL_SPU(syncfs) COMPAT_SYS_SPU(sendmmsg) SYSCALL_SPU(setns) +COMPAT_SYS(process_vm_readv) +COMPAT_SYS(process_vm_writev) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index b8b3f59..d3d1b5e 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -373,10 +373,12 @@ #define __NR_syncfs 348 #define __NR_sendmmsg 349 #define __NR_setns 350 +#define __NR_process_vm_readv 351 +#define __NR_process_vm_writev 352 #ifdef __KERNEL__ -#define __NR_syscalls 351 +#define __NR_syscalls 353 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 54edb207..a6253ec 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -850,4 +850,6 @@ ia32_sys_call_table: .quad sys_syncfs .quad compat_sys_sendmmsg /* 345 */ .quad sys_setns + .quad compat_sys_process_vm_readv + .quad compat_sys_process_vm_writev ia32_syscall_end: diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 593485b3..599c77d 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -352,10 +352,12 @@ #define __NR_syncfs 344 #define __NR_sendmmsg 345 #define __NR_setns 346 +#define __NR_process_vm_readv 347 +#define __NR_process_vm_writev 348 #ifdef __KERNEL__ -#define NR_syscalls 347 +#define NR_syscalls 349 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 0a6ba33..0431f19 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -682,6 +682,10 @@ __SYSCALL(__NR_sendmmsg, sys_sendmmsg) __SYSCALL(__NR_setns, sys_setns) #define __NR_getcpu 309 __SYSCALL(__NR_getcpu, sys_getcpu) +#define __NR_process_vm_readv 310 +__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv) +#define __NR_process_vm_writev 311 +__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index bc19be3..9a0e312 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -346,3 +346,5 @@ ENTRY(sys_call_table) .long sys_syncfs .long sys_sendmmsg /* 345 */ .long sys_setns + .long sys_process_vm_readv + .long sys_process_vm_writev diff --git a/fs/aio.c b/fs/aio.c index e29ec48..632b235 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1387,13 +1387,13 @@ static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb, bool compat) ret = compat_rw_copy_check_uvector(type, (struct compat_iovec __user *)kiocb->ki_buf, kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec, - &kiocb->ki_iovec); + &kiocb->ki_iovec, 1); else #endif ret = rw_copy_check_uvector(type, (struct iovec __user *)kiocb->ki_buf, kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec, - &kiocb->ki_iovec); + &kiocb->ki_iovec, 1); if (ret < 0) goto out; diff --git a/fs/compat.c b/fs/compat.c index 302e761..c987875 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -546,7 +546,7 @@ out: ssize_t compat_rw_copy_check_uvector(int type, const struct compat_iovec __user *uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, - struct iovec **ret_pointer) + struct iovec **ret_pointer, int check_access) { compat_ssize_t tot_len; struct iovec *iov = *ret_pointer = fast_pointer; @@ -593,7 +593,8 @@ ssize_t compat_rw_copy_check_uvector(int type, } if (len < 0) /* size_t not fitting in compat_ssize_t .. */ goto out; - if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) { + if (check_access && + !access_ok(vrfy_dir(type), compat_ptr(buf), len)) { ret = -EFAULT; goto out; } @@ -1107,7 +1108,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, goto out; tot_len = compat_rw_copy_check_uvector(type, uvector, nr_segs, - UIO_FASTIOV, iovstack, &iov); + UIO_FASTIOV, iovstack, &iov, 1); if (tot_len == 0) { ret = 0; goto out; diff --git a/fs/read_write.c b/fs/read_write.c index dfd1257..5ad4248 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -633,7 +633,8 @@ ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, - struct iovec **ret_pointer) + struct iovec **ret_pointer, + int check_access) { unsigned long seg; ssize_t ret; @@ -689,7 +690,8 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, ret = -EINVAL; goto out; } - if (unlikely(!access_ok(vrfy_dir(type), buf, len))) { + if (check_access + && unlikely(!access_ok(vrfy_dir(type), buf, len))) { ret = -EFAULT; goto out; } @@ -721,7 +723,7 @@ static ssize_t do_readv_writev(int type, struct file *file, } ret = rw_copy_check_uvector(type, uvector, nr_segs, - ARRAY_SIZE(iovstack), iovstack, &iov); + ARRAY_SIZE(iovstack), iovstack, &iov, 1); if (ret <= 0) goto out; diff --git a/include/linux/compat.h b/include/linux/compat.h index c6e7523..154bf56 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -547,7 +547,8 @@ extern ssize_t compat_rw_copy_check_uvector(int type, const struct compat_iovec __user *uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, - struct iovec **ret_pointer); + struct iovec **ret_pointer, + int check_access); extern void __user *compat_alloc_user_space(unsigned long len); diff --git a/include/linux/fs.h b/include/linux/fs.h index 14493a2..87b4c6b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1633,9 +1633,10 @@ struct inode_operations { struct seq_file; ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, - unsigned long nr_segs, unsigned long fast_segs, - struct iovec *fast_pointer, - struct iovec **ret_pointer); + unsigned long nr_segs, unsigned long fast_segs, + struct iovec *fast_pointer, + struct iovec **ret_pointer, + int check_access); extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 1ff0ec2..86a24b1 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -844,4 +844,17 @@ asmlinkage long sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); asmlinkage long sys_setns(int fd, int nstype); +asmlinkage long sys_process_vm_readv(pid_t pid, + const struct iovec __user *lvec, + unsigned long liovcnt, + const struct iovec __user *rvec, + unsigned long riovcnt, + unsigned long flags); +asmlinkage long sys_process_vm_writev(pid_t pid, + const struct iovec __user *lvec, + unsigned long liovcnt, + const struct iovec __user *rvec, + unsigned long riovcnt, + unsigned long flags); + #endif diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index a9a5de0..47bfa16 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -145,6 +145,10 @@ cond_syscall(sys_io_submit); cond_syscall(sys_io_cancel); cond_syscall(sys_io_getevents); cond_syscall(sys_syslog); +cond_syscall(sys_process_vm_readv); +cond_syscall(sys_process_vm_writev); +cond_syscall(compat_sys_process_vm_readv); +cond_syscall(compat_sys_process_vm_writev); /* arch-specific weak syscall entries */ cond_syscall(sys_pciconfig_read); diff --git a/mm/Makefile b/mm/Makefile index 836e416..50ec00e 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -5,7 +5,8 @@ mmu-y := nommu.o mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ - vmalloc.o pagewalk.o pgtable-generic.o + vmalloc.o pagewalk.o pgtable-generic.o \ + process_vm_access.o obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ maccess.o page_alloc.o page-writeback.o \ diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c new file mode 100644 index 0000000..e920aa3 --- /dev/null +++ b/mm/process_vm_access.c @@ -0,0 +1,496 @@ +/* + * linux/mm/process_vm_access.c + * + * Copyright (C) 2010-2011 Christopher Yeoh , IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_COMPAT +#include +#endif + +/** + * process_vm_rw_pages - read/write pages from task specified + * @task: task to read/write from + * @mm: mm for task + * @process_pages: struct pages area that can store at least + * nr_pages_to_copy struct page pointers + * @pa: address of page in task to start copying from/to + * @start_offset: offset in page to start copying from/to + * @len: number of bytes to copy + * @lvec: iovec array specifying where to copy to/from + * @lvec_cnt: number of elements in iovec array + * @lvec_current: index in iovec array we are up to + * @lvec_offset: offset in bytes from current iovec iov_base we are up to + * @vm_write: 0 means copy from, 1 means copy to + * @nr_pages_to_copy: number of pages to copy + * @bytes_copied: returns number of bytes successfully copied + * Returns 0 on success, error code otherwise + */ +static int process_vm_rw_pages(struct task_struct *task, + struct mm_struct *mm, + struct page **process_pages, + unsigned long pa, + unsigned long start_offset, + unsigned long len, + const struct iovec *lvec, + unsigned long lvec_cnt, + unsigned long *lvec_current, + size_t *lvec_offset, + int vm_write, + unsigned int nr_pages_to_copy, + ssize_t *bytes_copied) +{ + int pages_pinned; + void *target_kaddr; + int pgs_copied = 0; + int j; + int ret; + ssize_t bytes_to_copy; + ssize_t rc = 0; + + *bytes_copied = 0; + + /* Get the pages we're interested in */ + down_read(&mm->mmap_sem); + pages_pinned = get_user_pages(task, mm, pa, + nr_pages_to_copy, + vm_write, 0, process_pages, NULL); + up_read(&mm->mmap_sem); + + if (pages_pinned != nr_pages_to_copy) { + rc = -EFAULT; + goto end; + } + + /* Do the copy for each page */ + for (pgs_copied = 0; + (pgs_copied < nr_pages_to_copy) && (*lvec_current < lvec_cnt); + pgs_copied++) { + /* Make sure we have a non zero length iovec */ + while (*lvec_current < lvec_cnt + && lvec[*lvec_current].iov_len == 0) + (*lvec_current)++; + if (*lvec_current == lvec_cnt) + break; + + /* + * Will copy smallest of: + * - bytes remaining in page + * - bytes remaining in destination iovec + */ + bytes_to_copy = min_t(ssize_t, PAGE_SIZE - start_offset, + len - *bytes_copied); + bytes_to_copy = min_t(ssize_t, bytes_to_copy, + lvec[*lvec_current].iov_len + - *lvec_offset); + + target_kaddr = kmap(process_pages[pgs_copied]) + start_offset; + + if (vm_write) + ret = copy_from_user(target_kaddr, + lvec[*lvec_current].iov_base + + *lvec_offset, + bytes_to_copy); + else + ret = copy_to_user(lvec[*lvec_current].iov_base + + *lvec_offset, + target_kaddr, bytes_to_copy); + kunmap(process_pages[pgs_copied]); + if (ret) { + *bytes_copied += bytes_to_copy - ret; + pgs_copied++; + rc = -EFAULT; + goto end; + } + *bytes_copied += bytes_to_copy; + *lvec_offset += bytes_to_copy; + if (*lvec_offset == lvec[*lvec_current].iov_len) { + /* + * Need to copy remaining part of page into the + * next iovec if there are any bytes left in page + */ + (*lvec_current)++; + *lvec_offset = 0; + start_offset = (start_offset + bytes_to_copy) + % PAGE_SIZE; + if (start_offset) + pgs_copied--; + } else { + start_offset = 0; + } + } + +end: + if (vm_write) { + for (j = 0; j < pages_pinned; j++) { + if (j < pgs_copied) + set_page_dirty_lock(process_pages[j]); + put_page(process_pages[j]); + } + } else { + for (j = 0; j < pages_pinned; j++) + put_page(process_pages[j]); + } + + return rc; +} + +/* Maximum number of pages kmalloc'd to hold struct page's during copy */ +#define PVM_MAX_KMALLOC_PAGES (PAGE_SIZE * 2) + +/** + * process_vm_rw_single_vec - read/write pages from task specified + * @addr: start memory address of target process + * @len: size of area to copy to/from + * @lvec: iovec array specifying where to copy to/from locally + * @lvec_cnt: number of elements in iovec array + * @lvec_current: index in iovec array we are up to + * @lvec_offset: offset in bytes from current iovec iov_base we are up to + * @process_pages: struct pages area that can store at least + * nr_pages_to_copy struct page pointers + * @mm: mm for task + * @task: task to read/write from + * @vm_write: 0 means copy from, 1 means copy to + * @bytes_copied: returns number of bytes successfully copied + * Returns 0 on success or on failure error code + */ +static int process_vm_rw_single_vec(unsigned long addr, + unsigned long len, + const struct iovec *lvec, + unsigned long lvec_cnt, + unsigned long *lvec_current, + size_t *lvec_offset, + struct page **process_pages, + struct mm_struct *mm, + struct task_struct *task, + int vm_write, + ssize_t *bytes_copied) +{ + unsigned long pa = addr & PAGE_MASK; + unsigned long start_offset = addr - pa; + unsigned long nr_pages; + ssize_t bytes_copied_loop; + ssize_t rc = 0; + unsigned long nr_pages_copied = 0; + unsigned long nr_pages_to_copy; + unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES + / sizeof(struct pages *); + + *bytes_copied = 0; + + /* Work out address and page range required */ + if (len == 0) + return 0; + nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1; + + while ((nr_pages_copied < nr_pages) && (*lvec_current < lvec_cnt)) { + nr_pages_to_copy = min(nr_pages - nr_pages_copied, + max_pages_per_loop); + + rc = process_vm_rw_pages(task, mm, process_pages, pa, + start_offset, len, + lvec, lvec_cnt, + lvec_current, lvec_offset, + vm_write, nr_pages_to_copy, + &bytes_copied_loop); + start_offset = 0; + *bytes_copied += bytes_copied_loop; + + if (rc < 0) { + return rc; + } else { + len -= bytes_copied_loop; + nr_pages_copied += nr_pages_to_copy; + pa += nr_pages_to_copy * PAGE_SIZE; + } + } + + return rc; +} + +/* Maximum number of entries for process pages array + which lives on stack */ +#define PVM_MAX_PP_ARRAY_COUNT 16 + +/** + * process_vm_rw_core - core of reading/writing pages from task specified + * @pid: PID of process to read/write from/to + * @lvec: iovec array specifying where to copy to/from locally + * @liovcnt: size of lvec array + * @rvec: iovec array specifying where to copy to/from in the other process + * @riovcnt: size of rvec array + * @flags: currently unused + * @vm_write: 0 if reading from other process, 1 if writing to other process + * Returns the number of bytes read/written or error code. May + * return less bytes than expected if an error occurs during the copying + * process. + */ +static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec, + unsigned long liovcnt, + const struct iovec *rvec, + unsigned long riovcnt, + unsigned long flags, int vm_write) +{ + struct task_struct *task; + struct page *pp_stack[PVM_MAX_PP_ARRAY_COUNT]; + struct page **process_pages = pp_stack; + struct mm_struct *mm; + unsigned long i; + ssize_t rc = 0; + ssize_t bytes_copied_loop; + ssize_t bytes_copied = 0; + unsigned long nr_pages = 0; + unsigned long nr_pages_iov; + unsigned long iov_l_curr_idx = 0; + size_t iov_l_curr_offset = 0; + ssize_t iov_len; + + /* + * Work out how many pages of struct pages we're going to need + * when eventually calling get_user_pages + */ + for (i = 0; i < riovcnt; i++) { + iov_len = rvec[i].iov_len; + if (iov_len > 0) { + nr_pages_iov = ((unsigned long)rvec[i].iov_base + + iov_len) + / PAGE_SIZE - (unsigned long)rvec[i].iov_base + / PAGE_SIZE + 1; + nr_pages = max(nr_pages, nr_pages_iov); + } + } + + if (nr_pages == 0) + return 0; + + if (nr_pages > PVM_MAX_PP_ARRAY_COUNT) { + /* For reliability don't try to kmalloc more than + 2 pages worth */ + process_pages = kmalloc(min_t(size_t, PVM_MAX_KMALLOC_PAGES, + sizeof(struct pages *)*nr_pages), + GFP_KERNEL); + + if (!process_pages) + return -ENOMEM; + } + + /* Get process information */ + rcu_read_lock(); + task = find_task_by_vpid(pid); + if (task) + get_task_struct(task); + rcu_read_unlock(); + if (!task) { + rc = -ESRCH; + goto free_proc_pages; + } + + task_lock(task); + if (__ptrace_may_access(task, PTRACE_MODE_ATTACH)) { + task_unlock(task); + rc = -EPERM; + goto put_task_struct; + } + mm = task->mm; + + if (!mm || (task->flags & PF_KTHREAD)) { + task_unlock(task); + rc = -EINVAL; + goto put_task_struct; + } + + atomic_inc(&mm->mm_users); + task_unlock(task); + + for (i = 0; i < riovcnt && iov_l_curr_idx < liovcnt; i++) { + rc = process_vm_rw_single_vec( + (unsigned long)rvec[i].iov_base, rvec[i].iov_len, + lvec, liovcnt, &iov_l_curr_idx, &iov_l_curr_offset, + process_pages, mm, task, vm_write, &bytes_copied_loop); + bytes_copied += bytes_copied_loop; + if (rc != 0) { + /* If we have managed to copy any data at all then + we return the number of bytes copied. Otherwise + we return the error code */ + if (bytes_copied) + rc = bytes_copied; + goto put_mm; + } + } + + rc = bytes_copied; +put_mm: + mmput(mm); + +put_task_struct: + put_task_struct(task); + +free_proc_pages: + if (process_pages != pp_stack) + kfree(process_pages); + return rc; +} + +/** + * process_vm_rw - check iovecs before calling core routine + * @pid: PID of process to read/write from/to + * @lvec: iovec array specifying where to copy to/from locally + * @liovcnt: size of lvec array + * @rvec: iovec array specifying where to copy to/from in the other process + * @riovcnt: size of rvec array + * @flags: currently unused + * @vm_write: 0 if reading from other process, 1 if writing to other process + * Returns the number of bytes read/written or error code. May + * return less bytes than expected if an error occurs during the copying + * process. + */ +static ssize_t process_vm_rw(pid_t pid, + const struct iovec __user *lvec, + unsigned long liovcnt, + const struct iovec __user *rvec, + unsigned long riovcnt, + unsigned long flags, int vm_write) +{ + struct iovec iovstack_l[UIO_FASTIOV]; + struct iovec iovstack_r[UIO_FASTIOV]; + struct iovec *iov_l = iovstack_l; + struct iovec *iov_r = iovstack_r; + ssize_t rc; + + if (flags != 0) + return -EINVAL; + + /* Check iovecs */ + if (vm_write) + rc = rw_copy_check_uvector(WRITE, lvec, liovcnt, UIO_FASTIOV, + iovstack_l, &iov_l, 1); + else + rc = rw_copy_check_uvector(READ, lvec, liovcnt, UIO_FASTIOV, + iovstack_l, &iov_l, 1); + if (rc <= 0) + goto free_iovecs; + + rc = rw_copy_check_uvector(READ, rvec, riovcnt, UIO_FASTIOV, + iovstack_r, &iov_r, 0); + if (rc <= 0) + goto free_iovecs; + + rc = process_vm_rw_core(pid, iov_l, liovcnt, iov_r, riovcnt, flags, + vm_write); + +free_iovecs: + if (iov_r != iovstack_r) + kfree(iov_r); + if (iov_l != iovstack_l) + kfree(iov_l); + + return rc; +} + +SYSCALL_DEFINE6(process_vm_readv, pid_t, pid, const struct iovec __user *, lvec, + unsigned long, liovcnt, const struct iovec __user *, rvec, + unsigned long, riovcnt, unsigned long, flags) +{ + return process_vm_rw(pid, lvec, liovcnt, rvec, riovcnt, flags, 0); +} + +SYSCALL_DEFINE6(process_vm_writev, pid_t, pid, + const struct iovec __user *, lvec, + unsigned long, liovcnt, const struct iovec __user *, rvec, + unsigned long, riovcnt, unsigned long, flags) +{ + return process_vm_rw(pid, lvec, liovcnt, rvec, riovcnt, flags, 1); +} + +#ifdef CONFIG_COMPAT + +asmlinkage ssize_t +compat_process_vm_rw(compat_pid_t pid, + const struct compat_iovec __user *lvec, + unsigned long liovcnt, + const struct compat_iovec __user *rvec, + unsigned long riovcnt, + unsigned long flags, int vm_write) +{ + struct iovec iovstack_l[UIO_FASTIOV]; + struct iovec iovstack_r[UIO_FASTIOV]; + struct iovec *iov_l = iovstack_l; + struct iovec *iov_r = iovstack_r; + ssize_t rc = -EFAULT; + + if (flags != 0) + return -EINVAL; + + if (!access_ok(VERIFY_READ, lvec, liovcnt * sizeof(*lvec))) + goto out; + + if (!access_ok(VERIFY_READ, rvec, riovcnt * sizeof(*rvec))) + goto out; + + if (vm_write) + rc = compat_rw_copy_check_uvector(WRITE, lvec, liovcnt, + UIO_FASTIOV, iovstack_l, + &iov_l, 1); + else + rc = compat_rw_copy_check_uvector(READ, lvec, liovcnt, + UIO_FASTIOV, iovstack_l, + &iov_l, 1); + if (rc <= 0) + goto free_iovecs; + rc = compat_rw_copy_check_uvector(READ, rvec, riovcnt, + UIO_FASTIOV, iovstack_r, + &iov_r, 0); + if (rc <= 0) + goto free_iovecs; + + rc = process_vm_rw_core(pid, iov_l, liovcnt, iov_r, riovcnt, flags, + vm_write); + +free_iovecs: + if (iov_r != iovstack_r) + kfree(iov_r); + if (iov_l != iovstack_l) + kfree(iov_l); + +out: + return rc; +} + +asmlinkage ssize_t +compat_sys_process_vm_readv(compat_pid_t pid, + const struct compat_iovec __user *lvec, + unsigned long liovcnt, + const struct compat_iovec __user *rvec, + unsigned long riovcnt, + unsigned long flags) +{ + return compat_process_vm_rw(pid, lvec, liovcnt, rvec, + riovcnt, flags, 0); +} + +asmlinkage ssize_t +compat_sys_process_vm_writev(compat_pid_t pid, + const struct compat_iovec __user *lvec, + unsigned long liovcnt, + const struct compat_iovec __user *rvec, + unsigned long riovcnt, + unsigned long flags) +{ + return compat_process_vm_rw(pid, lvec, liovcnt, rvec, + riovcnt, flags, 1); +} + +#endif diff --git a/security/keys/compat.c b/security/keys/compat.c index 338b510..4c48e13 100644 --- a/security/keys/compat.c +++ b/security/keys/compat.c @@ -38,7 +38,7 @@ long compat_keyctl_instantiate_key_iov( ret = compat_rw_copy_check_uvector(WRITE, _payload_iov, ioc, ARRAY_SIZE(iovstack), - iovstack, &iov); + iovstack, &iov, 1); if (ret < 0) return ret; if (ret == 0) diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index eca5191..0b3f5d7 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1065,7 +1065,7 @@ long keyctl_instantiate_key_iov(key_serial_t id, goto no_payload; ret = rw_copy_check_uvector(WRITE, _payload_iov, ioc, - ARRAY_SIZE(iovstack), iovstack, &iov); + ARRAY_SIZE(iovstack), iovstack, &iov, 1); if (ret < 0) return ret; if (ret == 0) -- cgit v0.10.2 From b9e84ac1536d35aee03b2601f19694949f0bd506 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 31 Oct 2011 17:06:44 -0700 Subject: mm: compaction: trivial clean up in acct_isolated() acct_isolated of compaction uses page_lru_base_type which returns only base type of LRU list so it never returns LRU_ACTIVE_ANON or LRU_ACTIVE_FILE. In addtion, cc->nr_[anon|file] is used in only acct_isolated so it doesn't have fields in conpact_control. This patch removes fields from compact_control and makes clear function of acct_issolated which counts the number of anon|file pages isolated. Signed-off-by: Minchan Kim Acked-by: Johannes Weiner Reviewed-by: KAMEZAWA Hiroyuki Reviewed-by: KOSAKI Motohiro Acked-by: Mel Gorman Acked-by: Rik van Riel Reviewed-by: Michal Hocko Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/compaction.c b/mm/compaction.c index 6cc604b..b2977a5 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -35,10 +35,6 @@ struct compact_control { unsigned long migrate_pfn; /* isolate_migratepages search base */ bool sync; /* Synchronous migration */ - /* Account for isolated anon and file pages */ - unsigned long nr_anon; - unsigned long nr_file; - unsigned int order; /* order a direct compactor needs */ int migratetype; /* MOVABLE, RECLAIMABLE etc */ struct zone *zone; @@ -223,17 +219,13 @@ static void isolate_freepages(struct zone *zone, static void acct_isolated(struct zone *zone, struct compact_control *cc) { struct page *page; - unsigned int count[NR_LRU_LISTS] = { 0, }; + unsigned int count[2] = { 0, }; - list_for_each_entry(page, &cc->migratepages, lru) { - int lru = page_lru_base_type(page); - count[lru]++; - } + list_for_each_entry(page, &cc->migratepages, lru) + count[!!page_is_file_cache(page)]++; - cc->nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON]; - cc->nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE]; - __mod_zone_page_state(zone, NR_ISOLATED_ANON, cc->nr_anon); - __mod_zone_page_state(zone, NR_ISOLATED_FILE, cc->nr_file); + __mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]); + __mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]); } /* Similar to reclaim, but different enough that they don't share logic */ -- cgit v0.10.2 From 4356f21d09283dc6d39a6f7287a65ddab61e2808 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 31 Oct 2011 17:06:47 -0700 Subject: mm: change isolate mode from #define to bitwise type Change ISOLATE_XXX macro with bitwise isolate_mode_t type. Normally, macro isn't recommended as it's type-unsafe and making debugging harder as symbol cannot be passed throught to the debugger. Quote from Johannes " Hmm, it would probably be cleaner to fully convert the isolation mode into independent flags. INACTIVE, ACTIVE, BOTH is currently a tri-state among flags, which is a bit ugly." This patch moves isolate mode from swap.h to mmzone.h by memcontrol.h Signed-off-by: Minchan Kim Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Rik van Riel Cc: Michal Hocko Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl index 12cecc8..4a37c47 100644 --- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl +++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl @@ -379,10 +379,10 @@ EVENT_PROCESS: # To closer match vmstat scanning statistics, only count isolate_both # and isolate_inactive as scanning. isolate_active is rotation - # isolate_inactive == 0 - # isolate_active == 1 - # isolate_both == 2 - if ($isolate_mode != 1) { + # isolate_inactive == 1 + # isolate_active == 2 + # isolate_both == 3 + if ($isolate_mode != 2) { $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned; } $perprocesspid{$process_pid}->{HIGH_NR_CONTIG_DIRTY} += $nr_contig_dirty; diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 343bd76..ac797fa 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -35,7 +35,8 @@ enum mem_cgroup_page_stat_item { extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, unsigned long *scanned, int order, - int mode, struct zone *z, + isolate_mode_t mode, + struct zone *z, struct mem_cgroup *mem_cont, int active, int file); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index be1ac8d..436ce6e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -164,6 +164,14 @@ static inline int is_unevictable_lru(enum lru_list l) #define LRU_ALL_EVICTABLE (LRU_ALL_FILE | LRU_ALL_ANON) #define LRU_ALL ((1 << NR_LRU_LISTS) - 1) +/* Isolate inactive pages */ +#define ISOLATE_INACTIVE ((__force isolate_mode_t)0x1) +/* Isolate active pages */ +#define ISOLATE_ACTIVE ((__force isolate_mode_t)0x2) + +/* LRU Isolation modes. */ +typedef unsigned __bitwise__ isolate_mode_t; + enum zone_watermarks { WMARK_MIN, WMARK_LOW, diff --git a/include/linux/swap.h b/include/linux/swap.h index c71f84b..1e22e12 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -243,15 +243,10 @@ static inline void lru_cache_add_file(struct page *page) __lru_cache_add(page, LRU_INACTIVE_FILE); } -/* LRU Isolation modes. */ -#define ISOLATE_INACTIVE 0 /* Isolate inactive pages. */ -#define ISOLATE_ACTIVE 1 /* Isolate active pages. */ -#define ISOLATE_BOTH 2 /* Isolate both active and inactive pages. */ - /* linux/mm/vmscan.c */ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); -extern int __isolate_lru_page(struct page *page, int mode, int file); +extern int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap); extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 36851f7..edc4b3d 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -266,7 +266,7 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, unsigned long nr_lumpy_taken, unsigned long nr_lumpy_dirty, unsigned long nr_lumpy_failed, - int isolate_mode), + isolate_mode_t isolate_mode), TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode), @@ -278,7 +278,7 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, __field(unsigned long, nr_lumpy_taken) __field(unsigned long, nr_lumpy_dirty) __field(unsigned long, nr_lumpy_failed) - __field(int, isolate_mode) + __field(isolate_mode_t, isolate_mode) ), TP_fast_assign( @@ -312,7 +312,7 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate, unsigned long nr_lumpy_taken, unsigned long nr_lumpy_dirty, unsigned long nr_lumpy_failed, - int isolate_mode), + isolate_mode_t isolate_mode), TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode) @@ -327,7 +327,7 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate, unsigned long nr_lumpy_taken, unsigned long nr_lumpy_dirty, unsigned long nr_lumpy_failed, - int isolate_mode), + isolate_mode_t isolate_mode), TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode) diff --git a/mm/compaction.c b/mm/compaction.c index b2977a5..47f717f 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -349,7 +349,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, } /* Try isolate the page */ - if (__isolate_lru_page(page, ISOLATE_BOTH, 0) != 0) + if (__isolate_lru_page(page, + ISOLATE_ACTIVE|ISOLATE_INACTIVE, 0) != 0) continue; VM_BUG_ON(PageTransCompound(page)); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3508777..2d57555 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1185,7 +1185,8 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page) unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, unsigned long *scanned, int order, - int mode, struct zone *z, + isolate_mode_t mode, + struct zone *z, struct mem_cgroup *mem_cont, int active, int file) { diff --git a/mm/vmscan.c b/mm/vmscan.c index 9fdfce7..ec6dbcb 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1012,23 +1012,27 @@ keep_lumpy: * * returns 0 on success, -ve errno on failure. */ -int __isolate_lru_page(struct page *page, int mode, int file) +int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) { + bool all_lru_mode; int ret = -EINVAL; /* Only take pages on the LRU. */ if (!PageLRU(page)) return ret; + all_lru_mode = (mode & (ISOLATE_ACTIVE|ISOLATE_INACTIVE)) == + (ISOLATE_ACTIVE|ISOLATE_INACTIVE); + /* * When checking the active state, we need to be sure we are * dealing with comparible boolean values. Take the logical not * of each. */ - if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode)) + if (!all_lru_mode && !PageActive(page) != !(mode & ISOLATE_ACTIVE)) return ret; - if (mode != ISOLATE_BOTH && page_is_file_cache(page) != file) + if (!all_lru_mode && !!page_is_file_cache(page) != file) return ret; /* @@ -1076,7 +1080,8 @@ int __isolate_lru_page(struct page *page, int mode, int file) */ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, struct list_head *src, struct list_head *dst, - unsigned long *scanned, int order, int mode, int file) + unsigned long *scanned, int order, isolate_mode_t mode, + int file) { unsigned long nr_taken = 0; unsigned long nr_lumpy_taken = 0; @@ -1201,8 +1206,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, static unsigned long isolate_pages_global(unsigned long nr, struct list_head *dst, unsigned long *scanned, int order, - int mode, struct zone *z, - int active, int file) + isolate_mode_t mode, + struct zone *z, int active, int file) { int lru = LRU_BASE; if (active) @@ -1448,6 +1453,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, unsigned long nr_taken; unsigned long nr_anon; unsigned long nr_file; + isolate_mode_t reclaim_mode = ISOLATE_INACTIVE; while (unlikely(too_many_isolated(zone, file, sc))) { congestion_wait(BLK_RW_ASYNC, HZ/10); @@ -1458,15 +1464,15 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, } set_reclaim_mode(priority, sc, false); + if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM) + reclaim_mode |= ISOLATE_ACTIVE; + lru_add_drain(); spin_lock_irq(&zone->lru_lock); if (scanning_global_lru(sc)) { - nr_taken = isolate_pages_global(nr_to_scan, - &page_list, &nr_scanned, sc->order, - sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM ? - ISOLATE_BOTH : ISOLATE_INACTIVE, - zone, 0, file); + nr_taken = isolate_pages_global(nr_to_scan, &page_list, + &nr_scanned, sc->order, reclaim_mode, zone, 0, file); zone->pages_scanned += nr_scanned; if (current_is_kswapd()) __count_zone_vm_events(PGSCAN_KSWAPD, zone, @@ -1475,12 +1481,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scanned); } else { - nr_taken = mem_cgroup_isolate_pages(nr_to_scan, - &page_list, &nr_scanned, sc->order, - sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM ? - ISOLATE_BOTH : ISOLATE_INACTIVE, - zone, sc->mem_cgroup, - 0, file); + nr_taken = mem_cgroup_isolate_pages(nr_to_scan, &page_list, + &nr_scanned, sc->order, reclaim_mode, zone, + sc->mem_cgroup, 0, file); /* * mem_cgroup_isolate_pages() keeps track of * scanned pages on its own. -- cgit v0.10.2 From 39deaf8585152f1a35c1676d3d7dc6ae0fb65967 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 31 Oct 2011 17:06:51 -0700 Subject: mm: compaction: make isolate_lru_page() filter-aware In async mode, compaction doesn't migrate dirty or writeback pages. So, it's meaningless to pick the page and re-add it to lru list. Of course, when we isolate the page in compaction, the page might be dirty or writeback but when we try to migrate the page, the page would be not dirty, writeback. So it could be migrated. But it's very unlikely as isolate and migration cycle is much faster than writeout. So, this patch helps cpu overhead and prevent unnecessary LRU churning. Signed-off-by: Minchan Kim Acked-by: Johannes Weiner Reviewed-by: KAMEZAWA Hiroyuki Reviewed-by: KOSAKI Motohiro Acked-by: Mel Gorman Acked-by: Rik van Riel Reviewed-by: Michal Hocko Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 436ce6e..80da968 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -168,6 +168,8 @@ static inline int is_unevictable_lru(enum lru_list l) #define ISOLATE_INACTIVE ((__force isolate_mode_t)0x1) /* Isolate active pages */ #define ISOLATE_ACTIVE ((__force isolate_mode_t)0x2) +/* Isolate clean file */ +#define ISOLATE_CLEAN ((__force isolate_mode_t)0x4) /* LRU Isolation modes. */ typedef unsigned __bitwise__ isolate_mode_t; diff --git a/mm/compaction.c b/mm/compaction.c index 47f717f..a0e4202 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -261,6 +261,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, unsigned long last_pageblock_nr = 0, pageblock_nr; unsigned long nr_scanned = 0, nr_isolated = 0; struct list_head *migratelist = &cc->migratepages; + isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE; /* Do not scan outside zone boundaries */ low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn); @@ -348,9 +349,11 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, continue; } + if (!cc->sync) + mode |= ISOLATE_CLEAN; + /* Try isolate the page */ - if (__isolate_lru_page(page, - ISOLATE_ACTIVE|ISOLATE_INACTIVE, 0) != 0) + if (__isolate_lru_page(page, mode, 0) != 0) continue; VM_BUG_ON(PageTransCompound(page)); diff --git a/mm/vmscan.c b/mm/vmscan.c index ec6dbcb..c007e78 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1045,6 +1045,9 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) ret = -EBUSY; + if ((mode & ISOLATE_CLEAN) && (PageDirty(page) || PageWriteback(page))) + return ret; + if (likely(get_page_unless_zero(page))) { /* * Be careful not to clear PageLRU until after we're -- cgit v0.10.2 From f80c0673610e36ae29d63e3297175e22f70dde5f Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 31 Oct 2011 17:06:55 -0700 Subject: mm: zone_reclaim: make isolate_lru_page() filter-aware In __zone_reclaim case, we don't want to shrink mapped page. Nonetheless, we have isolated mapped page and re-add it into LRU's head. It's unnecessary CPU overhead and makes LRU churning. Of course, when we isolate the page, the page might be mapped but when we try to migrate the page, the page would be not mapped. So it could be migrated. But race is rare and although it happens, it's no big deal. Signed-off-by: Minchan Kim Acked-by: Johannes Weiner Reviewed-by: KAMEZAWA Hiroyuki Reviewed-by: KOSAKI Motohiro Reviewed-by: Michal Hocko Cc: Mel Gorman Cc: Rik van Riel Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 80da968..ec57779 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -170,6 +170,8 @@ static inline int is_unevictable_lru(enum lru_list l) #define ISOLATE_ACTIVE ((__force isolate_mode_t)0x2) /* Isolate clean file */ #define ISOLATE_CLEAN ((__force isolate_mode_t)0x4) +/* Isolate unmapped file */ +#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x8) /* LRU Isolation modes. */ typedef unsigned __bitwise__ isolate_mode_t; diff --git a/mm/vmscan.c b/mm/vmscan.c index c007e78..b68a934 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1048,6 +1048,9 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) if ((mode & ISOLATE_CLEAN) && (PageDirty(page) || PageWriteback(page))) return ret; + if ((mode & ISOLATE_UNMAPPED) && page_mapped(page)) + return ret; + if (likely(get_page_unless_zero(page))) { /* * Be careful not to clear PageLRU until after we're @@ -1471,6 +1474,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, reclaim_mode |= ISOLATE_ACTIVE; lru_add_drain(); + + if (!sc->may_unmap) + reclaim_mode |= ISOLATE_UNMAPPED; + if (!sc->may_writepage) + reclaim_mode |= ISOLATE_CLEAN; + spin_lock_irq(&zone->lru_lock); if (scanning_global_lru(sc)) { @@ -1588,19 +1597,26 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, struct page *page; struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); unsigned long nr_rotated = 0; + isolate_mode_t reclaim_mode = ISOLATE_ACTIVE; lru_add_drain(); + + if (!sc->may_unmap) + reclaim_mode |= ISOLATE_UNMAPPED; + if (!sc->may_writepage) + reclaim_mode |= ISOLATE_CLEAN; + spin_lock_irq(&zone->lru_lock); if (scanning_global_lru(sc)) { nr_taken = isolate_pages_global(nr_pages, &l_hold, &pgscanned, sc->order, - ISOLATE_ACTIVE, zone, + reclaim_mode, zone, 1, file); zone->pages_scanned += pgscanned; } else { nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order, - ISOLATE_ACTIVE, zone, + reclaim_mode, zone, sc->mem_cgroup, 1, file); /* * mem_cgroup_isolate_pages() keeps track of -- cgit v0.10.2 From 0dabec93de633a87adfbbe1d800a4c56cd19d73b Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 31 Oct 2011 17:06:57 -0700 Subject: mm: migration: clean up unmap_and_move() unmap_and_move() is one a big messy function. Clean it up. Signed-off-by: Minchan Kim Reviewed-by: KOSAKI Motohiro Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Rik van Riel Cc: Michal Hocko Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/migrate.c b/mm/migrate.c index 14d0a6a..33358f8 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -621,38 +621,18 @@ static int move_to_new_page(struct page *newpage, struct page *page, return rc; } -/* - * Obtain the lock on page, remove all ptes and migrate the page - * to the newly allocated page in newpage. - */ -static int unmap_and_move(new_page_t get_new_page, unsigned long private, - struct page *page, int force, bool offlining, bool sync) +static int __unmap_and_move(struct page *page, struct page *newpage, + int force, bool offlining, bool sync) { - int rc = 0; - int *result = NULL; - struct page *newpage = get_new_page(page, private, &result); + int rc = -EAGAIN; int remap_swapcache = 1; int charge = 0; struct mem_cgroup *mem; struct anon_vma *anon_vma = NULL; - if (!newpage) - return -ENOMEM; - - if (page_count(page) == 1) { - /* page was freed from under us. So we are done. */ - goto move_newpage; - } - if (unlikely(PageTransHuge(page))) - if (unlikely(split_huge_page(page))) - goto move_newpage; - - /* prepare cgroup just returns 0 or -ENOMEM */ - rc = -EAGAIN; - if (!trylock_page(page)) { if (!force || !sync) - goto move_newpage; + goto out; /* * It's not safe for direct compaction to call lock_page. @@ -668,7 +648,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, * altogether. */ if (current->flags & PF_MEMALLOC) - goto move_newpage; + goto out; lock_page(page); } @@ -785,27 +765,52 @@ uncharge: mem_cgroup_end_migration(mem, page, newpage, rc == 0); unlock: unlock_page(page); +out: + return rc; +} -move_newpage: +/* + * Obtain the lock on page, remove all ptes and migrate the page + * to the newly allocated page in newpage. + */ +static int unmap_and_move(new_page_t get_new_page, unsigned long private, + struct page *page, int force, bool offlining, bool sync) +{ + int rc = 0; + int *result = NULL; + struct page *newpage = get_new_page(page, private, &result); + + if (!newpage) + return -ENOMEM; + + if (page_count(page) == 1) { + /* page was freed from under us. So we are done. */ + goto out; + } + + if (unlikely(PageTransHuge(page))) + if (unlikely(split_huge_page(page))) + goto out; + + rc = __unmap_and_move(page, newpage, force, offlining, sync); +out: if (rc != -EAGAIN) { - /* - * A page that has been migrated has all references - * removed and will be freed. A page that has not been - * migrated will have kepts its references and be - * restored. - */ - list_del(&page->lru); + /* + * A page that has been migrated has all references + * removed and will be freed. A page that has not been + * migrated will have kepts its references and be + * restored. + */ + list_del(&page->lru); dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); putback_lru_page(page); } - /* * Move the new page to the LRU. If migration was not successful * then this will free the page. */ putback_lru_page(newpage); - if (result) { if (rc) *result = rc; -- cgit v0.10.2 From 3fa36acbced23c563345de3179dfe1775f15be5e Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 31 Oct 2011 17:07:02 -0700 Subject: radix_tree: clean away saw_unset_tag leftovers radix_tree_tag_get()'s BUG (when it sees a tag after saw_unset_tag) was unsafe and removed in 2.6.34, but the pointless saw_unset_tag left behind. Remove it now, and return 0 as soon as we see unset tag - we already rely upon the root tag to be correct, returning 0 immediately if it's not set. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/radix-tree.c b/lib/radix-tree.c index a2f9da5..d9df745 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -576,7 +576,6 @@ int radix_tree_tag_get(struct radix_tree_root *root, { unsigned int height, shift; struct radix_tree_node *node; - int saw_unset_tag = 0; /* check the root's tag bit */ if (!root_tag_get(root, tag)) @@ -603,15 +602,10 @@ int radix_tree_tag_get(struct radix_tree_root *root, return 0; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - - /* - * This is just a debug check. Later, we can bale as soon as - * we see an unset tag. - */ if (!tag_get(node, tag, offset)) - saw_unset_tag = 1; + return 0; if (height == 1) - return !!tag_get(node, tag, offset); + return 1; node = rcu_dereference_raw(node->slots[offset]); shift -= RADIX_TREE_MAP_SHIFT; height--; -- cgit v0.10.2 From 3da367c3e5fca71d4e778fa565d9b098d5518f4a Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 31 Oct 2011 17:07:03 -0700 Subject: vmscan: add block plug for page reclaim per-task block plug can reduce block queue lock contention and increase request merge. Currently page reclaim doesn't support it. I originally thought page reclaim doesn't need it, because kswapd thread count is limited and file cache write is done at flusher mostly. When I test a workload with heavy swap in a 4-node machine, each CPU is doing direct page reclaim and swap. This causes block queue lock contention. In my test, without below patch, the CPU utilization is about 2% ~ 7%. With the patch, the CPU utilization is about 1% ~ 3%. Disk throughput isn't changed. This should improve normal kswapd write and file cache write too (increase request merge for example), but might not be so obvious as I explain above. Signed-off-by: Shaohua Li Cc: Jens Axboe Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index b68a934..b1520b0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2005,12 +2005,14 @@ static void shrink_zone(int priority, struct zone *zone, enum lru_list l; unsigned long nr_reclaimed, nr_scanned; unsigned long nr_to_reclaim = sc->nr_to_reclaim; + struct blk_plug plug; restart: nr_reclaimed = 0; nr_scanned = sc->nr_scanned; get_scan_count(zone, sc, nr, priority); + blk_start_plug(&plug); while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) { for_each_evictable_lru(l) { @@ -2034,6 +2036,7 @@ restart: if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY) break; } + blk_finish_plug(&plug); sc->nr_reclaimed += nr_reclaimed; /* -- cgit v0.10.2 From d08c429b06d21bd2add88aea2cd1996f1b9b3bda Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 31 Oct 2011 17:07:05 -0700 Subject: mm/page-writeback.c: document bdi_min_ratio Looks like someone got distracted after adding the comment characters. Signed-off-by: Johannes Weiner Acked-by: Peter Zijlstra Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 0e309cd..793e987 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -305,7 +305,9 @@ static unsigned long task_min_dirty_limit(unsigned long bdi_dirty) } /* - * + * bdi_min_ratio keeps the sum of the minimum dirty shares of all + * registered backing devices, which, for obvious reasons, can not + * exceed 100%. */ static unsigned int bdi_min_ratio; -- cgit v0.10.2 From f660daac474c6f7c2d710100e29b3276a6f4db0a Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 31 Oct 2011 17:07:07 -0700 Subject: oom: thaw threads if oom killed thread is frozen before deferring If a thread has been oom killed and is frozen, thaw it before returning to the page allocator. Otherwise, it can stay frozen indefinitely and no memory will be freed. Signed-off-by: David Rientjes Reported-by: Konstantin Khlebnikov Cc: Oleg Nesterov Cc: KOSAKI Motohiro Cc: KAMEZAWA Hiroyuki Cc: "Rafael J. Wysocki" Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 626303b..d897262 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -32,6 +32,7 @@ #include #include #include +#include int sysctl_panic_on_oom; int sysctl_oom_kill_allocating_task; @@ -317,8 +318,11 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, * blocked waiting for another task which itself is waiting * for memory. Is there a better alternative? */ - if (test_tsk_thread_flag(p, TIF_MEMDIE)) + if (test_tsk_thread_flag(p, TIF_MEMDIE)) { + if (unlikely(frozen(p))) + thaw_process(p); return ERR_PTR(-1UL); + } if (!p->mm) continue; -- cgit v0.10.2 From 7b0d44fa49b1dcfdcf4897f12ddd12ddeab1a9d7 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 31 Oct 2011 17:07:11 -0700 Subject: oom: avoid killing kthreads if they assume the oom killed thread's mm After selecting a task to kill, the oom killer iterates all processes and kills all other threads that share the same mm_struct in different thread groups. It would not otherwise be helpful to kill a thread if its memory would not be subsequently freed. A kernel thread, however, may assume a user thread's mm by using use_mm(). This is only temporary and should not result in sending a SIGKILL to that kthread. This patch ensures that only user threads and not kthreads are sent a SIGKILL if they share the same mm_struct as the oom killed task. Signed-off-by: David Rientjes Reviewed-by: Michal Hocko Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/oom_kill.c b/mm/oom_kill.c index d897262..b0d8943 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -439,7 +439,7 @@ static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem) task_unlock(p); /* - * Kill all processes sharing p->mm in other thread groups, if any. + * Kill all user processes sharing p->mm in other thread groups, if any. * They don't get access to memory reserves or a higher scheduler * priority, though, to avoid depletion of all memory or task * starvation. This prevents mm->mmap_sem livelock when an oom killed @@ -449,7 +449,8 @@ static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem) * signal. */ for_each_process(q) - if (q->mm == mm && !same_thread_group(q, p)) { + if (q->mm == mm && !same_thread_group(q, p) && + !(q->flags & PF_KTHREAD)) { task_lock(q); /* Protect ->comm from prctl() */ pr_err("Kill process %d (%s) sharing same memory\n", task_pid_nr(q), q->comm); -- cgit v0.10.2 From c9f01245b6a7d77d17deaa71af10f6aca14fa24e Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 31 Oct 2011 17:07:15 -0700 Subject: oom: remove oom_disable_count This removes mm->oom_disable_count entirely since it's unnecessary and currently buggy. The counter was intended to be per-process but it's currently decremented in the exit path for each thread that exits, causing it to underflow. The count was originally intended to prevent oom killing threads that share memory with threads that cannot be killed since it doesn't lead to future memory freeing. The counter could be fixed to represent all threads sharing the same mm, but it's better to remove the count since: - it is possible that the OOM_DISABLE thread sharing memory with the victim is waiting on that thread to exit and will actually cause future memory freeing, and - there is no guarantee that a thread is disabled from oom killing just because another thread sharing its mm is oom disabled. Signed-off-by: David Rientjes Reported-by: Oleg Nesterov Reviewed-by: Oleg Nesterov Cc: Ying Han Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/exec.c b/fs/exec.c index 25dcbe5..3625464 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -841,10 +841,6 @@ static int exec_mmap(struct mm_struct *mm) tsk->mm = mm; tsk->active_mm = mm; activate_mm(active_mm, mm); - if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) { - atomic_dec(&old_mm->oom_disable_count); - atomic_inc(&tsk->mm->oom_disable_count); - } task_unlock(tsk); arch_pick_mmap_layout(mm); if (old_mm) { diff --git a/fs/proc/base.c b/fs/proc/base.c index 5eb0206..8f0087e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1107,13 +1107,6 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, goto err_sighand; } - if (oom_adjust != task->signal->oom_adj) { - if (oom_adjust == OOM_DISABLE) - atomic_inc(&task->mm->oom_disable_count); - if (task->signal->oom_adj == OOM_DISABLE) - atomic_dec(&task->mm->oom_disable_count); - } - /* * Warn that /proc/pid/oom_adj is deprecated, see * Documentation/feature-removal-schedule.txt. @@ -1215,12 +1208,6 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, goto err_sighand; } - if (oom_score_adj != task->signal->oom_score_adj) { - if (oom_score_adj == OOM_SCORE_ADJ_MIN) - atomic_inc(&task->mm->oom_disable_count); - if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) - atomic_dec(&task->mm->oom_disable_count); - } task->signal->oom_score_adj = oom_score_adj; if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) task->signal->oom_score_adj_min = oom_score_adj; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c93d00a..6456624 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -336,9 +336,6 @@ struct mm_struct { unsigned int token_priority; unsigned int last_interval; - /* How many tasks sharing this mm are OOM_DISABLE */ - atomic_t oom_disable_count; - unsigned long flags; /* Must use atomic bitops to access the bits */ struct core_state *core_state; /* coredumping support */ diff --git a/kernel/exit.c b/kernel/exit.c index 2913b35..d0b7d98 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -681,8 +681,6 @@ static void exit_mm(struct task_struct * tsk) enter_lazy_tlb(mm, current); /* We don't want this task to be frozen prematurely */ clear_freeze_flag(tsk); - if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) - atomic_dec(&mm->oom_disable_count); task_unlock(tsk); mm_update_next_owner(mm); mmput(mm); diff --git a/kernel/fork.c b/kernel/fork.c index 8e6b6f4..70d7619 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -501,7 +501,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) mm->cached_hole_size = ~0UL; mm_init_aio(mm); mm_init_owner(mm, p); - atomic_set(&mm->oom_disable_count, 0); if (likely(!mm_alloc_pgd(mm))) { mm->def_flags = 0; @@ -816,8 +815,6 @@ good_mm: /* Initializing for Swap token stuff */ mm->token_priority = 0; mm->last_interval = 0; - if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) - atomic_inc(&mm->oom_disable_count); tsk->mm = mm; tsk->active_mm = mm; @@ -1391,13 +1388,8 @@ bad_fork_cleanup_io: bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_mm: - if (p->mm) { - task_lock(p); - if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) - atomic_dec(&p->mm->oom_disable_count); - task_unlock(p); + if (p->mm) mmput(p->mm); - } bad_fork_cleanup_signal: if (!(clone_flags & CLONE_THREAD)) free_signal_struct(p->signal); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index b0d8943..2b97e8f 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -54,13 +54,7 @@ int test_set_oom_score_adj(int new_val) spin_lock_irq(&sighand->siglock); old_val = current->signal->oom_score_adj; - if (new_val != old_val) { - if (new_val == OOM_SCORE_ADJ_MIN) - atomic_inc(¤t->mm->oom_disable_count); - else if (old_val == OOM_SCORE_ADJ_MIN) - atomic_dec(¤t->mm->oom_disable_count); - current->signal->oom_score_adj = new_val; - } + current->signal->oom_score_adj = new_val; spin_unlock_irq(&sighand->siglock); return old_val; @@ -173,16 +167,6 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, return 0; /* - * Shortcut check for a thread sharing p->mm that is OOM_SCORE_ADJ_MIN - * so the entire heuristic doesn't need to be executed for something - * that cannot be killed. - */ - if (atomic_read(&p->mm->oom_disable_count)) { - task_unlock(p); - return 0; - } - - /* * The memory controller may have a limit of 0 bytes, so avoid a divide * by zero, if necessary. */ @@ -451,6 +435,9 @@ static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem) for_each_process(q) if (q->mm == mm && !same_thread_group(q, p) && !(q->flags & PF_KTHREAD)) { + if (q->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) + continue; + task_lock(q); /* Protect ->comm from prctl() */ pr_err("Kill process %d (%s) sharing same memory\n", task_pid_nr(q), q->comm); @@ -727,7 +714,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, read_lock(&tasklist_lock); if (sysctl_oom_kill_allocating_task && !oom_unkillable_task(current, NULL, nodemask) && - current->mm && !atomic_read(¤t->mm->oom_disable_count)) { + current->mm) { /* * oom_kill_process() needs tasklist_lock held. If it returns * non-zero, current could not be killed so we must fallback to -- cgit v0.10.2 From 43362a4977e37db46f86f7e6ab935f0006956632 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 31 Oct 2011 17:07:18 -0700 Subject: oom: fix race while temporarily setting current's oom_score_adj test_set_oom_score_adj() was introduced in 72788c385604 ("oom: replace PF_OOM_ORIGIN with toggling oom_score_adj") to temporarily elevate current's oom_score_adj for ksm and swapoff without requiring an additional per-process flag. Using that function to both set oom_score_adj to OOM_SCORE_ADJ_MAX and then reinstate the previous value is racy since it's possible that userspace can set the value to something else itself before the old value is reinstated. That results in userspace setting current's oom_score_adj to a different value and then the kernel immediately setting it back to its previous value without notification. To fix this, a new compare_swap_oom_score_adj() function is introduced with the same semantics as the compare and swap CAS instruction, or CMPXCHG on x86. It is used to reinstate the previous value of oom_score_adj if and only if the present value is the same as the old value. Signed-off-by: David Rientjes Cc: Oleg Nesterov Cc: Ying Han Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/oom.h b/include/linux/oom.h index 13b7b02..6f9d04a 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -40,6 +40,7 @@ enum oom_constraint { CONSTRAINT_MEMCG, }; +extern void compare_swap_oom_score_adj(int old_val, int new_val); extern int test_set_oom_score_adj(int new_val); extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, diff --git a/mm/ksm.c b/mm/ksm.c index 9a68b0c..310544a 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1905,7 +1905,8 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr, oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); err = unmerge_and_remove_all_rmap_items(); - test_set_oom_score_adj(oom_score_adj); + compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, + oom_score_adj); if (err) { ksm_run = KSM_RUN_STOP; count = err; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 2b97e8f..e916168 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -39,6 +39,25 @@ int sysctl_oom_kill_allocating_task; int sysctl_oom_dump_tasks = 1; static DEFINE_SPINLOCK(zone_scan_lock); +/* + * compare_swap_oom_score_adj() - compare and swap current's oom_score_adj + * @old_val: old oom_score_adj for compare + * @new_val: new oom_score_adj for swap + * + * Sets the oom_score_adj value for current to @new_val iff its present value is + * @old_val. Usually used to reinstate a previous value to prevent racing with + * userspacing tuning the value in the interim. + */ +void compare_swap_oom_score_adj(int old_val, int new_val) +{ + struct sighand_struct *sighand = current->sighand; + + spin_lock_irq(&sighand->siglock); + if (current->signal->oom_score_adj == old_val) + current->signal->oom_score_adj = new_val; + spin_unlock_irq(&sighand->siglock); +} + /** * test_set_oom_score_adj() - set current's oom_score_adj and return old value * @new_val: new oom_score_adj value diff --git a/mm/swapfile.c b/mm/swapfile.c index 17bc224..c9d6540 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1617,7 +1617,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); err = try_to_unuse(type); - test_set_oom_score_adj(oom_score_adj); + compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj); if (err) { /* -- cgit v0.10.2 From f5fc870da2f8798edb5481cd2137a3b2d5bd1b19 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Mon, 31 Oct 2011 17:07:21 -0700 Subject: tmpfs: add "tmpfs" to the Kconfig prompt to make it obvious. Add the leading word "tmpfs" to the Kconfig string to make it blindingly obvious that this selection refers to tmpfs. Signed-off-by: Robert P. J. Day Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/Kconfig b/fs/Kconfig index 9fe0b34..5f4c45d 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -109,7 +109,7 @@ source "fs/proc/Kconfig" source "fs/sysfs/Kconfig" config TMPFS - bool "Virtual memory file system support (former shm fs)" + bool "Tmpfs virtual memory file system support (former shm fs)" depends on SHMEM help Tmpfs is a file system which keeps all files in virtual memory. -- cgit v0.10.2 From 4f31888c104687078f8d88c2f11eca1080c88464 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 31 Oct 2011 17:07:24 -0700 Subject: mm: output a list of loaded modules when we hit bad_page() When we get a bad_page bug report, it's useful to see what modules the user had loaded. Signed-off-by: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6e8ecb6..83a0205 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -318,6 +318,7 @@ static void bad_page(struct page *page) current->comm, page_to_pfn(page)); dump_page(page); + print_modules(); dump_stack(); out: /* Leave bad fields for debug, except PageBuddy could make trouble */ -- cgit v0.10.2 From f11c0ca501af89fc07b0d9f17531ba3b68a4ef39 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 31 Oct 2011 17:07:27 -0700 Subject: mm: vmscan: drop nr_force_scan[] from get_scan_count The nr_force_scan[] tuple holds the effective scan numbers for anon and file pages in case the situation called for a forced scan and the regularly calculated scan numbers turned out zero. However, the effective scan number can always be assumed to be SWAP_CLUSTER_MAX right before the division into anon and file. The numerators and denominator are properly set up for all cases, be it force scan for just file, just anon, or both, to do the right thing. Signed-off-by: Johannes Weiner Reviewed-by: Minchan Kim Acked-by: KAMEZAWA Hiroyuki Reviewed-by: Michal Hocko Cc: Ying Han Cc: Balbir Singh Cc: KOSAKI Motohiro Cc: Daisuke Nishimura Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index b1520b0..d29b2bd 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1817,12 +1817,19 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc, enum lru_list l; int noswap = 0; bool force_scan = false; - unsigned long nr_force_scan[2]; - /* kswapd does zone balancing and needs to scan this zone */ + /* + * If the zone or memcg is small, nr[l] can be 0. This + * results in no scanning on this priority and a potential + * priority drop. Global direct reclaim can go to the next + * zone and tends to have no problems. Global kswapd is for + * zone balancing and it needs to scan a minimum amount. When + * reclaiming for a memcg, a priority drop can cause high + * latencies, so it's better to scan a minimum amount there as + * well. + */ if (scanning_global_lru(sc) && current_is_kswapd()) force_scan = true; - /* memcg may have small limit and need to avoid priority drop */ if (!scanning_global_lru(sc)) force_scan = true; @@ -1832,8 +1839,6 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc, fraction[0] = 0; fraction[1] = 1; denominator = 1; - nr_force_scan[0] = 0; - nr_force_scan[1] = SWAP_CLUSTER_MAX; goto out; } @@ -1850,8 +1855,6 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc, fraction[0] = 1; fraction[1] = 0; denominator = 1; - nr_force_scan[0] = SWAP_CLUSTER_MAX; - nr_force_scan[1] = 0; goto out; } } @@ -1900,11 +1903,6 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc, fraction[0] = ap; fraction[1] = fp; denominator = ap + fp + 1; - if (force_scan) { - unsigned long scan = SWAP_CLUSTER_MAX; - nr_force_scan[0] = div64_u64(scan * ap, denominator); - nr_force_scan[1] = div64_u64(scan * fp, denominator); - } out: for_each_evictable_lru(l) { int file = is_file_lru(l); @@ -1913,20 +1911,10 @@ out: scan = zone_nr_lru_pages(zone, sc, l); if (priority || noswap) { scan >>= priority; + if (!scan && force_scan) + scan = SWAP_CLUSTER_MAX; scan = div64_u64(scan * fraction[file], denominator); } - - /* - * If zone is small or memcg is small, nr[l] can be 0. - * This results no-scan on this priority and priority drop down. - * For global direct reclaim, it can visit next zone and tend - * not to have problems. For global kswapd, it's for zone - * balancing and it need to scan a small amounts. When using - * memcg, priority drop can cause big latency. So, it's better - * to scan small amount. See may_noscan above. - */ - if (!scan && force_scan) - scan = nr_force_scan[file]; nr[l] = scan; } } -- cgit v0.10.2 From bc3e53f682d93df677dbd5006a404722b3adfe18 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 31 Oct 2011 17:07:30 -0700 Subject: mm: distinguish between mlocked and pinned pages Some kernel components pin user space memory (infiniband and perf) (by increasing the page count) and account that memory as "mlocked". The difference between mlocking and pinning is: A. mlocked pages are marked with PG_mlocked and are exempt from swapping. Page migration may move them around though. They are kept on a special LRU list. B. Pinned pages cannot be moved because something needs to directly access physical memory. They may not be on any LRU list. I recently saw an mlockalled process where mm->locked_vm became bigger than the virtual size of the process (!) because some memory was accounted for twice: Once when the page was mlocked and once when the Infiniband layer increased the refcount because it needt to pin the RDMA memory. This patch introduces a separate counter for pinned pages and accounts them seperately. Signed-off-by: Christoph Lameter Cc: Mike Marciniszyn Cc: Roland Dreier Cc: Sean Hefty Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index b645e55..9155f91 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -136,7 +136,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, down_write(¤t->mm->mmap_sem); - locked = npages + current->mm->locked_vm; + locked = npages + current->mm->pinned_vm; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { @@ -206,7 +206,7 @@ out: __ib_umem_release(context->device, umem, 0); kfree(umem); } else - current->mm->locked_vm = locked; + current->mm->pinned_vm = locked; up_write(¤t->mm->mmap_sem); if (vma_list) @@ -222,7 +222,7 @@ static void ib_umem_account(struct work_struct *work) struct ib_umem *umem = container_of(work, struct ib_umem, work); down_write(&umem->mm->mmap_sem); - umem->mm->locked_vm -= umem->diff; + umem->mm->pinned_vm -= umem->diff; up_write(&umem->mm->mmap_sem); mmput(umem->mm); kfree(umem); diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c index cfed539..dc66c45 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_pages.c +++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c @@ -79,7 +79,7 @@ static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages, goto bail_release; } - current->mm->locked_vm += num_pages; + current->mm->pinned_vm += num_pages; ret = 0; goto bail; @@ -178,7 +178,7 @@ void ipath_release_user_pages(struct page **p, size_t num_pages) __ipath_release_user_pages(p, num_pages, 1); - current->mm->locked_vm -= num_pages; + current->mm->pinned_vm -= num_pages; up_write(¤t->mm->mmap_sem); } @@ -195,7 +195,7 @@ static void user_pages_account(struct work_struct *_work) container_of(_work, struct ipath_user_pages_work, work); down_write(&work->mm->mmap_sem); - work->mm->locked_vm -= work->num_pages; + work->mm->pinned_vm -= work->num_pages; up_write(&work->mm->mmap_sem); mmput(work->mm); kfree(work); diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index 7689e49..2bc1d2b 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -74,7 +74,7 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages, goto bail_release; } - current->mm->locked_vm += num_pages; + current->mm->pinned_vm += num_pages; ret = 0; goto bail; @@ -151,7 +151,7 @@ void qib_release_user_pages(struct page **p, size_t num_pages) __qib_release_user_pages(p, num_pages, 1); if (current->mm) { - current->mm->locked_vm -= num_pages; + current->mm->pinned_vm -= num_pages; up_write(¤t->mm->mmap_sem); } } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index c7d4ee6..e418c5a 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -44,6 +44,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) "VmPeak:\t%8lu kB\n" "VmSize:\t%8lu kB\n" "VmLck:\t%8lu kB\n" + "VmPin:\t%8lu kB\n" "VmHWM:\t%8lu kB\n" "VmRSS:\t%8lu kB\n" "VmData:\t%8lu kB\n" @@ -55,6 +56,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) hiwater_vm << (PAGE_SHIFT-10), (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), mm->locked_vm << (PAGE_SHIFT-10), + mm->pinned_vm << (PAGE_SHIFT-10), hiwater_rss << (PAGE_SHIFT-10), total_rss << (PAGE_SHIFT-10), data << (PAGE_SHIFT-10), diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6456624..f317583 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -304,7 +304,7 @@ struct mm_struct { unsigned long hiwater_rss; /* High-watermark of RSS usage */ unsigned long hiwater_vm; /* High-water virtual memory usage */ - unsigned long total_vm, locked_vm, shared_vm, exec_vm; + unsigned long total_vm, locked_vm, pinned_vm, shared_vm, exec_vm; unsigned long stack_vm, reserved_vm, def_flags, nr_ptes; unsigned long start_code, end_code, start_data, end_data; unsigned long start_brk, brk, start_stack; diff --git a/kernel/events/core.c b/kernel/events/core.c index d1a1bee..12a0287 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3544,7 +3544,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) struct ring_buffer *rb = event->rb; atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); - vma->vm_mm->locked_vm -= event->mmap_locked; + vma->vm_mm->pinned_vm -= event->mmap_locked; rcu_assign_pointer(event->rb, NULL); mutex_unlock(&event->mmap_mutex); @@ -3625,7 +3625,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) lock_limit = rlimit(RLIMIT_MEMLOCK); lock_limit >>= PAGE_SHIFT; - locked = vma->vm_mm->locked_vm + extra; + locked = vma->vm_mm->pinned_vm + extra; if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() && !capable(CAP_IPC_LOCK)) { @@ -3651,7 +3651,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) atomic_long_add(user_extra, &user->locked_vm); event->mmap_locked = extra; event->mmap_user = get_current_user(); - vma->vm_mm->locked_vm += event->mmap_locked; + vma->vm_mm->pinned_vm += event->mmap_locked; unlock: if (!ret) -- cgit v0.10.2 From e10d59f2c3decaf22cc5d3de7040eba202bc2df3 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 31 Oct 2011 17:07:34 -0700 Subject: mm: add comments to explain mm_struct fields Add comments to explain the page statistics field in the mm_struct. [akpm@linux-foundation.org: add missing ;] Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f317583..3e01a19 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -304,8 +304,15 @@ struct mm_struct { unsigned long hiwater_rss; /* High-watermark of RSS usage */ unsigned long hiwater_vm; /* High-water virtual memory usage */ - unsigned long total_vm, locked_vm, pinned_vm, shared_vm, exec_vm; - unsigned long stack_vm, reserved_vm, def_flags, nr_ptes; + unsigned long total_vm; /* Total pages mapped */ + unsigned long locked_vm; /* Pages that have PG_mlocked set */ + unsigned long pinned_vm; /* Refcount permanently increased */ + unsigned long shared_vm; /* Shared pages (files) */ + unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */ + unsigned long stack_vm; /* VM_GROWSUP/DOWN */ + unsigned long reserved_vm; /* VM_RESERVED|VM_IO pages */ + unsigned long def_flags; + unsigned long nr_ptes; /* Page table pages */ unsigned long start_code, end_code, start_data, end_data; unsigned long start_brk, brk, start_stack; unsigned long arg_start, arg_end, env_start, env_end; -- cgit v0.10.2 From ee72886d8ed5d9de3fa0ed3b99a7ca7702576a96 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 31 Oct 2011 17:07:38 -0700 Subject: mm: vmscan: do not writeback filesystem pages in direct reclaim Testing from the XFS folk revealed that there is still too much I/O from the end of the LRU in kswapd. Previously it was considered acceptable by VM people for a small number of pages to be written back from reclaim with testing generally showing about 0.3% of pages reclaimed were written back (higher if memory was low). That writing back a small number of pages is ok has been heavily disputed for quite some time and Dave Chinner explained it well; It doesn't have to be a very high number to be a problem. IO is orders of magnitude slower than the CPU time it takes to flush a page, so the cost of making a bad flush decision is very high. And single page writeback from the LRU is almost always a bad flush decision. To complicate matters, filesystems respond very differently to requests from reclaim according to Christoph Hellwig; xfs tries to write it back if the requester is kswapd ext4 ignores the request if it's a delayed allocation btrfs ignores the request As a result, each filesystem has different performance characteristics when under memory pressure and there are many pages being dirtied. In some cases, the request is ignored entirely so the VM cannot depend on the IO being dispatched. The objective of this series is to reduce writing of filesystem-backed pages from reclaim, play nicely with writeback that is already in progress and throttle reclaim appropriately when writeback pages are encountered. The assumption is that the flushers will always write pages faster than if reclaim issues the IO. A secondary goal is to avoid the problem whereby direct reclaim splices two potentially deep call stacks together. There is a potential new problem as reclaim has less control over how long before a page in a particularly zone or container is cleaned and direct reclaimers depend on kswapd or flusher threads to do the necessary work. However, as filesystems sometimes ignore direct reclaim requests already, it is not expected to be a serious issue. Patch 1 disables writeback of filesystem pages from direct reclaim entirely. Anonymous pages are still written. Patch 2 removes dead code in lumpy reclaim as it is no longer able to synchronously write pages. This hurts lumpy reclaim but there is an expectation that compaction is used for hugepage allocations these days and lumpy reclaim's days are numbered. Patches 3-4 add warnings to XFS and ext4 if called from direct reclaim. With patch 1, this "never happens" and is intended to catch regressions in this logic in the future. Patch 5 disables writeback of filesystem pages from kswapd unless the priority is raised to the point where kswapd is considered to be in trouble. Patch 6 throttles reclaimers if too many dirty pages are being encountered and the zones or backing devices are congested. Patch 7 invalidates dirty pages found at the end of the LRU so they are reclaimed quickly after being written back rather than waiting for a reclaimer to find them I consider this series to be orthogonal to the writeback work but it is worth noting that the writeback work affects the viability of patch 8 in particular. I tested this on ext4 and xfs using fs_mark, a simple writeback test based on dd and a micro benchmark that does a streaming write to a large mapping (exercises use-once LRU logic) followed by streaming writes to a mix of anonymous and file-backed mappings. The command line for fs_mark when botted with 512M looked something like ./fs_mark -d /tmp/fsmark-2676 -D 100 -N 150 -n 150 -L 25 -t 1 -S0 -s 10485760 The number of files was adjusted depending on the amount of available memory so that the files created was about 3xRAM. For multiple threads, the -d switch is specified multiple times. The test machine is x86-64 with an older generation of AMD processor with 4 cores. The underlying storage was 4 disks configured as RAID-0 as this was the best configuration of storage I had available. Swap is on a separate disk. Dirty ratio was tuned to 40% instead of the default of 20%. Testing was run with and without monitors to both verify that the patches were operating as expected and that any performance gain was real and not due to interference from monitors. Here is a summary of results based on testing XFS. 512M1P-xfs Files/s mean 32.69 ( 0.00%) 34.44 ( 5.08%) 512M1P-xfs Elapsed Time fsmark 51.41 48.29 512M1P-xfs Elapsed Time simple-wb 114.09 108.61 512M1P-xfs Elapsed Time mmap-strm 113.46 109.34 512M1P-xfs Kswapd efficiency fsmark 62% 63% 512M1P-xfs Kswapd efficiency simple-wb 56% 61% 512M1P-xfs Kswapd efficiency mmap-strm 44% 42% 512M-xfs Files/s mean 30.78 ( 0.00%) 35.94 (14.36%) 512M-xfs Elapsed Time fsmark 56.08 48.90 512M-xfs Elapsed Time simple-wb 112.22 98.13 512M-xfs Elapsed Time mmap-strm 219.15 196.67 512M-xfs Kswapd efficiency fsmark 54% 56% 512M-xfs Kswapd efficiency simple-wb 54% 55% 512M-xfs Kswapd efficiency mmap-strm 45% 44% 512M-4X-xfs Files/s mean 30.31 ( 0.00%) 33.33 ( 9.06%) 512M-4X-xfs Elapsed Time fsmark 63.26 55.88 512M-4X-xfs Elapsed Time simple-wb 100.90 90.25 512M-4X-xfs Elapsed Time mmap-strm 261.73 255.38 512M-4X-xfs Kswapd efficiency fsmark 49% 50% 512M-4X-xfs Kswapd efficiency simple-wb 54% 56% 512M-4X-xfs Kswapd efficiency mmap-strm 37% 36% 512M-16X-xfs Files/s mean 60.89 ( 0.00%) 65.22 ( 6.64%) 512M-16X-xfs Elapsed Time fsmark 67.47 58.25 512M-16X-xfs Elapsed Time simple-wb 103.22 90.89 512M-16X-xfs Elapsed Time mmap-strm 237.09 198.82 512M-16X-xfs Kswapd efficiency fsmark 45% 46% 512M-16X-xfs Kswapd efficiency simple-wb 53% 55% 512M-16X-xfs Kswapd efficiency mmap-strm 33% 33% Up until 512-4X, the FSmark improvements were statistically significant. For the 4X and 16X tests the results were within standard deviations but just barely. The time to completion for all tests is improved which is an important result. In general, kswapd efficiency is not affected by skipping dirty pages. 1024M1P-xfs Files/s mean 39.09 ( 0.00%) 41.15 ( 5.01%) 1024M1P-xfs Elapsed Time fsmark 84.14 80.41 1024M1P-xfs Elapsed Time simple-wb 210.77 184.78 1024M1P-xfs Elapsed Time mmap-strm 162.00 160.34 1024M1P-xfs Kswapd efficiency fsmark 69% 75% 1024M1P-xfs Kswapd efficiency simple-wb 71% 77% 1024M1P-xfs Kswapd efficiency mmap-strm 43% 44% 1024M-xfs Files/s mean 35.45 ( 0.00%) 37.00 ( 4.19%) 1024M-xfs Elapsed Time fsmark 94.59 91.00 1024M-xfs Elapsed Time simple-wb 229.84 195.08 1024M-xfs Elapsed Time mmap-strm 405.38 440.29 1024M-xfs Kswapd efficiency fsmark 79% 71% 1024M-xfs Kswapd efficiency simple-wb 74% 74% 1024M-xfs Kswapd efficiency mmap-strm 39% 42% 1024M-4X-xfs Files/s mean 32.63 ( 0.00%) 35.05 ( 6.90%) 1024M-4X-xfs Elapsed Time fsmark 103.33 97.74 1024M-4X-xfs Elapsed Time simple-wb 204.48 178.57 1024M-4X-xfs Elapsed Time mmap-strm 528.38 511.88 1024M-4X-xfs Kswapd efficiency fsmark 81% 70% 1024M-4X-xfs Kswapd efficiency simple-wb 73% 72% 1024M-4X-xfs Kswapd efficiency mmap-strm 39% 38% 1024M-16X-xfs Files/s mean 42.65 ( 0.00%) 42.97 ( 0.74%) 1024M-16X-xfs Elapsed Time fsmark 103.11 99.11 1024M-16X-xfs Elapsed Time simple-wb 200.83 178.24 1024M-16X-xfs Elapsed Time mmap-strm 397.35 459.82 1024M-16X-xfs Kswapd efficiency fsmark 84% 69% 1024M-16X-xfs Kswapd efficiency simple-wb 74% 73% 1024M-16X-xfs Kswapd efficiency mmap-strm 39% 40% All FSMark tests up to 16X had statistically significant improvements. For the most part, tests are completing faster with the exception of the streaming writes to a mixture of anonymous and file-backed mappings which were slower in two cases In the cases where the mmap-strm tests were slower, there was more swapping due to dirty pages being skipped. The number of additional pages swapped is almost identical to the fewer number of pages written from reclaim. In other words, roughly the same number of pages were reclaimed but swapping was slower. As the test is a bit unrealistic and stresses memory heavily, the small shift is acceptable. 4608M1P-xfs Files/s mean 29.75 ( 0.00%) 30.96 ( 3.91%) 4608M1P-xfs Elapsed Time fsmark 512.01 492.15 4608M1P-xfs Elapsed Time simple-wb 618.18 566.24 4608M1P-xfs Elapsed Time mmap-strm 488.05 465.07 4608M1P-xfs Kswapd efficiency fsmark 93% 86% 4608M1P-xfs Kswapd efficiency simple-wb 88% 84% 4608M1P-xfs Kswapd efficiency mmap-strm 46% 45% 4608M-xfs Files/s mean 27.60 ( 0.00%) 28.85 ( 4.33%) 4608M-xfs Elapsed Time fsmark 555.96 532.34 4608M-xfs Elapsed Time simple-wb 659.72 571.85 4608M-xfs Elapsed Time mmap-strm 1082.57 1146.38 4608M-xfs Kswapd efficiency fsmark 89% 91% 4608M-xfs Kswapd efficiency simple-wb 88% 82% 4608M-xfs Kswapd efficiency mmap-strm 48% 46% 4608M-4X-xfs Files/s mean 26.00 ( 0.00%) 27.47 ( 5.35%) 4608M-4X-xfs Elapsed Time fsmark 592.91 564.00 4608M-4X-xfs Elapsed Time simple-wb 616.65 575.07 4608M-4X-xfs Elapsed Time mmap-strm 1773.02 1631.53 4608M-4X-xfs Kswapd efficiency fsmark 90% 94% 4608M-4X-xfs Kswapd efficiency simple-wb 87% 82% 4608M-4X-xfs Kswapd efficiency mmap-strm 43% 43% 4608M-16X-xfs Files/s mean 26.07 ( 0.00%) 26.42 ( 1.32%) 4608M-16X-xfs Elapsed Time fsmark 602.69 585.78 4608M-16X-xfs Elapsed Time simple-wb 606.60 573.81 4608M-16X-xfs Elapsed Time mmap-strm 1549.75 1441.86 4608M-16X-xfs Kswapd efficiency fsmark 98% 98% 4608M-16X-xfs Kswapd efficiency simple-wb 88% 82% 4608M-16X-xfs Kswapd efficiency mmap-strm 44% 42% Unlike the other tests, the fsmark results are not statistically significant but the min and max times are both improved and for the most part, tests completed faster. There are other indications that this is an improvement as well. For example, in the vast majority of cases, there were fewer pages scanned by direct reclaim implying in many cases that stalls due to direct reclaim are reduced. KSwapd is scanning more due to skipping dirty pages which is unfortunate but the CPU usage is still acceptable In an earlier set of tests, I used blktrace and in almost all cases throughput throughout the entire test was higher. However, I ended up discarding those results as recording blktrace data was too heavy for my liking. On a laptop, I plugged in a USB stick and ran a similar tests of tests using it as backing storage. A desktop environment was running and for the entire duration of the tests, firefox and gnome terminal were launching and exiting to vaguely simulate a user. 1024M-xfs Files/s mean 0.41 ( 0.00%) 0.44 ( 6.82%) 1024M-xfs Elapsed Time fsmark 2053.52 1641.03 1024M-xfs Elapsed Time simple-wb 1229.53 768.05 1024M-xfs Elapsed Time mmap-strm 4126.44 4597.03 1024M-xfs Kswapd efficiency fsmark 84% 85% 1024M-xfs Kswapd efficiency simple-wb 92% 81% 1024M-xfs Kswapd efficiency mmap-strm 60% 51% 1024M-xfs Avg wait ms fsmark 5404.53 4473.87 1024M-xfs Avg wait ms simple-wb 2541.35 1453.54 1024M-xfs Avg wait ms mmap-strm 3400.25 3852.53 The mmap-strm results were hurt because firefox launching had a tendency to push the test out of memory. On the postive side, firefox launched marginally faster with the patches applied. Time to completion for many tests was faster but more importantly - the "Avg wait" time as measured by iostat was far lower implying the system would be more responsive. It was also the case that "Avg wait ms" on the root filesystem was lower. I tested it manually and while the system felt slightly more responsive while copying data to a USB stick, it was marginal enough that it could be my imagination. This patch: do not writeback filesystem pages in direct reclaim. When kswapd is failing to keep zones above the min watermark, a process will enter direct reclaim in the same manner kswapd does. If a dirty page is encountered during the scan, this page is written to backing storage using mapping->writepage. This causes two problems. First, it can result in very deep call stacks, particularly if the target storage or filesystem are complex. Some filesystems ignore write requests from direct reclaim as a result. The second is that a single-page flush is inefficient in terms of IO. While there is an expectation that the elevator will merge requests, this does not always happen. Quoting Christoph Hellwig; The elevator has a relatively small window it can operate on, and can never fix up a bad large scale writeback pattern. This patch prevents direct reclaim writing back filesystem pages by checking if current is kswapd. Anonymous pages are still written to swap as there is not the equivalent of a flusher thread for anonymous pages. If the dirty pages cannot be written back, they are placed back on the LRU lists. There is now a direct dependency on dirty page balancing to prevent too many pages in the system being dirtied which would prevent reclaim making forward progress. Signed-off-by: Mel Gorman Reviewed-by: Minchan Kim Cc: Dave Chinner Cc: Christoph Hellwig Cc: Johannes Weiner Cc: Wu Fengguang Cc: Jan Kara Cc: Rik van Riel Cc: Mel Gorman Cc: Alex Elder Cc: Theodore Ts'o Cc: Chris Mason Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ec57779..2c41b2c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -100,6 +100,7 @@ enum zone_stat_item { NR_UNSTABLE_NFS, /* NFS unstable pages */ NR_BOUNCE, NR_VMSCAN_WRITE, + NR_VMSCAN_WRITE_SKIP, NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ diff --git a/mm/vmscan.c b/mm/vmscan.c index d29b2bd..10f9c59 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -865,6 +865,15 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (PageDirty(page)) { nr_dirty++; + /* + * Only kswapd can writeback filesystem pages to + * avoid risk of stack overflow + */ + if (page_is_file_cache(page) && !current_is_kswapd()) { + inc_zone_page_state(page, NR_VMSCAN_WRITE_SKIP); + goto keep_locked; + } + if (references == PAGEREF_RECLAIM_CLEAN) goto keep_locked; if (!may_enter_fs) diff --git a/mm/vmstat.c b/mm/vmstat.c index d52b13d..210bd8f 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -702,6 +702,7 @@ const char * const vmstat_text[] = { "nr_unstable", "nr_bounce", "nr_vmscan_write", + "nr_vmscan_write_skip", "nr_writeback_temp", "nr_isolated_anon", "nr_isolated_file", -- cgit v0.10.2 From a18bba061c789f5815c3efc3c80e6ac269911964 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 31 Oct 2011 17:07:42 -0700 Subject: mm: vmscan: remove dead code related to lumpy reclaim waiting on pages under writeback Lumpy reclaim worked with two passes - the first which queued pages for IO and the second which waited on writeback. As direct reclaim can no longer write pages there is some dead code. This patch removes it but direct reclaim will continue to wait on pages under writeback while in synchronous reclaim mode. Signed-off-by: Mel Gorman Cc: Dave Chinner Cc: Christoph Hellwig Cc: Johannes Weiner Cc: Wu Fengguang Cc: Jan Kara Cc: Minchan Kim Cc: Rik van Riel Cc: Mel Gorman Cc: Alex Elder Cc: Theodore Ts'o Cc: Chris Mason Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index 10f9c59..5c59665 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -495,15 +495,6 @@ static pageout_t pageout(struct page *page, struct address_space *mapping, return PAGE_ACTIVATE; } - /* - * Wait on writeback if requested to. This happens when - * direct reclaiming a large contiguous area and the - * first attempt to free a range of pages fails. - */ - if (PageWriteback(page) && - (sc->reclaim_mode & RECLAIM_MODE_SYNC)) - wait_on_page_writeback(page); - if (!PageWriteback(page)) { /* synchronous write or broken a_ops? */ ClearPageReclaim(page); @@ -804,12 +795,10 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (PageWriteback(page)) { /* - * Synchronous reclaim is performed in two passes, - * first an asynchronous pass over the list to - * start parallel writeback, and a second synchronous - * pass to wait for the IO to complete. Wait here - * for any page for which writeback has already - * started. + * Synchronous reclaim cannot queue pages for + * writeback due to the possibility of stack overflow + * but if it encounters a page under writeback, wait + * for the IO to complete. */ if ((sc->reclaim_mode & RECLAIM_MODE_SYNC) && may_enter_fs) @@ -1414,7 +1403,7 @@ static noinline_for_stack void update_isolated_counts(struct zone *zone, } /* - * Returns true if the caller should wait to clean dirty/writeback pages. + * Returns true if a direct reclaim should wait on pages under writeback. * * If we are direct reclaiming for contiguous pages and we do not reclaim * everything in the list, try again and wait for writeback IO to complete. -- cgit v0.10.2 From 94054fa3fca1fd78db02cb3d68d5627120f0a1d4 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 31 Oct 2011 17:07:45 -0700 Subject: xfs: warn if direct reclaim tries to writeback pages Direct reclaim should never writeback pages. For now, handle the situation and warn about it. Ultimately, this will be a BUG_ON. Signed-off-by: Mel Gorman Cc: Dave Chinner Cc: Christoph Hellwig Cc: Johannes Weiner Cc: Wu Fengguang Cc: Jan Kara Cc: Minchan Kim Cc: Rik van Riel Cc: Mel Gorman Cc: Alex Elder Cc: Theodore Ts'o Cc: Chris Mason Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 11b2aad..33b1331 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -902,11 +902,11 @@ xfs_vm_writepage( * random callers for direct reclaim or memcg reclaim. We explicitly * allow reclaim from kswapd as the stack usage there is relatively low. * - * This should really be done by the core VM, but until that happens - * filesystems like XFS, btrfs and ext4 have to take care of this - * by themselves. + * This should never happen except in the case of a VM regression so + * warn about it. */ - if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC) + if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == + PF_MEMALLOC)) goto redirty; /* -- cgit v0.10.2 From 966dbde2c208e07bab7a45a7855e1e693eabe661 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 31 Oct 2011 17:07:48 -0700 Subject: ext4: warn if direct reclaim tries to writeback pages Direct reclaim should never writeback pages. Warn if an attempt is made. Signed-off-by: Mel Gorman Cc: Dave Chinner Cc: Christoph Hellwig Cc: Johannes Weiner Cc: Wu Fengguang Cc: Jan Kara Cc: Minchan Kim Cc: Rik van Riel Cc: Mel Gorman Cc: Alex Elder Cc: Theodore Ts'o Cc: Chris Mason Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 986e238..0defe0b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1811,8 +1811,12 @@ static int ext4_writepage(struct page *page, * We don't want to do block allocation, so redirty * the page and return. We may reach here when we do * a journal commit via journal_submit_inode_data_buffers. - * We can also reach here via shrink_page_list + * We can also reach here via shrink_page_list but it + * should never be for direct reclaim so warn if that + * happens */ + WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == + PF_MEMALLOC); goto redirty_page; } if (commit_write) -- cgit v0.10.2 From f84f6e2b0868f198f97a32ba503d6f9f319a249a Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 31 Oct 2011 17:07:51 -0700 Subject: mm: vmscan: do not writeback filesystem pages in kswapd except in high priority It is preferable that no dirty pages are dispatched for cleaning from the page reclaim path. At normal priorities, this patch prevents kswapd writing pages. However, page reclaim does have a requirement that pages be freed in a particular zone. If it is failing to make sufficient progress (reclaiming < SWAP_CLUSTER_MAX at any priority priority), the priority is raised to scan more pages. A priority of DEF_PRIORITY - 3 is considered to be the point where kswapd is getting into trouble reclaiming pages. If this priority is reached, kswapd will dispatch pages for writing. Signed-off-by: Mel Gorman Reviewed-by: Minchan Kim Cc: Dave Chinner Cc: Christoph Hellwig Cc: Johannes Weiner Cc: Wu Fengguang Cc: Jan Kara Cc: Rik van Riel Cc: Mel Gorman Cc: Alex Elder Cc: Theodore Ts'o Cc: Chris Mason Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index 5c59665..15e3a29 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -750,7 +750,8 @@ static noinline_for_stack void free_page_list(struct list_head *free_pages) */ static unsigned long shrink_page_list(struct list_head *page_list, struct zone *zone, - struct scan_control *sc) + struct scan_control *sc, + int priority) { LIST_HEAD(ret_pages); LIST_HEAD(free_pages); @@ -856,9 +857,11 @@ static unsigned long shrink_page_list(struct list_head *page_list, /* * Only kswapd can writeback filesystem pages to - * avoid risk of stack overflow + * avoid risk of stack overflow but do not writeback + * unless under significant pressure. */ - if (page_is_file_cache(page) && !current_is_kswapd()) { + if (page_is_file_cache(page) && + (!current_is_kswapd() || priority >= DEF_PRIORITY - 2)) { inc_zone_page_state(page, NR_VMSCAN_WRITE_SKIP); goto keep_locked; } @@ -1509,12 +1512,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, spin_unlock_irq(&zone->lru_lock); - nr_reclaimed = shrink_page_list(&page_list, zone, sc); + nr_reclaimed = shrink_page_list(&page_list, zone, sc, priority); /* Check if we should syncronously wait for writeback */ if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) { set_reclaim_mode(priority, sc, true); - nr_reclaimed += shrink_page_list(&page_list, zone, sc); + nr_reclaimed += shrink_page_list(&page_list, zone, sc, priority); } local_irq_disable(); -- cgit v0.10.2 From 92df3a723f84cdf8133560bbff950a7a99e92bc9 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 31 Oct 2011 17:07:56 -0700 Subject: mm: vmscan: throttle reclaim if encountering too many dirty pages under writeback Workloads that are allocating frequently and writing files place a large number of dirty pages on the LRU. With use-once logic, it is possible for them to reach the end of the LRU quickly requiring the reclaimer to scan more to find clean pages. Ordinarily, processes that are dirtying memory will get throttled by dirty balancing but this is a global heuristic and does not take into account that LRUs are maintained on a per-zone basis. This can lead to a situation whereby reclaim is scanning heavily, skipping over a large number of pages under writeback and recycling them around the LRU consuming CPU. This patch checks how many of the number of pages isolated from the LRU were dirty and under writeback. If a percentage of them under writeback, the process will be throttled if a backing device or the zone is congested. Note that this applies whether it is anonymous or file-backed pages that are under writeback meaning that swapping is potentially throttled. This is intentional due to the fact if the swap device is congested, scanning more pages and dispatching more IO is not going to help matters. The percentage that must be in writeback depends on the priority. At default priority, all of them must be dirty. At DEF_PRIORITY-1, 50% of them must be, DEF_PRIORITY-2, 25% etc. i.e. as pressure increases the greater the likelihood the process will get throttled to allow the flusher threads to make some progress. Signed-off-by: Mel Gorman Reviewed-by: Minchan Kim Acked-by: Johannes Weiner Cc: Dave Chinner Cc: Christoph Hellwig Cc: Wu Fengguang Cc: Jan Kara Cc: Rik van Riel Cc: Mel Gorman Cc: Alex Elder Cc: Theodore Ts'o Cc: Chris Mason Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index 15e3a29..7b0573f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -751,7 +751,9 @@ static noinline_for_stack void free_page_list(struct list_head *free_pages) static unsigned long shrink_page_list(struct list_head *page_list, struct zone *zone, struct scan_control *sc, - int priority) + int priority, + unsigned long *ret_nr_dirty, + unsigned long *ret_nr_writeback) { LIST_HEAD(ret_pages); LIST_HEAD(free_pages); @@ -759,6 +761,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, unsigned long nr_dirty = 0; unsigned long nr_congested = 0; unsigned long nr_reclaimed = 0; + unsigned long nr_writeback = 0; cond_resched(); @@ -795,6 +798,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO)); if (PageWriteback(page)) { + nr_writeback++; /* * Synchronous reclaim cannot queue pages for * writeback due to the possibility of stack overflow @@ -1000,6 +1004,8 @@ keep_lumpy: list_splice(&ret_pages, page_list); count_vm_events(PGACTIVATE, pgactivate); + *ret_nr_dirty += nr_dirty; + *ret_nr_writeback += nr_writeback; return nr_reclaimed; } @@ -1460,6 +1466,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, unsigned long nr_taken; unsigned long nr_anon; unsigned long nr_file; + unsigned long nr_dirty = 0; + unsigned long nr_writeback = 0; isolate_mode_t reclaim_mode = ISOLATE_INACTIVE; while (unlikely(too_many_isolated(zone, file, sc))) { @@ -1512,12 +1520,14 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, spin_unlock_irq(&zone->lru_lock); - nr_reclaimed = shrink_page_list(&page_list, zone, sc, priority); + nr_reclaimed = shrink_page_list(&page_list, zone, sc, priority, + &nr_dirty, &nr_writeback); /* Check if we should syncronously wait for writeback */ if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) { set_reclaim_mode(priority, sc, true); - nr_reclaimed += shrink_page_list(&page_list, zone, sc, priority); + nr_reclaimed += shrink_page_list(&page_list, zone, sc, + priority, &nr_dirty, &nr_writeback); } local_irq_disable(); @@ -1527,6 +1537,32 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list); + /* + * If reclaim is isolating dirty pages under writeback, it implies + * that the long-lived page allocation rate is exceeding the page + * laundering rate. Either the global limits are not being effective + * at throttling processes due to the page distribution throughout + * zones or there is heavy usage of a slow backing device. The + * only option is to throttle from reclaim context which is not ideal + * as there is no guarantee the dirtying process is throttled in the + * same way balance_dirty_pages() manages. + * + * This scales the number of dirty pages that must be under writeback + * before throttling depending on priority. It is a simple backoff + * function that has the most effect in the range DEF_PRIORITY to + * DEF_PRIORITY-2 which is the priority reclaim is considered to be + * in trouble and reclaim is considered to be in trouble. + * + * DEF_PRIORITY 100% isolated pages must be PageWriteback to throttle + * DEF_PRIORITY-1 50% must be PageWriteback + * DEF_PRIORITY-2 25% must be PageWriteback, kswapd in trouble + * ... + * DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any + * isolated page is PageWriteback + */ + if (nr_writeback && nr_writeback >= (nr_taken >> (DEF_PRIORITY-priority))) + wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); + trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id, zone_idx(zone), nr_scanned, nr_reclaimed, -- cgit v0.10.2 From 49ea7eb65e7c5060807fb9312b1ad4c3eab82e2c Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 31 Oct 2011 17:07:59 -0700 Subject: mm: vmscan: immediately reclaim end-of-LRU dirty pages when writeback completes When direct reclaim encounters a dirty page, it gets recycled around the LRU for another cycle. This patch marks the page PageReclaim similar to deactivate_page() so that the page gets reclaimed almost immediately after the page gets cleaned. This is to avoid reclaiming clean pages that are younger than a dirty page encountered at the end of the LRU that might have been something like a use-once page. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Cc: Dave Chinner Cc: Christoph Hellwig Cc: Wu Fengguang Cc: Jan Kara Cc: Minchan Kim Cc: Rik van Riel Cc: Mel Gorman Cc: Alex Elder Cc: Theodore Ts'o Cc: Chris Mason Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 2c41b2c..188cb2f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -100,7 +100,7 @@ enum zone_stat_item { NR_UNSTABLE_NFS, /* NFS unstable pages */ NR_BOUNCE, NR_VMSCAN_WRITE, - NR_VMSCAN_WRITE_SKIP, + NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ diff --git a/mm/vmscan.c b/mm/vmscan.c index 7b0573f..a297603 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -866,7 +866,15 @@ static unsigned long shrink_page_list(struct list_head *page_list, */ if (page_is_file_cache(page) && (!current_is_kswapd() || priority >= DEF_PRIORITY - 2)) { - inc_zone_page_state(page, NR_VMSCAN_WRITE_SKIP); + /* + * Immediately reclaim when written back. + * Similar in principal to deactivate_page() + * except we already have the page isolated + * and know it's dirty + */ + inc_zone_page_state(page, NR_VMSCAN_IMMEDIATE); + SetPageReclaim(page); + goto keep_locked; } diff --git a/mm/vmstat.c b/mm/vmstat.c index 210bd8f..56e529a 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -702,7 +702,7 @@ const char * const vmstat_text[] = { "nr_unstable", "nr_bounce", "nr_vmscan_write", - "nr_vmscan_write_skip", + "nr_vmscan_immediate_reclaim", "nr_writeback_temp", "nr_isolated_anon", "nr_isolated_file", -- cgit v0.10.2 From 16fb951237c2b0b28037b992ee44e7ee401c30d1 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 31 Oct 2011 17:08:02 -0700 Subject: vmscan: count pages into balanced for zone with good watermark It's possible a zone watermark is ok when entering the balance_pgdat() loop, while the zone is within the requested classzone_idx. Count pages from this zone into `balanced'. In this way, we can skip shrinking zones too much for high order allocation. Signed-off-by: Shaohua Li Acked-by: Mel Gorman Reviewed-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index a297603..77ee24f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2748,6 +2748,8 @@ out: /* If balanced, clear the congested flag */ zone_clear_flag(zone, ZONE_CONGESTED); + if (i <= *classzone_idx) + balanced += zone->present_pages; } } -- cgit v0.10.2 From 77311139f364d7f71fc9ba88f59fd90e60205007 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 31 Oct 2011 17:08:05 -0700 Subject: mm/debug-pagealloc.c: use plain __ratelimit() instead of printk_ratelimit() printk_ratelimit() should not be used, because it shares ratelimiting state with all other unrelated printk_ratelimit() callsites. Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/debug-pagealloc.c b/mm/debug-pagealloc.c index a1e3324..a4b6d70 100644 --- a/mm/debug-pagealloc.c +++ b/mm/debug-pagealloc.c @@ -2,6 +2,7 @@ #include #include #include +#include static inline void set_page_poison(struct page *page) { @@ -59,6 +60,7 @@ static bool single_bit_flip(unsigned char a, unsigned char b) static void check_poison_mem(unsigned char *mem, size_t bytes) { + static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 10); unsigned char *start; unsigned char *end; @@ -74,7 +76,7 @@ static void check_poison_mem(unsigned char *mem, size_t bytes) break; } - if (!printk_ratelimit()) + if (!__ratelimit(&ratelimit)) return; else if (start == end && single_bit_flip(*start, PAGE_POISON)) printk(KERN_ERR "pagealloc: single bit error\n"); -- cgit v0.10.2 From 798248206b59acc6e1238c778281419c041891a7 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 31 Oct 2011 17:08:07 -0700 Subject: lib/string.c: introduce memchr_inv() memchr_inv() is mainly used to check whether the whole buffer is filled with just a specified byte. The function name and prototype are stolen from logfs and the implementation is from SLUB. Signed-off-by: Akinobu Mita Acked-by: Christoph Lameter Acked-by: Pekka Enberg Cc: Matt Mackall Acked-by: Joern Engel Cc: Marcin Slusarz Cc: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index f22d108..398ecff 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h @@ -618,7 +618,6 @@ static inline int logfs_buf_recover(struct logfs_area *area, u64 ofs, struct page *emergency_read_begin(struct address_space *mapping, pgoff_t index); void emergency_read_end(struct page *page); void logfs_crash_dump(struct super_block *sb); -void *memchr_inv(const void *s, int c, size_t n); int logfs_statfs(struct dentry *dentry, struct kstatfs *stats); int logfs_check_ds(struct logfs_disk_super *ds); int logfs_write_sb(struct super_block *sb); diff --git a/fs/logfs/super.c b/fs/logfs/super.c index ce03a18..f2697e4 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -91,28 +91,6 @@ void logfs_crash_dump(struct super_block *sb) } /* - * TODO: move to lib/string.c - */ -/** - * memchr_inv - Find a character in an area of memory. - * @s: The memory area - * @c: The byte to search for - * @n: The size of the area. - * - * returns the address of the first character other than @c, or %NULL - * if the whole buffer contains just @c. - */ -void *memchr_inv(const void *s, int c, size_t n) -{ - const unsigned char *p = s; - while (n-- != 0) - if ((unsigned char)c != *p++) - return (void *)(p - 1); - - return NULL; -} - -/* * FIXME: There should be a reserve for root, similar to ext2. */ int logfs_statfs(struct dentry *dentry, struct kstatfs *stats) diff --git a/include/linux/string.h b/include/linux/string.h index a176db2..e033564 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -114,6 +114,7 @@ extern int memcmp(const void *,const void *,__kernel_size_t); #ifndef __HAVE_ARCH_MEMCHR extern void * memchr(const void *,int,__kernel_size_t); #endif +void *memchr_inv(const void *s, int c, size_t n); extern char *kstrdup(const char *s, gfp_t gfp); extern char *kstrndup(const char *s, size_t len, gfp_t gfp); diff --git a/lib/string.c b/lib/string.c index 01fad9b..11df543 100644 --- a/lib/string.c +++ b/lib/string.c @@ -756,3 +756,57 @@ void *memchr(const void *s, int c, size_t n) } EXPORT_SYMBOL(memchr); #endif + +static void *check_bytes8(const u8 *start, u8 value, unsigned int bytes) +{ + while (bytes) { + if (*start != value) + return (void *)start; + start++; + bytes--; + } + return NULL; +} + +/** + * memchr_inv - Find an unmatching character in an area of memory. + * @start: The memory area + * @c: Find a character other than c + * @bytes: The size of the area. + * + * returns the address of the first character other than @c, or %NULL + * if the whole buffer contains just @c. + */ +void *memchr_inv(const void *start, int c, size_t bytes) +{ + u8 value = c; + u64 value64; + unsigned int words, prefix; + + if (bytes <= 16) + return check_bytes8(start, value, bytes); + + value64 = value | value << 8 | value << 16 | value << 24; + value64 = (value64 & 0xffffffff) | value64 << 32; + prefix = 8 - ((unsigned long)start) % 8; + + if (prefix) { + u8 *r = check_bytes8(start, value, prefix); + if (r) + return r; + start += prefix; + bytes -= prefix; + } + + words = bytes / 8; + + while (words) { + if (*(u64 *)start != value64) + return check_bytes8(start, value, 8); + start += 8; + words--; + } + + return check_bytes8(start, value, bytes % 8); +} +EXPORT_SYMBOL(memchr_inv); diff --git a/mm/slub.c b/mm/slub.c index 95215aa..7d2a996 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -655,49 +655,6 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) memset(p + s->objsize, val, s->inuse - s->objsize); } -static u8 *check_bytes8(u8 *start, u8 value, unsigned int bytes) -{ - while (bytes) { - if (*start != value) - return start; - start++; - bytes--; - } - return NULL; -} - -static u8 *check_bytes(u8 *start, u8 value, unsigned int bytes) -{ - u64 value64; - unsigned int words, prefix; - - if (bytes <= 16) - return check_bytes8(start, value, bytes); - - value64 = value | value << 8 | value << 16 | value << 24; - value64 = (value64 & 0xffffffff) | value64 << 32; - prefix = 8 - ((unsigned long)start) % 8; - - if (prefix) { - u8 *r = check_bytes8(start, value, prefix); - if (r) - return r; - start += prefix; - bytes -= prefix; - } - - words = bytes / 8; - - while (words) { - if (*(u64 *)start != value64) - return check_bytes8(start, value, 8); - start += 8; - words--; - } - - return check_bytes8(start, value, bytes % 8); -} - static void restore_bytes(struct kmem_cache *s, char *message, u8 data, void *from, void *to) { @@ -712,7 +669,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, u8 *fault; u8 *end; - fault = check_bytes(start, value, bytes); + fault = memchr_inv(start, value, bytes); if (!fault) return 1; @@ -805,7 +762,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) if (!remainder) return 1; - fault = check_bytes(end - remainder, POISON_INUSE, remainder); + fault = memchr_inv(end - remainder, POISON_INUSE, remainder); if (!fault) return 1; while (end > fault && end[-1] == POISON_INUSE) -- cgit v0.10.2 From 8c5fb8eadde41f67c61a7ac2d3246dab87bf7020 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 31 Oct 2011 17:08:10 -0700 Subject: mm/debug-pagealloc.c: use memchr_inv Use newly introduced memchr_inv() for page verification. Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/debug-pagealloc.c b/mm/debug-pagealloc.c index a4b6d70..2618933 100644 --- a/mm/debug-pagealloc.c +++ b/mm/debug-pagealloc.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -64,11 +65,8 @@ static void check_poison_mem(unsigned char *mem, size_t bytes) unsigned char *start; unsigned char *end; - for (start = mem; start < mem + bytes; start++) { - if (*start != PAGE_POISON) - break; - } - if (start == mem + bytes) + start = memchr_inv(mem, PAGE_POISON, bytes); + if (!start) return; for (end = mem + bytes - 1; end > start; end--) { -- cgit v0.10.2 From f5252e009d5b87071a919221e4f6624184005368 Mon Sep 17 00:00:00 2001 From: Mitsuo Hayasaka Date: Mon, 31 Oct 2011 17:08:13 -0700 Subject: mm: avoid null pointer access in vm_struct via /proc/vmallocinfo The /proc/vmallocinfo shows information about vmalloc allocations in vmlist that is a linklist of vm_struct. It, however, may access pages field of vm_struct where a page was not allocated. This results in a null pointer access and leads to a kernel panic. Why this happens: In __vmalloc_node_range() called from vmalloc(), newly allocated vm_struct is added to vmlist at __get_vm_area_node() and then, some fields of vm_struct such as nr_pages and pages are set at __vmalloc_area_node(). In other words, it is added to vmlist before it is fully initialized. At the same time, when the /proc/vmallocinfo is read, it accesses the pages field of vm_struct according to the nr_pages field at show_numa_info(). Thus, a null pointer access happens. The patch adds the newly allocated vm_struct to the vmlist *after* it is fully initialized. So, it can avoid accessing the pages field with unallocated page when show_numa_info() is called. Signed-off-by: Mitsuo Hayasaka Cc: Andrew Morton Cc: David Rientjes Cc: Namhyung Kim Cc: "Paul E. McKenney" Cc: Jeremy Fitzhardinge Cc: Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 9332e52..687fb11 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -13,6 +13,7 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ #define VM_MAP 0x00000004 /* vmap()ed pages */ #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ #define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ +#define VM_UNLIST 0x00000020 /* vm_struct is not listed in vmlist */ /* bits [20..32] reserved for arch specific ioremap internals */ /* diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 5016f19..56faf31 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1253,18 +1253,22 @@ EXPORT_SYMBOL_GPL(map_vm_area); DEFINE_RWLOCK(vmlist_lock); struct vm_struct *vmlist; -static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, +static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, unsigned long flags, void *caller) { - struct vm_struct *tmp, **p; - vm->flags = flags; vm->addr = (void *)va->va_start; vm->size = va->va_end - va->va_start; vm->caller = caller; va->private = vm; va->flags |= VM_VM_AREA; +} + +static void insert_vmalloc_vmlist(struct vm_struct *vm) +{ + struct vm_struct *tmp, **p; + vm->flags &= ~VM_UNLIST; write_lock(&vmlist_lock); for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { if (tmp->addr >= vm->addr) @@ -1275,6 +1279,13 @@ static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, write_unlock(&vmlist_lock); } +static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, + unsigned long flags, void *caller) +{ + setup_vmalloc_vm(vm, va, flags, caller); + insert_vmalloc_vmlist(vm); +} + static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long align, unsigned long flags, unsigned long start, unsigned long end, int node, gfp_t gfp_mask, void *caller) @@ -1313,7 +1324,18 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, return NULL; } - insert_vmalloc_vm(area, va, flags, caller); + /* + * When this function is called from __vmalloc_node_range, + * we do not add vm_struct to vmlist here to avoid + * accessing uninitialized members of vm_struct such as + * pages and nr_pages fields. They will be set later. + * To distinguish it from others, we use a VM_UNLIST flag. + */ + if (flags & VM_UNLIST) + setup_vmalloc_vm(area, va, flags, caller); + else + insert_vmalloc_vm(area, va, flags, caller); + return area; } @@ -1381,17 +1403,20 @@ struct vm_struct *remove_vm_area(const void *addr) va = find_vmap_area((unsigned long)addr); if (va && va->flags & VM_VM_AREA) { struct vm_struct *vm = va->private; - struct vm_struct *tmp, **p; - /* - * remove from list and disallow access to this vm_struct - * before unmap. (address range confliction is maintained by - * vmap.) - */ - write_lock(&vmlist_lock); - for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next) - ; - *p = tmp->next; - write_unlock(&vmlist_lock); + + if (!(vm->flags & VM_UNLIST)) { + struct vm_struct *tmp, **p; + /* + * remove from list and disallow access to + * this vm_struct before unmap. (address range + * confliction is maintained by vmap.) + */ + write_lock(&vmlist_lock); + for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next) + ; + *p = tmp->next; + write_unlock(&vmlist_lock); + } vmap_debug_free_range(va->va_start, va->va_end); free_unmap_vmap_area(va); @@ -1602,8 +1627,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, if (!size || (size >> PAGE_SHIFT) > totalram_pages) return NULL; - area = __get_vm_area_node(size, align, VM_ALLOC, start, end, node, - gfp_mask, caller); + area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNLIST, + start, end, node, gfp_mask, caller); if (!area) return NULL; @@ -1611,6 +1636,12 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller); /* + * In this function, newly allocated vm_struct is not added + * to vmlist at __get_vm_area_node(). so, it is added here. + */ + insert_vmalloc_vmlist(area); + + /* * A ref_count = 3 is needed because the vm_struct and vmap_area * structures allocated in the __get_vm_area_node() function contain * references to the virtual address of the vmalloc'ed block. -- cgit v0.10.2 From 0a93ebef698b08ed04af0d7d913bab8aedfdc253 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Mon, 31 Oct 2011 17:08:16 -0700 Subject: memblock: add memblock_start_of_DRAM() SPARC32 require access to the start address. Add a new helper memblock_start_of_DRAM() to give access to the address of the first memblock - which contains the lowest address. The awkward name was chosen to match the already present memblock_end_of_DRAM(). Signed-off-by: Sam Ravnborg Cc: "David S. Miller" Cc: Yinghai Lu Acked-by: Tejun Heo Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 7525e38..e6b843e 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -80,6 +80,7 @@ extern phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr); extern phys_addr_t memblock_phys_mem_size(void); +extern phys_addr_t memblock_start_of_DRAM(void); extern phys_addr_t memblock_end_of_DRAM(void); extern void memblock_enforce_memory_limit(phys_addr_t memory_limit); extern int memblock_is_memory(phys_addr_t addr); diff --git a/mm/memblock.c b/mm/memblock.c index ccbf973..b7ed636 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -626,6 +626,12 @@ phys_addr_t __init memblock_phys_mem_size(void) return memblock.memory_size; } +/* lowest address */ +phys_addr_t __init_memblock memblock_start_of_DRAM(void) +{ + return memblock.memory.regions[0].base; +} + phys_addr_t __init_memblock memblock_end_of_DRAM(void) { int idx = memblock.memory.cnt - 1; -- cgit v0.10.2 From 6661672053aee709d93f5dbd7887c789364c11d4 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Mon, 31 Oct 2011 17:08:20 -0700 Subject: memblock: add NO_BOOTMEM config symbol With the NO_BOOTMEM symbol added architectures may now use the following syntax to tell that they do not need bootmem: select NO_BOOTMEM This is much more convinient than adding a new kconfig symbol which was otherwise required. Adding this symbol does not conflict with the architctures that already define their own symbol. Signed-off-by: Sam Ravnborg Cc: Yinghai Lu Acked-by: Tejun Heo Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/Kconfig b/mm/Kconfig index f2f1ca1..011b110 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -131,6 +131,9 @@ config SPARSEMEM_VMEMMAP config HAVE_MEMBLOCK boolean +config NO_BOOTMEM + boolean + # eventually, we can have this option just 'select SPARSEMEM' config MEMORY_HOTPLUG bool "Allow for memory hot-add" -- cgit v0.10.2 From ebed48460be5abd86d9a24fa7c66378e58109f30 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Mon, 31 Oct 2011 17:08:22 -0700 Subject: mremap: check for overflow using deltas Using "- 1" relies on the old_end to be page aligned and PAGE_SIZE > 1, those are reasonable requirements but the check remains obscure and it looks more like an off by one error than an overflow check. This I feel will improve readability. Signed-off-by: Andrea Arcangeli Acked-by: Johannes Weiner Acked-by: Mel Gorman Acked-by: Rik van Riel Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/mremap.c b/mm/mremap.c index 506fa44..195e866 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -141,9 +141,10 @@ unsigned long move_page_tables(struct vm_area_struct *vma, for (; old_addr < old_end; old_addr += extent, new_addr += extent) { cond_resched(); next = (old_addr + PMD_SIZE) & PMD_MASK; - if (next - 1 > old_end) - next = old_end; + /* even if next overflowed, extent below will be ok */ extent = next - old_addr; + if (extent > old_end - old_addr) + extent = old_end - old_addr; old_pmd = get_old_pmd(vma->vm_mm, old_addr); if (!old_pmd) continue; -- cgit v0.10.2 From 7b6efc2bc4f19952b25ebf9b236e5ac43cd386c2 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Mon, 31 Oct 2011 17:08:26 -0700 Subject: mremap: avoid sending one IPI per page This replaces ptep_clear_flush() with ptep_get_and_clear() and a single flush_tlb_range() at the end of the loop, to avoid sending one IPI for each page. The mmu_notifier_invalidate_range_start/end section is enlarged accordingly but this is not going to fundamentally change things. It was more by accident that the region under mremap was for the most part still available for secondary MMUs: the primary MMU was never allowed to reliably access that region for the duration of the mremap (modulo trapping SIGSEGV on the old address range which sounds unpractical and flakey). If users wants secondary MMUs not to lose access to a large region under mremap they should reduce the mremap size accordingly in userland and run multiple calls. Overall this will run faster so it's actually going to reduce the time the region is under mremap for the primary MMU which should provide a net benefit to apps. For KVM this is a noop because the guest physical memory is never mremapped, there's just no point it ever moving it while guest runs. One target of this optimization is JVM GC (so unrelated to the mmu notifier logic). Signed-off-by: Andrea Arcangeli Acked-by: Johannes Weiner Acked-by: Mel Gorman Acked-by: Rik van Riel Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/mremap.c b/mm/mremap.c index 195e866..a184f37 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -80,11 +80,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, struct mm_struct *mm = vma->vm_mm; pte_t *old_pte, *new_pte, pte; spinlock_t *old_ptl, *new_ptl; - unsigned long old_start; - old_start = old_addr; - mmu_notifier_invalidate_range_start(vma->vm_mm, - old_start, old_end); if (vma->vm_file) { /* * Subtle point from Rajesh Venkatasubramanian: before @@ -111,7 +107,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, new_pte++, new_addr += PAGE_SIZE) { if (pte_none(*old_pte)) continue; - pte = ptep_clear_flush(vma, old_addr, old_pte); + pte = ptep_get_and_clear(mm, old_addr, old_pte); pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); set_pte_at(mm, new_addr, new_pte, pte); } @@ -123,7 +119,6 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, pte_unmap_unlock(old_pte - 1, old_ptl); if (mapping) mutex_unlock(&mapping->i_mmap_mutex); - mmu_notifier_invalidate_range_end(vma->vm_mm, old_start, old_end); } #define LATENCY_LIMIT (64 * PAGE_SIZE) @@ -134,10 +129,13 @@ unsigned long move_page_tables(struct vm_area_struct *vma, { unsigned long extent, next, old_end; pmd_t *old_pmd, *new_pmd; + bool need_flush = false; old_end = old_addr + len; flush_cache_range(vma, old_addr, old_end); + mmu_notifier_invalidate_range_start(vma->vm_mm, old_addr, old_end); + for (; old_addr < old_end; old_addr += extent, new_addr += extent) { cond_resched(); next = (old_addr + PMD_SIZE) & PMD_MASK; @@ -158,7 +156,12 @@ unsigned long move_page_tables(struct vm_area_struct *vma, extent = LATENCY_LIMIT; move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma, new_pmd, new_addr); + need_flush = true; } + if (likely(need_flush)) + flush_tlb_range(vma, old_end-len, old_addr); + + mmu_notifier_invalidate_range_end(vma->vm_mm, old_end-len, old_end); return len + old_addr - old_end; /* how much done */ } -- cgit v0.10.2 From 37a1c49a91ad55f917a399ef2174b5ebda4283f9 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Mon, 31 Oct 2011 17:08:30 -0700 Subject: thp: mremap support and TLB optimization This adds THP support to mremap (decreases the number of split_huge_page() calls). Here are also some benchmarks with a proggy like this: === #define _GNU_SOURCE #include #include #include #include #include #define SIZE (5UL*1024*1024*1024) int main() { static struct timeval oldstamp, newstamp; long diffsec; char *p, *p2, *p3, *p4; if (posix_memalign((void **)&p, 2*1024*1024, SIZE)) perror("memalign"), exit(1); if (posix_memalign((void **)&p2, 2*1024*1024, SIZE)) perror("memalign"), exit(1); if (posix_memalign((void **)&p3, 2*1024*1024, 4096)) perror("memalign"), exit(1); memset(p, 0xff, SIZE); memset(p2, 0xff, SIZE); memset(p3, 0x77, 4096); gettimeofday(&oldstamp, NULL); p4 = mremap(p, SIZE, SIZE, MREMAP_FIXED|MREMAP_MAYMOVE, p3); gettimeofday(&newstamp, NULL); diffsec = newstamp.tv_sec - oldstamp.tv_sec; diffsec = newstamp.tv_usec - oldstamp.tv_usec + 1000000 * diffsec; printf("usec %ld\n", diffsec); if (p == MAP_FAILED || p4 != p3) //if (p == MAP_FAILED) perror("mremap"), exit(1); if (memcmp(p4, p2, SIZE)) printf("mremap bug\n"), exit(1); printf("ok\n"); return 0; } === THP on Performance counter stats for './largepage13' (3 runs): 69195836 dTLB-loads ( +- 3.546% ) (scaled from 50.30%) 60708 dTLB-load-misses ( +- 11.776% ) (scaled from 52.62%) 676266476 dTLB-stores ( +- 5.654% ) (scaled from 69.54%) 29856 dTLB-store-misses ( +- 4.081% ) (scaled from 89.22%) 1055848782 iTLB-loads ( +- 4.526% ) (scaled from 80.18%) 8689 iTLB-load-misses ( +- 2.987% ) (scaled from 58.20%) 7.314454164 seconds time elapsed ( +- 0.023% ) THP off Performance counter stats for './largepage13' (3 runs): 1967379311 dTLB-loads ( +- 0.506% ) (scaled from 60.59%) 9238687 dTLB-load-misses ( +- 22.547% ) (scaled from 61.87%) 2014239444 dTLB-stores ( +- 0.692% ) (scaled from 60.40%) 3312335 dTLB-store-misses ( +- 7.304% ) (scaled from 67.60%) 6764372065 iTLB-loads ( +- 0.925% ) (scaled from 79.00%) 8202 iTLB-load-misses ( +- 0.475% ) (scaled from 70.55%) 9.693655243 seconds time elapsed ( +- 0.069% ) grep thp /proc/vmstat thp_fault_alloc 35849 thp_fault_fallback 0 thp_collapse_alloc 3 thp_collapse_alloc_failed 0 thp_split 0 thp_split 0 confirms no thp split despite plenty of hugepages allocated. The measurement of only the mremap time (so excluding the 3 long memset and final long 10GB memory accessing memcmp): THP on usec 14824 usec 14862 usec 14859 THP off usec 256416 usec 255981 usec 255847 With an older kernel without the mremap optimizations (the below patch optimizes the non THP version too). THP on usec 392107 usec 390237 usec 404124 THP off usec 444294 usec 445237 usec 445820 I guess with a threaded program that sends more IPI on large SMP it'd create an even larger difference. All debug options are off except DEBUG_VM to avoid skewing the results. The only problem for native 2M mremap like it happens above both the source and destination address must be 2M aligned or the hugepmd can't be moved without a split but that is an hardware limitation. [akpm@linux-foundation.org: coding-style nitpicking] Signed-off-by: Andrea Arcangeli Acked-by: Johannes Weiner Acked-by: Mel Gorman Acked-by: Rik van Riel Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 48c32eb..a9ace9c 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -22,6 +22,11 @@ extern int zap_huge_pmd(struct mmu_gather *tlb, extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned char *vec); +extern int move_huge_pmd(struct vm_area_struct *vma, + struct vm_area_struct *new_vma, + unsigned long old_addr, + unsigned long new_addr, unsigned long old_end, + pmd_t *old_pmd, pmd_t *new_pmd); extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e2d1587..6b072bd 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1052,6 +1052,51 @@ int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, return ret; } +int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, + unsigned long old_addr, + unsigned long new_addr, unsigned long old_end, + pmd_t *old_pmd, pmd_t *new_pmd) +{ + int ret = 0; + pmd_t pmd; + + struct mm_struct *mm = vma->vm_mm; + + if ((old_addr & ~HPAGE_PMD_MASK) || + (new_addr & ~HPAGE_PMD_MASK) || + old_end - old_addr < HPAGE_PMD_SIZE || + (new_vma->vm_flags & VM_NOHUGEPAGE)) + goto out; + + /* + * The destination pmd shouldn't be established, free_pgtables() + * should have release it. + */ + if (WARN_ON(!pmd_none(*new_pmd))) { + VM_BUG_ON(pmd_trans_huge(*new_pmd)); + goto out; + } + + spin_lock(&mm->page_table_lock); + if (likely(pmd_trans_huge(*old_pmd))) { + if (pmd_trans_splitting(*old_pmd)) { + spin_unlock(&mm->page_table_lock); + wait_split_huge_page(vma->anon_vma, old_pmd); + ret = -1; + } else { + pmd = pmdp_get_and_clear(mm, old_addr, old_pmd); + VM_BUG_ON(!pmd_none(*new_pmd)); + set_pmd_at(mm, new_addr, new_pmd, pmd); + spin_unlock(&mm->page_table_lock); + ret = 1; + } + } else { + spin_unlock(&mm->page_table_lock); + } +out: + return ret; +} + int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot) { diff --git a/mm/mremap.c b/mm/mremap.c index a184f37..d6959cb 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -41,8 +41,7 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) return NULL; pmd = pmd_offset(pud, addr); - split_huge_page_pmd(mm, pmd); - if (pmd_none_or_clear_bad(pmd)) + if (pmd_none(*pmd)) return NULL; return pmd; @@ -65,8 +64,6 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma, return NULL; VM_BUG_ON(pmd_trans_huge(*pmd)); - if (pmd_none(*pmd) && __pte_alloc(mm, vma, pmd, addr)) - return NULL; return pmd; } @@ -149,6 +146,23 @@ unsigned long move_page_tables(struct vm_area_struct *vma, new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr); if (!new_pmd) break; + if (pmd_trans_huge(*old_pmd)) { + int err = 0; + if (extent == HPAGE_PMD_SIZE) + err = move_huge_pmd(vma, new_vma, old_addr, + new_addr, old_end, + old_pmd, new_pmd); + if (err > 0) { + need_flush = true; + continue; + } else if (!err) { + split_huge_page_pmd(vma->vm_mm, old_pmd); + } + VM_BUG_ON(pmd_trans_huge(*old_pmd)); + } + if (pmd_none(*new_pmd) && __pte_alloc(new_vma->vm_mm, new_vma, + new_pmd, new_addr)) + break; next = (new_addr + PMD_SIZE) & PMD_MASK; if (extent > next - new_addr) extent = next - new_addr; -- cgit v0.10.2 From 06d5e032adcbc7d50c606a1396f00e2474e4213e Mon Sep 17 00:00:00 2001 From: Sonic Zhang Date: Mon, 31 Oct 2011 17:08:31 -0700 Subject: include/asm-generic/page.h: calculate virt_to_page and page_to_virt via predefined macro On NOMMU architectures, if physical memory doesn't start from 0, ARCH_PFN_OFFSET is defined to generate page index in mem_map array. Because virtual address is equal to physical address, PAGE_OFFSET is always 0. virt_to_page and page_to_virt should not index page by PAGE_OFFSET directly. Signed-off-by: Sonic Zhang Cc: Greg Ungerer Cc: Geert Uytterhoeven Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-generic/page.h b/include/asm-generic/page.h index 75fec18..351889d 100644 --- a/include/asm-generic/page.h +++ b/include/asm-generic/page.h @@ -79,8 +79,8 @@ extern unsigned long memory_end; #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) #define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) -#define virt_to_page(addr) (mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT)) -#define page_to_virt(page) ((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET) +#define virt_to_page(addr) pfn_to_page(virt_to_pfn(addr)) +#define page_to_virt(page) pfn_to_virt(page_to_pfn(page)) #ifndef page_to_phys #define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) -- cgit v0.10.2 From 3ee9a4f086716d792219c021e8509f91165a4128 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 31 Oct 2011 17:08:35 -0700 Subject: mm: neaten warn_alloc_failed Add __attribute__((format (printf...) to the function to validate format and arguments. Use vsprintf extension %pV to avoid any possible message interleaving. Coalesce format string. Convert printks/pr_warning to pr_warn. [akpm@linux-foundation.org: use the __printf() macro] Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mm.h b/include/linux/mm.h index 7438071..3b3e3b8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1334,7 +1334,8 @@ extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); extern int after_bootmem; -extern void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...); +extern __printf(3, 4) +void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...); extern void setup_per_cpu_pageset(void); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 83a0205..9dd443d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1754,7 +1754,6 @@ static DEFINE_RATELIMIT_STATE(nopage_rs, void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) { - va_list args; unsigned int filter = SHOW_MEM_FILTER_NODES; if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs)) @@ -1773,14 +1772,21 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) filter &= ~SHOW_MEM_FILTER_NODES; if (fmt) { - printk(KERN_WARNING); + struct va_format vaf; + va_list args; + va_start(args, fmt); - vprintk(fmt, args); + + vaf.fmt = fmt; + vaf.va = &args; + + pr_warn("%pV", &vaf); + va_end(args); } - pr_warning("%s: page allocation failure: order:%d, mode:0x%x\n", - current->comm, order, gfp_mask); + pr_warn("%s: page allocation failure: order:%d, mode:0x%x\n", + current->comm, order, gfp_mask); dump_stack(); if (!should_suppress_show_mem()) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 56faf31..08ab0aa 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1593,8 +1593,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, return area->addr; fail: - warn_alloc_failed(gfp_mask, order, "vmalloc: allocation failure, " - "allocated %ld of %ld bytes\n", + warn_alloc_failed(gfp_mask, order, + "vmalloc: allocation failure, allocated %ld of %ld bytes\n", (area->nr_pages*PAGE_SIZE), area->size); vfree(area->addr); return NULL; -- cgit v0.10.2 From 64212ec569bfdd094f7a23d9b09862209a983559 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 31 Oct 2011 17:08:38 -0700 Subject: debug-pagealloc: add support for highmem pages This adds support for highmem pages poisoning and verification to the debug-pagealloc feature for no-architecture support. [akpm@linux-foundation.org: remove unneeded preempt_disable/enable] Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/debug-pagealloc.c b/mm/debug-pagealloc.c index 2618933..7cea557 100644 --- a/mm/debug-pagealloc.c +++ b/mm/debug-pagealloc.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -20,28 +21,13 @@ static inline bool page_poison(struct page *page) return test_bit(PAGE_DEBUG_FLAG_POISON, &page->debug_flags); } -static void poison_highpage(struct page *page) -{ - /* - * Page poisoning for highmem pages is not implemented. - * - * This can be called from interrupt contexts. - * So we need to create a new kmap_atomic slot for this - * application and it will need interrupt protection. - */ -} - static void poison_page(struct page *page) { - void *addr; + void *addr = kmap_atomic(page); - if (PageHighMem(page)) { - poison_highpage(page); - return; - } set_page_poison(page); - addr = page_address(page); memset(addr, PAGE_POISON, PAGE_SIZE); + kunmap_atomic(addr); } static void poison_pages(struct page *page, int n) @@ -86,27 +72,17 @@ static void check_poison_mem(unsigned char *mem, size_t bytes) dump_stack(); } -static void unpoison_highpage(struct page *page) -{ - /* - * See comment in poison_highpage(). - * Highmem pages should not be poisoned for now - */ - BUG_ON(page_poison(page)); -} - static void unpoison_page(struct page *page) { - if (PageHighMem(page)) { - unpoison_highpage(page); + void *addr; + + if (!page_poison(page)) return; - } - if (page_poison(page)) { - void *addr = page_address(page); - check_poison_mem(addr, PAGE_SIZE); - clear_page_poison(page); - } + addr = kmap_atomic(page); + check_poison_mem(addr, PAGE_SIZE); + clear_page_poison(page); + kunmap_atomic(addr); } static void unpoison_pages(struct page *page, int n) -- cgit v0.10.2 From d2ebd0f6b89567eb93ead4e2ca0cbe03021f344b Mon Sep 17 00:00:00 2001 From: "Alex,Shi" Date: Mon, 31 Oct 2011 17:08:39 -0700 Subject: kswapd: avoid unnecessary rebalance after an unsuccessful balancing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 215ddd66 ("mm: vmscan: only read new_classzone_idx from pgdat when reclaiming successfully") , Mel Gorman said kswapd is better to sleep after a unsuccessful balancing if there is tighter reclaim request pending in the balancing. But in the following scenario, kswapd do something that is not matched our expectation. The patch fixes this issue. 1, Read pgdat request A (classzone_idx, order = 3) 2, balance_pgdat() 3, During pgdat, a new pgdat request B (classzone_idx, order = 5) is placed 4, balance_pgdat() returns but failed since returned order = 0 5, pgdat of request A assigned to balance_pgdat(), and do balancing again. While the expectation behavior of kswapd should try to sleep. Signed-off-by: Alex Shi Reviewed-by: Tim Chen Acked-by: Mel Gorman Tested-by: Pádraig Brady Cc: Rik van Riel Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index 77ee24f..dd5fc86 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2823,7 +2823,9 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx) static int kswapd(void *p) { unsigned long order, new_order; + unsigned balanced_order; int classzone_idx, new_classzone_idx; + int balanced_classzone_idx; pg_data_t *pgdat = (pg_data_t*)p; struct task_struct *tsk = current; @@ -2854,7 +2856,9 @@ static int kswapd(void *p) set_freezable(); order = new_order = 0; + balanced_order = 0; classzone_idx = new_classzone_idx = pgdat->nr_zones - 1; + balanced_classzone_idx = classzone_idx; for ( ; ; ) { int ret; @@ -2863,7 +2867,8 @@ static int kswapd(void *p) * new request of a similar or harder type will succeed soon * so consider going to sleep on the basis we reclaimed at */ - if (classzone_idx >= new_classzone_idx && order == new_order) { + if (balanced_classzone_idx >= new_classzone_idx && + balanced_order == new_order) { new_order = pgdat->kswapd_max_order; new_classzone_idx = pgdat->classzone_idx; pgdat->kswapd_max_order = 0; @@ -2878,7 +2883,8 @@ static int kswapd(void *p) order = new_order; classzone_idx = new_classzone_idx; } else { - kswapd_try_to_sleep(pgdat, order, classzone_idx); + kswapd_try_to_sleep(pgdat, balanced_order, + balanced_classzone_idx); order = pgdat->kswapd_max_order; classzone_idx = pgdat->classzone_idx; pgdat->kswapd_max_order = 0; @@ -2895,7 +2901,9 @@ static int kswapd(void *p) */ if (!ret) { trace_mm_vmscan_kswapd_wake(pgdat->node_id, order); - order = balance_pgdat(pgdat, order, &classzone_idx); + balanced_classzone_idx = classzone_idx; + balanced_order = balance_pgdat(pgdat, order, + &balanced_classzone_idx); } } return 0; -- cgit v0.10.2 From d1f0ece6cdca973c01a46dff0eb062baafe78a85 Mon Sep 17 00:00:00 2001 From: Jonghwan Choi Date: Mon, 31 Oct 2011 17:08:42 -0700 Subject: mm/memblock.c: small function definition fixes warning: function 'memblock_memory_can_coalesce' with external linkage has definition. Signed-off-by: Jonghwan Choi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memblock.c b/mm/memblock.c index b7ed636..7d4f5c8 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -267,7 +267,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type) return 0; } -extern int __init_memblock __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, +int __init_memblock __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, phys_addr_t addr2, phys_addr_t size2) { return 1; -- cgit v0.10.2 From f0dfcde099453aa4c0dc42473828d15a6d492936 Mon Sep 17 00:00:00 2001 From: "Alex,Shi" Date: Mon, 31 Oct 2011 17:08:45 -0700 Subject: kswapd: assign new_order and new_classzone_idx after wakeup in sleeping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There 2 places to read pgdat in kswapd. One is return from a successful balance, another is waked up from kswapd sleeping. The new_order and new_classzone_idx represent the balance input order and classzone_idx. But current new_order and new_classzone_idx are not assigned after kswapd_try_to_sleep(), that will cause a bug in the following scenario. 1: after a successful balance, kswapd goes to sleep, and new_order = 0; new_classzone_idx = __MAX_NR_ZONES - 1; 2: kswapd waked up with order = 3 and classzone_idx = ZONE_NORMAL 3: in the balance_pgdat() running, a new balance wakeup happened with order = 5, and classzone_idx = ZONE_NORMAL 4: the first wakeup(order = 3) finished successufly, return order = 3 but, the new_order is still 0, so, this balancing will be treated as a failed balance. And then the second tighter balancing will be missed. So, to avoid the above problem, the new_order and new_classzone_idx need to be assigned for later successful comparison. Signed-off-by: Alex Shi Acked-by: Mel Gorman Reviewed-by: Minchan Kim Tested-by: Pádraig Brady Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index dd5fc86..51bc4bf 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2887,6 +2887,8 @@ static int kswapd(void *p) balanced_classzone_idx); order = pgdat->kswapd_max_order; classzone_idx = pgdat->classzone_idx; + new_order = order; + new_classzone_idx = classzone_idx; pgdat->kswapd_max_order = 0; pgdat->classzone_idx = pgdat->nr_zones - 1; } -- cgit v0.10.2 From de7d2b567d040e3b67fe7121945982f14343213d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 31 Oct 2011 17:08:48 -0700 Subject: mm/vmalloc.c: report more vmalloc failures Some vmalloc failure paths do not report OOM conditions. Add warn_alloc_failed, which also does a dump_stack, to those failure paths. This allows more site specific vmalloc failure logging message printks to be removed. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 08ab0aa..b669aa6 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1625,13 +1625,12 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, size = PAGE_ALIGN(size); if (!size || (size >> PAGE_SHIFT) > totalram_pages) - return NULL; + goto fail; area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNLIST, start, end, node, gfp_mask, caller); - if (!area) - return NULL; + goto fail; addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller); @@ -1649,6 +1648,12 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, kmemleak_alloc(addr, real_size, 3, gfp_mask); return addr; + +fail: + warn_alloc_failed(gfp_mask, 0, + "vmalloc: allocation failure: %lu bytes\n", + real_size); + return NULL; } /** -- cgit v0.10.2 From 99ef0315f1b320f392acc4364598340e78758fd2 Mon Sep 17 00:00:00 2001 From: Wanlong Gao Date: Mon, 31 Oct 2011 17:08:51 -0700 Subject: ksm: fix the comment of try_to_unmap_one() try_to_unmap_one() is called by try_to_unmap_ksm(), too. Signed-off-by: Wanlong Gao Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/rmap.c b/mm/rmap.c index 8005080..6541cf7 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1164,7 +1164,7 @@ void page_remove_rmap(struct page *page) /* * Subfunctions of try_to_unmap: try_to_unmap_one called - * repeatedly from either try_to_unmap_anon or try_to_unmap_file. + * repeatedly from try_to_unmap_ksm, try_to_unmap_anon or try_to_unmap_file. */ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, unsigned long address, enum ttu_flags flags) -- cgit v0.10.2 From 20c8c62891a346e09c8d26de41ce78bd7a76c5b0 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 31 Oct 2011 17:08:54 -0700 Subject: mm-add-comment-explaining-task-state-setting-in-bdi_forker_thread-fix fiddle wording Cc: Jan Kara Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/backing-dev.c b/mm/backing-dev.c index a87da52..7520ef0 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -404,9 +404,8 @@ static int bdi_forker_thread(void *ptr) /* * In the following loop we are going to check whether we have * some work to do without any synchronization with tasks - * waking us up to do work for them. So we have to set task - * state already here so that we don't miss wakeups coming - * after we verify some condition. + * waking us up to do work for them. Set the task state here + * so that we don't miss wakeups after verifying conditions. */ set_current_state(TASK_INTERRUPTIBLE); -- cgit v0.10.2 From 09f363c7363eb10cfb4b82094bd7064e5608258b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 31 Oct 2011 17:08:57 -0700 Subject: vmscan: fix shrinker callback bug in fs/super.c The callback must not return -1 when nr_to_scan is zero. Fix the bug in fs/super.c and add this requirement to the callback specification. Signed-off-by: Mikulas Patocka Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/super.c b/fs/super.c index 3f56a26..32a81f3 100644 --- a/fs/super.c +++ b/fs/super.c @@ -61,7 +61,7 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc) return -1; if (!grab_super_passive(sb)) - return -1; + return !sc->nr_to_scan ? 0 : -1; if (sb->s_op && sb->s_op->nr_cached_objects) fs_objects = sb->s_op->nr_cached_objects(sb); diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 790651b..a83833a 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -20,6 +20,7 @@ struct shrink_control { * 'nr_to_scan' entries and attempt to free them up. It should return * the number of objects which remain in the cache. If it returns -1, it means * it cannot do any scanning at this time (eg. there is a risk of deadlock). + * The callback must not return -1 if nr_to_scan is zero. * * The 'gfpmask' refers to the allocation we are currently trying to * fulfil. -- cgit v0.10.2 From 584cff54e1ff8f59d5109dc8093efedff8bcc375 Mon Sep 17 00:00:00 2001 From: Kautuk Consul Date: Mon, 31 Oct 2011 17:08:59 -0700 Subject: mm/mmap.c: eliminate the ret variable from mm_take_all_locks() The ret variable is really not needed in mm_take_all_locks(). Signed-off-by: Kautuk Consul Reviewed-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/mmap.c b/mm/mmap.c index a65efd4..3c0061f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2558,7 +2558,6 @@ int mm_take_all_locks(struct mm_struct *mm) { struct vm_area_struct *vma; struct anon_vma_chain *avc; - int ret = -EINTR; BUG_ON(down_read_trylock(&mm->mmap_sem)); @@ -2579,13 +2578,11 @@ int mm_take_all_locks(struct mm_struct *mm) vm_lock_anon_vma(mm, avc->anon_vma); } - ret = 0; + return 0; out_unlock: - if (ret) - mm_drop_all_locks(mm); - - return ret; + mm_drop_all_locks(mm); + return -EINTR; } static void vm_unlock_anon_vma(struct anon_vma *anon_vma) -- cgit v0.10.2 From 72a2ebd8bc62e6658513d3b2a1119e91c3ea6810 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 31 Oct 2011 17:09:00 -0700 Subject: fs/buffer.c: add device information for error output in __find_get_block_slow() On the ext4 mailing list[1], we got some report about errors in __find_get_block_slow(), but the information is very limited. If the device information is given, we can know the name of the sick volume. Futhermore, we can get the corresponding status of that block(group, inode block etc) by analyzing the disk layout. [1] http://marc.info/?l=linux-ext4&m=131379831421147&w=2 Signed-off-by: Tao Ma Cc: Al Viro Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/buffer.c b/fs/buffer.c index 936d603..70a1974 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -213,13 +213,16 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) * elsewhere, don't buffer_error if we had some unmapped buffers */ if (all_mapped) { + char b[BDEVNAME_SIZE]; + printk("__find_get_block_slow() failed. " "block=%llu, b_blocknr=%llu\n", (unsigned long long)block, (unsigned long long)bh->b_blocknr); printk("b_state=0x%08lx, b_size=%zu\n", bh->b_state, bh->b_size); - printk("device blocksize: %d\n", 1 << bd_inode->i_blkbits); + printk("device %s blocksize: %d\n", bdevname(bdev, b), + 1 << bd_inode->i_blkbits); } out_unlock: spin_unlock(&bd_mapping->private_lock); -- cgit v0.10.2 From dd73e85f6d8f721d66bcbd2734a5f4bc3d3cd768 Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Mon, 31 Oct 2011 17:09:04 -0700 Subject: HWPOISON: convert pr_debug()s to pr_info()s Commit fb46e73520940b ("HWPOISON: Convert pr_debugs to pr_info) authored by Andi Kleen converted a number of pr_debug()s to pr_info()s. About the same time additional code with pr_debug()s was added by two other commits 8c6c2ecb4466 ("HWPOSION, hugetlb: recover from free hugepage error when !MF_COUNT_INCREASED") and d950b95882f3d ("HWPOISON, hugetlb: soft offlining for hugepage"). And these pr_debug()s failed to get converted to pr_info()s. This patch converts them as well. And does some minor related whitespace cleanup. Signed-off-by: Dean Nelson Reviewed-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 2b43ba0..edc388d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1310,7 +1310,7 @@ int unpoison_memory(unsigned long pfn) * to the end. */ if (PageHuge(page)) { - pr_debug("MCE: Memory failure is now running on free hugepage %#lx\n", pfn); + pr_info("MCE: Memory failure is now running on free hugepage %#lx\n", pfn); return 0; } if (TestClearPageHWPoison(p)) @@ -1419,7 +1419,7 @@ static int soft_offline_huge_page(struct page *page, int flags) if (PageHWPoison(hpage)) { put_page(hpage); - pr_debug("soft offline: %#lx hugepage already poisoned\n", pfn); + pr_info("soft offline: %#lx hugepage already poisoned\n", pfn); return -EBUSY; } @@ -1433,8 +1433,8 @@ static int soft_offline_huge_page(struct page *page, int flags) list_for_each_entry_safe(page1, page2, &pagelist, lru) put_page(page1); - pr_debug("soft offline: %#lx: migration failed %d, type %lx\n", - pfn, ret, page->flags); + pr_info("soft offline: %#lx: migration failed %d, type %lx\n", + pfn, ret, page->flags); if (ret > 0) ret = -EIO; return ret; @@ -1505,7 +1505,7 @@ int soft_offline_page(struct page *page, int flags) } if (!PageLRU(page)) { pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n", - pfn, page->flags); + pfn, page->flags); return -EIO; } @@ -1566,7 +1566,7 @@ int soft_offline_page(struct page *page, int flags) } } else { pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n", - pfn, ret, page_count(page), page->flags); + pfn, ret, page_count(page), page->flags); } if (ret) return ret; -- cgit v0.10.2 From d43a87e68e9e71d2987a29cc239acec4e8f410c9 Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Mon, 31 Oct 2011 17:09:08 -0700 Subject: mm: compaction: make compact_zone_order() static There's no compact_zone_order() user outside file scope, so make it static. Signed-off-by: Kyungmin Park Acked-by: David Rientjes Reviewed-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/compaction.h b/include/linux/compaction.h index cc9f7a4..bb2bbdb 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -24,8 +24,6 @@ extern unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask, bool sync); extern unsigned long compaction_suitable(struct zone *zone, int order); -extern unsigned long compact_zone_order(struct zone *zone, int order, - gfp_t gfp_mask, bool sync); /* Do not skip compaction more than 64 times */ #define COMPACT_MAX_DEFER_SHIFT 6 @@ -69,12 +67,6 @@ static inline unsigned long compaction_suitable(struct zone *zone, int order) return COMPACT_SKIPPED; } -static inline unsigned long compact_zone_order(struct zone *zone, int order, - gfp_t gfp_mask, bool sync) -{ - return COMPACT_CONTINUE; -} - static inline void defer_compaction(struct zone *zone) { } diff --git a/mm/compaction.c b/mm/compaction.c index a0e4202..899d956 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -582,7 +582,7 @@ out: return ret; } -unsigned long compact_zone_order(struct zone *zone, +static unsigned long compact_zone_order(struct zone *zone, int order, gfp_t gfp_mask, bool sync) { -- cgit v0.10.2 From 4e9dc5df46001510ebd3b3e54faa650f474e51a3 Mon Sep 17 00:00:00 2001 From: Li Haifeng Date: Mon, 31 Oct 2011 17:09:09 -0700 Subject: mm: fix kunmap_high() comment Signed-off-by: Li Haifeng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/highmem.c b/mm/highmem.c index 5ef672c..e159a7b 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -250,7 +250,7 @@ void *kmap_high_get(struct page *page) #endif /** - * kunmap_high - map a highmem page into memory + * kunmap_high - unmap a highmem page into memory * @page: &struct page to unmap * * If ARCH_NEEDS_KMAP_HIGH_GET is not defined then this may be called -- cgit v0.10.2 From 3f380998aeb51b99d5d22cadb41162e1e9db70d2 Mon Sep 17 00:00:00 2001 From: Kautuk Consul Date: Mon, 31 Oct 2011 17:09:11 -0700 Subject: vmscan.c: fix invalid strict_strtoul() check in write_scan_unevictable_node() write_scan_unevictable_node() checks the value req returned by strict_strtoul() and returns 1 if req is 0. However, when strict_strtoul() returns 0, it means successful conversion of buf to unsigned long. Due to this, the function was not proceeding to scan the zones for unevictable pages even though we write a valid value to the scan_unevictable_pages sys file. Change this check slightly to check for invalid value in buf as well as 0 value stored in res after successful conversion via strict_strtoul. In both cases, we do not perform the scanning of this node's zones. Signed-off-by: Kautuk Consul Reviewed-by: KAMEZAWA Hiroyuki Cc: Johannes Weiner Cc: Lee Schermerhorn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index 51bc4bf..ac644fe 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3520,8 +3520,8 @@ static ssize_t write_scan_unevictable_node(struct sys_device *dev, unsigned long res; unsigned long req = strict_strtoul(buf, 10, &res); - if (!req) - return 1; /* zero is no-op */ + if (req || !res) + return 1; /* Invalid input or zero is no-op */ for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { if (!populated_zone(zone)) -- cgit v0.10.2 From 264e56d8247ef6e31ed4386926cae86c61ddcb18 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 31 Oct 2011 17:09:13 -0700 Subject: mm: disable user interface to manually rescue unevictable pages At one point, anonymous pages were supposed to go on the unevictable list when no swap space was configured, and the idea was to manually rescue those pages after adding swap and making them evictable again. But nowadays, swap-backed pages on the anon LRU list are not scanned without available swap space anyway, so there is no point in moving them to a separate list anymore. The manual rescue could also be used in case pages were stranded on the unevictable list due to race conditions. But the code has been around for a while now and newly discovered bugs should be properly reported and dealt with instead of relying on such a manual fixup. In addition to the lack of a usecase, the sysfs interface to rescue pages from a specific NUMA node has been broken since its introduction, so it's unlikely that anybody ever relied on that. This patch removes the functionality behind the sysctl and the node-interface and emits a one-time warning when somebody tries to access either of them. Signed-off-by: Johannes Weiner Reported-by: Kautuk Consul Reviewed-by: Minchan Kim Acked-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index ac644fe..3886b0b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3417,66 +3417,12 @@ void scan_mapping_unevictable_pages(struct address_space *mapping) } -/** - * scan_zone_unevictable_pages - check unevictable list for evictable pages - * @zone - zone of which to scan the unevictable list - * - * Scan @zone's unevictable LRU lists to check for pages that have become - * evictable. Move those that have to @zone's inactive list where they - * become candidates for reclaim, unless shrink_inactive_zone() decides - * to reactivate them. Pages that are still unevictable are rotated - * back onto @zone's unevictable list. - */ -#define SCAN_UNEVICTABLE_BATCH_SIZE 16UL /* arbitrary lock hold batch size */ -static void scan_zone_unevictable_pages(struct zone *zone) +static void warn_scan_unevictable_pages(void) { - struct list_head *l_unevictable = &zone->lru[LRU_UNEVICTABLE].list; - unsigned long scan; - unsigned long nr_to_scan = zone_page_state(zone, NR_UNEVICTABLE); - - while (nr_to_scan > 0) { - unsigned long batch_size = min(nr_to_scan, - SCAN_UNEVICTABLE_BATCH_SIZE); - - spin_lock_irq(&zone->lru_lock); - for (scan = 0; scan < batch_size; scan++) { - struct page *page = lru_to_page(l_unevictable); - - if (!trylock_page(page)) - continue; - - prefetchw_prev_lru_page(page, l_unevictable, flags); - - if (likely(PageLRU(page) && PageUnevictable(page))) - check_move_unevictable_page(page, zone); - - unlock_page(page); - } - spin_unlock_irq(&zone->lru_lock); - - nr_to_scan -= batch_size; - } -} - - -/** - * scan_all_zones_unevictable_pages - scan all unevictable lists for evictable pages - * - * A really big hammer: scan all zones' unevictable LRU lists to check for - * pages that have become evictable. Move those back to the zones' - * inactive list where they become candidates for reclaim. - * This occurs when, e.g., we have unswappable pages on the unevictable lists, - * and we add swap to the system. As such, it runs in the context of a task - * that has possibly/probably made some previously unevictable pages - * evictable. - */ -static void scan_all_zones_unevictable_pages(void) -{ - struct zone *zone; - - for_each_zone(zone) { - scan_zone_unevictable_pages(zone); - } + printk_once(KERN_WARNING + "The scan_unevictable_pages sysctl/node-interface has been " + "disabled for lack of a legitimate use case. If you have " + "one, please send an email to linux-mm@kvack.org.\n"); } /* @@ -3489,11 +3435,8 @@ int scan_unevictable_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { + warn_scan_unevictable_pages(); proc_doulongvec_minmax(table, write, buffer, length, ppos); - - if (write && *(unsigned long *)table->data) - scan_all_zones_unevictable_pages(); - scan_unevictable_pages = 0; return 0; } @@ -3508,6 +3451,7 @@ static ssize_t read_scan_unevictable_node(struct sys_device *dev, struct sysdev_attribute *attr, char *buf) { + warn_scan_unevictable_pages(); return sprintf(buf, "0\n"); /* always zero; should fit... */ } @@ -3515,19 +3459,7 @@ static ssize_t write_scan_unevictable_node(struct sys_device *dev, struct sysdev_attribute *attr, const char *buf, size_t count) { - struct zone *node_zones = NODE_DATA(dev->id)->node_zones; - struct zone *zone; - unsigned long res; - unsigned long req = strict_strtoul(buf, 10, &res); - - if (req || !res) - return 1; /* Invalid input or zero is no-op */ - - for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { - if (!populated_zone(zone)) - continue; - scan_zone_unevictable_pages(zone); - } + warn_scan_unevictable_pages(); return 1; } -- cgit v0.10.2 From 2d7d3eb2bad116e0d1b3b3930a923c55f6d0f70e Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Mon, 31 Oct 2011 17:09:15 -0700 Subject: mm/memblock.c: quiet sparse noise Quiet the following sparse noise in this file: warning: symbol 'memblock_overlaps_region' was not declared. Should it be static? Signed-off-by: H Hartley Sweeten Cc: Yinghai Lu Cc: "H. Peter Anvin" Cc: Benjamin Herrenschmidt Cc: Tomi Valkeinen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memblock.c b/mm/memblock.c index 7d4f5c8..84bec49 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -58,7 +58,8 @@ static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, p return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); } -long __init_memblock memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) +static long __init_memblock memblock_overlaps_region(struct memblock_type *type, + phys_addr_t base, phys_addr_t size) { unsigned long i; -- cgit v0.10.2 From 22d5368a0838c00ed0e3ec20e7ff8c6e46ba99ef Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Mon, 31 Oct 2011 17:09:19 -0700 Subject: mm/thrash.c: quiet sparse noise Quiet the following sparse noise: warning: symbol 'swap_token_memcg' was not declared. Should it be static? Signed-off-by: H Hartley Sweeten Cc: Rik van Riel Cc: KOSAKI Motohiro Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/thrash.c b/mm/thrash.c index e53f7d0..57ad495 100644 --- a/mm/thrash.c +++ b/mm/thrash.c @@ -29,7 +29,7 @@ static DEFINE_SPINLOCK(swap_token_lock); struct mm_struct *swap_token_mm; -struct mem_cgroup *swap_token_memcg; +static struct mem_cgroup *swap_token_memcg; #ifdef CONFIG_CGROUP_MEM_RES_CTLR static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) -- cgit v0.10.2 From e754d79d35f0b8612445a9bd7491c48d7317e3ad Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Mon, 31 Oct 2011 17:09:23 -0700 Subject: mm/mempolicy.c: quiet sparse noise Quiet the spares noise: warning: symbol 'default_policy' was not declared. Should it be static? Signed-off-by: H Hartley Sweeten Cc: KOSAKI Motohiro Cc: Stephen Wilson Cc: Andrea Arcangeli Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 9c51f9f..cd237f4 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -111,7 +111,7 @@ enum zone_type policy_zone = 0; /* * run-time system-wide default policy => local allocation */ -struct mempolicy default_policy = { +static struct mempolicy default_policy = { .refcnt = ATOMIC_INIT(1), /* never free it */ .mode = MPOL_PREFERRED, .flags = MPOL_F_LOCAL, -- cgit v0.10.2 From 2f1da6421570d064a94e17190a4955c2df99794d Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Mon, 31 Oct 2011 17:09:25 -0700 Subject: mm/huge_memory.c: quiet sparse noise Quiet the sparse noise: warning: symbol 'khugepaged_scan' was not declared. Should it be static? warning: context imbalance in 'khugepaged_scan_mm_slot' - unexpected unlock Signed-off-by: H Hartley Sweeten Cc: Andrea Arcangeli Cc: Rik van Riel Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 6b072bd..44f5763 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -89,7 +89,8 @@ struct khugepaged_scan { struct list_head mm_head; struct mm_slot *mm_slot; unsigned long address; -} khugepaged_scan = { +}; +static struct khugepaged_scan khugepaged_scan = { .mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head), }; @@ -2069,6 +2070,8 @@ static void collect_mm_slot(struct mm_slot *mm_slot) static unsigned int khugepaged_scan_mm_slot(unsigned int pages, struct page **hpage) + __releases(&khugepaged_mm_lock) + __acquires(&khugepaged_mm_lock) { struct mm_slot *mm_slot; struct mm_struct *mm; -- cgit v0.10.2 From 21ee9f398be209ccbb62929d35961ca1ed48eec3 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 31 Oct 2011 17:09:28 -0700 Subject: vmscan: add barrier to prevent evictable page in unevictable list When a race between putback_lru_page() and shmem_lock with lock=0 happens, progrom execution order is as follows, but clear_bit in processor #1 could be reordered right before spin_unlock of processor #1. Then, the page would be stranded on the unevictable list. spin_lock SetPageLRU spin_unlock clear_bit(AS_UNEVICTABLE) spin_lock if PageLRU() if !test_bit(AS_UNEVICTABLE) move evictable list smp_mb if !test_bit(AS_UNEVICTABLE) move evictable list spin_unlock But, pagevec_lookup() in scan_mapping_unevictable_pages() has rcu_read_[un]lock() so it could protect reordering before reaching test_bit(AS_UNEVICTABLE) on processor #1 so this problem never happens. But it's a unexpected side effect and we should solve this problem properly. This patch adds a barrier after mapping_clear_unevictable. I didn't meet this problem but just found during review. Signed-off-by: Minchan Kim Acked-by: KOSAKI Motohiro Cc: Mel Gorman Cc: Rik van Riel Cc: Lee Schermerhorn Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/shmem.c b/mm/shmem.c index 2d35772..fa4fa6c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1068,6 +1068,12 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user) user_shm_unlock(inode->i_size, user); info->flags &= ~VM_LOCKED; mapping_clear_unevictable(file->f_mapping); + /* + * Ensure that a racing putback_lru_page() can see + * the pages of this mapping are evictable when we + * skip them due to !PageLRU during the scan. + */ + smp_mb__after_clear_bit(); scan_mapping_unevictable_pages(file->f_mapping); } retval = 0; diff --git a/mm/vmscan.c b/mm/vmscan.c index 3886b0b..f51a33e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -633,13 +633,14 @@ redo: lru = LRU_UNEVICTABLE; add_page_to_unevictable_list(page); /* - * When racing with an mlock clearing (page is - * unlocked), make sure that if the other thread does - * not observe our setting of PG_lru and fails - * isolation, we see PG_mlocked cleared below and move + * When racing with an mlock or AS_UNEVICTABLE clearing + * (page is unlocked) make sure that if the other thread + * does not observe our setting of PG_lru and fails + * isolation/check_move_unevictable_page, + * we see PG_mlocked/AS_UNEVICTABLE cleared below and move * the page back to the evictable list. * - * The other side is TestClearPageMlocked(). + * The other side is TestClearPageMlocked() or shmem_lock(). */ smp_mb(); } -- cgit v0.10.2 From e0887c19b2daa140f20ca8104bdc5740f39dbb86 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 31 Oct 2011 17:09:31 -0700 Subject: vmscan: limit direct reclaim for higher order allocations When suffering from memory fragmentation due to unfreeable pages, THP page faults will repeatedly try to compact memory. Due to the unfreeable pages, compaction fails. Needless to say, at that point page reclaim also fails to create free contiguous 2MB areas. However, that doesn't stop the current code from trying, over and over again, and freeing a minimum of 4MB (2UL << sc->order pages) at every single invocation. This resulted in my 12GB system having 2-3GB free memory, a corresponding amount of used swap and very sluggish response times. This can be avoided by having the direct reclaim code not reclaim from zones that already have plenty of free memory available for compaction. If compaction still fails due to unmovable memory, doing additional reclaim will only hurt the system, not help. [jweiner@redhat.com: change comment to explain the order check] Signed-off-by: Rik van Riel Acked-by: Johannes Weiner Acked-by: Mel Gorman Cc: Andrea Arcangeli Reviewed-by: Minchan Kim Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index f51a33e..7e0f057 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2125,6 +2125,22 @@ static void shrink_zones(int priority, struct zonelist *zonelist, continue; if (zone->all_unreclaimable && priority != DEF_PRIORITY) continue; /* Let kswapd poll it */ + if (COMPACTION_BUILD) { + /* + * If we already have plenty of memory + * free for compaction, don't free any + * more. Even though compaction is + * invoked for any non-zero order, + * only frequent costly order + * reclamation is disruptive enough to + * become a noticable problem, like + * transparent huge page allocations. + */ + if (sc->order > PAGE_ALLOC_COSTLY_ORDER && + (compaction_suitable(zone, sc->order) || + compaction_deferred(zone))) + continue; + } /* * This steals pages from memory cgroups over softlimit * and returns the number of reclaimed pages and -- cgit v0.10.2 From e0c23279c9f800c403f37511484d9014ac83adec Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 31 Oct 2011 17:09:33 -0700 Subject: vmscan: abort reclaim/compaction if compaction can proceed If compaction can proceed, shrink_zones() stops doing any work but its callers still call shrink_slab() which raises the priority and potentially sleeps. This is unnecessary and wasteful so this patch aborts direct reclaim/compaction entirely if compaction can proceed. Signed-off-by: Mel Gorman Acked-by: Rik van Riel Reviewed-by: Minchan Kim Acked-by: Johannes Weiner Cc: Josh Boyer Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index 7e0f057..a90c603 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2103,14 +2103,19 @@ restart: * * If a zone is deemed to be full of pinned pages then just give it a light * scan then give up on it. + * + * This function returns true if a zone is being reclaimed for a costly + * high-order allocation and compaction is either ready to begin or deferred. + * This indicates to the caller that it should retry the allocation or fail. */ -static void shrink_zones(int priority, struct zonelist *zonelist, +static bool shrink_zones(int priority, struct zonelist *zonelist, struct scan_control *sc) { struct zoneref *z; struct zone *zone; unsigned long nr_soft_reclaimed; unsigned long nr_soft_scanned; + bool should_abort_reclaim = false; for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(sc->gfp_mask), sc->nodemask) { @@ -2127,19 +2132,20 @@ static void shrink_zones(int priority, struct zonelist *zonelist, continue; /* Let kswapd poll it */ if (COMPACTION_BUILD) { /* - * If we already have plenty of memory - * free for compaction, don't free any - * more. Even though compaction is - * invoked for any non-zero order, - * only frequent costly order - * reclamation is disruptive enough to - * become a noticable problem, like - * transparent huge page allocations. + * If we already have plenty of memory free for + * compaction in this zone, don't free any more. + * Even though compaction is invoked for any + * non-zero order, only frequent costly order + * reclamation is disruptive enough to become a + * noticable problem, like transparent huge page + * allocations. */ if (sc->order > PAGE_ALLOC_COSTLY_ORDER && (compaction_suitable(zone, sc->order) || - compaction_deferred(zone))) + compaction_deferred(zone))) { + should_abort_reclaim = true; continue; + } } /* * This steals pages from memory cgroups over softlimit @@ -2158,6 +2164,8 @@ static void shrink_zones(int priority, struct zonelist *zonelist, shrink_zone(priority, zone, sc); } + + return should_abort_reclaim; } static bool zone_reclaimable(struct zone *zone) @@ -2222,7 +2230,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, sc->nr_scanned = 0; if (!priority) disable_swap_token(sc->mem_cgroup); - shrink_zones(priority, zonelist, sc); + if (shrink_zones(priority, zonelist, sc)) + break; + /* * Don't shrink slabs when reclaiming memory from * over limit cgroups -- cgit v0.10.2 From df9d6985be2a7e7683c46e4c6ea608fc69f02b45 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 31 Oct 2011 17:09:35 -0700 Subject: mm: do not drain pagevecs for mlockall(MCL_FUTURE) MCL_FUTURE does not move pages between lru list and draining the LRU per cpu pagevecs is a nasty activity. Avoid doing it unecessarily. Signed-off-by: Christoph Lameter Cc: David Rientjes Reviewed-by: Minchan Kim Acked-by: KOSAKI Motohiro Cc: Mel Gorman Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/mlock.c b/mm/mlock.c index 048260c..7debb4f 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -549,7 +549,8 @@ SYSCALL_DEFINE1(mlockall, int, flags) if (!can_do_mlock()) goto out; - lru_add_drain_all(); /* flush pagevec */ + if (flags & MCL_CURRENT) + lru_add_drain_all(); /* flush pagevec */ down_write(¤t->mm->mmap_sem); -- cgit v0.10.2 From 0089e4853ae1ac161fae5137170971ccb6f4f152 Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Mon, 31 Oct 2011 17:09:38 -0700 Subject: mm/huge_memory: fix copying user highpage The THP copy-on-write handler falls back to regular-sized pages for a huge page replacement upon allocation failure or if THP has been individually disabled in the target VMA. The loop responsible for copying page-sized chunks accidentally uses multiples of PAGE_SHIFT instead of PAGE_SIZE as the virtual address arg for copy_user_highpage(). Signed-off-by: Hillf Danton Acked-by: Johannes Weiner Reviewed-by: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 44f5763..ef95438 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -830,7 +830,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, for (i = 0; i < HPAGE_PMD_NR; i++) { copy_user_highpage(pages[i], page + i, - haddr + PAGE_SHIFT*i, vma); + haddr + PAGE_SIZE * i, vma); __SetPageUptodate(pages[i]); cond_resched(); } -- cgit v0.10.2 From 35d8c7ad7208dad5d352c483408e555022750978 Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Mon, 31 Oct 2011 17:09:40 -0700 Subject: mm/huge_memory: fix typo when updating mmu cache There are three cases of update_mmu_cache() in the file, and the case in function collapse_huge_page() has a typo, namely the last parameter used, which is corrected based on the other two cases. Due to the define of update_mmu_cache by X86, the only arch that implements THP currently, the change here has no really crystal point, but one or two minutes of efforts could be saved for those archs that are likely to support THP in future. Signed-off-by: Hillf Danton Cc: Johannes Weiner Reviewed-by: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/huge_memory.c b/mm/huge_memory.c index ef95438..860ec21 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1952,7 +1952,7 @@ static void collapse_huge_page(struct mm_struct *mm, BUG_ON(!pmd_none(*pmd)); page_add_new_anon_rmap(new_page, vma, address); set_pmd_at(mm, address, pmd, _pmd); - update_mmu_cache(vma, address, entry); + update_mmu_cache(vma, address, _pmd); prepare_pmd_huge_pte(pgtable, mm); mm->nr_ptes--; spin_unlock(&mm->page_table_lock); -- cgit v0.10.2 From 3d470fc385defa60d9af610f05db8e7f8b4f2f5e Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 31 Oct 2011 17:09:43 -0700 Subject: mm: munlock use mapcount to avoid terrible overhead A process spent 30 minutes exiting, just munlocking the pages of a large anonymous area that had been alternately mprotected into page-sized vmas: for every single page there's an anon_vma walk through all the other little vmas to find the right one. A general fix to that would be a lot more complicated (use prio_tree on anon_vma?), but there's one very simple thing we can do to speed up the common case: if a page to be munlocked is mapped only once, then it is our vma that it is mapped into, and there's no need whatever to walk through all the others. Okay, there is a very remote race in munlock_vma_pages_range(), if between its follow_page() and lock_page(), another process were to munlock the same page, then page reclaim remove it from our vma, then another process mlock it again. We would find it with page_mapcount 1, yet it's still mlocked in another process. But never mind, that's much less likely than the down_read_trylock() failure which munlocking already tolerates (in try_to_unmap_one()): in due course page reclaim will discover and move the page to unevictable instead. [akpm@linux-foundation.org: add comment] Signed-off-by: Hugh Dickins Cc: Michel Lespinasse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/mlock.c b/mm/mlock.c index 7debb4f..bd34b3a 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -110,7 +110,15 @@ void munlock_vma_page(struct page *page) if (TestClearPageMlocked(page)) { dec_zone_page_state(page, NR_MLOCK); if (!isolate_lru_page(page)) { - int ret = try_to_munlock(page); + int ret = SWAP_AGAIN; + + /* + * Optimization: if the page was mapped just once, + * that's our mapping and we don't need to check all the + * other vmas. + */ + if (page_mapcount(page) > 1) + ret = try_to_munlock(page); /* * did try_to_unlock() succeed or punt? */ -- cgit v0.10.2 From a1cb2c60ddc98ff4e5246f410558805401ceee67 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Mon, 31 Oct 2011 17:09:46 -0700 Subject: mm/vmstat.c: cache align vm_stat Avoid false sharing of the vm_stat array. This was found to adversely affect tmpfs I/O performance. Tests run on a 640 cpu UV system. With 120 threads doing parallel writes, each to different tmpfs mounts: No patch: ~300 MB/sec With vm_stat alignment: ~430 MB/sec Signed-off-by: Dimitri Sivanich Acked-by: Christoph Lameter Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmstat.c b/mm/vmstat.c index 56e529a..8fd603b 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -78,7 +78,7 @@ void vm_events_fold_cpu(int cpu) * * vm_stat contains the global counters */ -atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; +atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp; EXPORT_SYMBOL(vm_stat); #ifdef CONFIG_SMP -- cgit v0.10.2 From 0a8c384ebe4cc1c462076732dbffb595e7e81d5c Mon Sep 17 00:00:00 2001 From: Michael Cree Date: Mon, 31 Oct 2011 17:09:49 -0700 Subject: alpha: wire up accept4 syscall Somehow wiring up the accept4 syscall on Alpha was missed long ago. This commit rectifies that oversight. Signed-off-by: Michael Cree Reviewed-by: Matt Turner Cc: Richard Henderson Cc: Ivan Kokshaysky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index 4ac48a0..1fbfd64 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -457,10 +457,11 @@ #define __NR_clock_adjtime 499 #define __NR_syncfs 500 #define __NR_setns 501 +#define __NR_accept4 502 #ifdef __KERNEL__ -#define NR_SYSCALLS 502 +#define NR_SYSCALLS 503 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index 6acea1f..53d0ce2 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -520,6 +520,7 @@ sys_call_table: .quad sys_clock_adjtime .quad sys_syncfs /* 500 */ .quad sys_setns + .quad sys_accept4 .size sys_call_table, . - sys_call_table .type sys_call_table, @object -- cgit v0.10.2 From a8aff21ecc8c455687c5bb2b98cb04dce3eea7c7 Mon Sep 17 00:00:00 2001 From: Michael Cree Date: Mon, 31 Oct 2011 17:10:01 -0700 Subject: alpha: wire up sendmmsg syscall Signed-off-by: Michael Cree Reviewed-by: Matt Turner Cc: Richard Henderson Cc: Ivan Kokshaysky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index 1fbfd64..2207fc6 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -458,10 +458,11 @@ #define __NR_syncfs 500 #define __NR_setns 501 #define __NR_accept4 502 +#define __NR_sendmmsg 503 #ifdef __KERNEL__ -#define NR_SYSCALLS 503 +#define NR_SYSCALLS 504 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index 53d0ce2..e534e1c 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -521,6 +521,7 @@ sys_call_table: .quad sys_syncfs /* 500 */ .quad sys_setns .quad sys_accept4 + .quad sys_sendmmsg .size sys_call_table, . - sys_call_table .type sys_call_table, @object -- cgit v0.10.2 From d70ef97baf048412c395bb5d65791d8fe133a52b Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 31 Oct 2011 17:10:04 -0700 Subject: fs/pipe.c: add ->statfs callback for pipefs Currently a statfs on a pipe's /proc//fd/ link returns -ENOSYS. Wire pipfs up so that the statfs succeeds. This is required by checkpoint-restart in the userspace to make it possible to distinguish pipes from fifos. When we dump information about task's open files we use the /proc/pid/fd directoy's symlinks and the fact that opening any of them gives us exactly the same dentry->inode pair as the original process has. Now if a task we're dumping has opened pipe and fifo we need to detect this and act accordingly. Knowing that an fd with type S_ISFIFO resides on a pipefs is the most precise way. Signed-off-by: Pavel Emelyanov Reviewed-by: Tejun Heo Acked-by: Serge Hallyn Signed-off-by: Cyrill Gorcunov Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/pipe.c b/fs/pipe.c index 0e0be1d..4065f07 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1254,6 +1254,7 @@ out: static const struct super_operations pipefs_ops = { .destroy_inode = free_inode_nonrcu, + .statfs = simple_statfs, }; /* -- cgit v0.10.2 From c51eaacce2ae6861e1ad8c80ade6102ad428dc93 Mon Sep 17 00:00:00 2001 From: Jiaju Zhang Date: Mon, 31 Oct 2011 17:10:07 -0700 Subject: lib/Kconfig.debug: fix help message for DEFAULT_HUNG_TASK_TIMEOUT Added missing _secs in the help message of config DEFAULT_HUNG_TASK_TIMEOUT. Signed-off-by: Jiaju Zhang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c583a57..82928f5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -248,8 +248,9 @@ config DEFAULT_HUNG_TASK_TIMEOUT to determine when a task has become non-responsive and should be considered hung. - It can be adjusted at runtime via the kernel.hung_task_timeout - sysctl or by writing a value to /proc/sys/kernel/hung_task_timeout. + It can be adjusted at runtime via the kernel.hung_task_timeout_secs + sysctl or by writing a value to + /proc/sys/kernel/hung_task_timeout_secs. A timeout of 0 disables the check. The default is two minutes. Keeping the default should be fine in most cases. -- cgit v0.10.2 From 4ca5f468cc2a0be1cba585f335dcbe56b40944f2 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 31 Oct 2011 17:10:09 -0700 Subject: hwmon: convert idr to ida and use ida_simple interface hwmon was using an idr with a NULL pointer, so convert to an ida which then allows use of Rusty's ida_simple_get. Signed-off-by: Jonathan Cameron Cc: Rusty Russell Cc: Tejun Heo Acked-by: Guenter Roeck Cc: James Bottomley Cc: David Airlie Cc: Thomas Hellstrom Cc: Evgeniy Polyakov Cc: Darrick J. Wong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index a61e781..6460487 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -27,8 +27,7 @@ static struct class *hwmon_class; -static DEFINE_IDR(hwmon_idr); -static DEFINE_SPINLOCK(idr_lock); +static DEFINE_IDA(hwmon_ida); /** * hwmon_device_register - register w/ hwmon @@ -42,30 +41,17 @@ static DEFINE_SPINLOCK(idr_lock); struct device *hwmon_device_register(struct device *dev) { struct device *hwdev; - int id, err; - -again: - if (unlikely(idr_pre_get(&hwmon_idr, GFP_KERNEL) == 0)) - return ERR_PTR(-ENOMEM); - - spin_lock(&idr_lock); - err = idr_get_new(&hwmon_idr, NULL, &id); - spin_unlock(&idr_lock); + int id; - if (unlikely(err == -EAGAIN)) - goto again; - else if (unlikely(err)) - return ERR_PTR(err); + id = ida_simple_get(&hwmon_ida, 0, 0, GFP_KERNEL); + if (id < 0) + return ERR_PTR(id); - id = id & MAX_ID_MASK; hwdev = device_create(hwmon_class, dev, MKDEV(0, 0), NULL, HWMON_ID_FORMAT, id); - if (IS_ERR(hwdev)) { - spin_lock(&idr_lock); - idr_remove(&hwmon_idr, id); - spin_unlock(&idr_lock); - } + if (IS_ERR(hwdev)) + ida_simple_remove(&hwmon_ida, id); return hwdev; } @@ -81,9 +67,7 @@ void hwmon_device_unregister(struct device *dev) if (likely(sscanf(dev_name(dev), HWMON_ID_FORMAT, &id) == 1)) { device_unregister(dev); - spin_lock(&idr_lock); - idr_remove(&hwmon_idr, id); - spin_unlock(&idr_lock); + ida_simple_remove(&hwmon_ida, id); } else dev_dbg(dev->parent, "hwmon_device_unregister() failed: bad class ID!\n"); -- cgit v0.10.2 From 65807044760e03ebf766973c5e94a2ea1d57937b Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 31 Oct 2011 17:10:27 -0700 Subject: drivers/hwmon/hwmon.c: convert idr to ida and use ida_simple_get() A straightforward looking use of idr for a device id. Signed-off-by: Jonathan Cameron Cc: Rusty Russell Cc: Tejun Heo Cc: Guenter Roeck Cc: James Bottomley Acked-by: Darrick J. Wong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/hwmon/ibmaem.c b/drivers/hwmon/ibmaem.c index c316294..783d0c1 100644 --- a/drivers/hwmon/ibmaem.c +++ b/drivers/hwmon/ibmaem.c @@ -88,8 +88,7 @@ #define AEM_MIN_POWER_INTERVAL 200 #define UJ_PER_MJ 1000L -static DEFINE_IDR(aem_idr); -static DEFINE_SPINLOCK(aem_idr_lock); +static DEFINE_IDA(aem_ida); static struct platform_driver aem_driver = { .driver = { @@ -356,38 +355,6 @@ static void aem_msg_handler(struct ipmi_recv_msg *msg, void *user_msg_data) complete(&data->read_complete); } -/* ID functions */ - -/* Obtain an id */ -static int aem_idr_get(int *id) -{ - int i, err; - -again: - if (unlikely(!idr_pre_get(&aem_idr, GFP_KERNEL))) - return -ENOMEM; - - spin_lock(&aem_idr_lock); - err = idr_get_new(&aem_idr, NULL, &i); - spin_unlock(&aem_idr_lock); - - if (unlikely(err == -EAGAIN)) - goto again; - else if (unlikely(err)) - return err; - - *id = i & MAX_ID_MASK; - return 0; -} - -/* Release an object ID */ -static void aem_idr_put(int id) -{ - spin_lock(&aem_idr_lock); - idr_remove(&aem_idr, id); - spin_unlock(&aem_idr_lock); -} - /* Sensor support functions */ /* Read a sensor value */ @@ -530,7 +497,7 @@ static void aem_delete(struct aem_data *data) ipmi_destroy_user(data->ipmi.user); platform_set_drvdata(data->pdev, NULL); platform_device_unregister(data->pdev); - aem_idr_put(data->id); + ida_simple_remove(&aem_ida, data->id); kfree(data); } @@ -587,7 +554,8 @@ static int aem_init_aem1_inst(struct aem_ipmi_data *probe, u8 module_handle) data->power_period[i] = AEM_DEFAULT_POWER_INTERVAL; /* Create sub-device for this fw instance */ - if (aem_idr_get(&data->id)) + data->id = ida_simple_get(&aem_ida, 0, 0, GFP_KERNEL); + if (data->id < 0) goto id_err; data->pdev = platform_device_alloc(DRVNAME, data->id); @@ -638,7 +606,7 @@ ipmi_err: platform_set_drvdata(data->pdev, NULL); platform_device_unregister(data->pdev); dev_err: - aem_idr_put(data->id); + ida_simple_remove(&aem_ida, data->id); id_err: kfree(data); @@ -720,7 +688,8 @@ static int aem_init_aem2_inst(struct aem_ipmi_data *probe, data->power_period[i] = AEM_DEFAULT_POWER_INTERVAL; /* Create sub-device for this fw instance */ - if (aem_idr_get(&data->id)) + data->id = ida_simple_get(&aem_ida, 0, 0, GFP_KERNEL); + if (data->id < 0) goto id_err; data->pdev = platform_device_alloc(DRVNAME, data->id); @@ -771,7 +740,7 @@ ipmi_err: platform_set_drvdata(data->pdev, NULL); platform_device_unregister(data->pdev); dev_err: - aem_idr_put(data->id); + ida_simple_remove(&aem_ida, data->id); id_err: kfree(data); -- cgit v0.10.2 From 1510dd5954be5070e46b155eb32362dc73d9e9cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Piel?= Date: Mon, 31 Oct 2011 17:10:31 -0700 Subject: lis3lv02d: avoid divide by zero due to unchecked MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After an "unexpected" reboot, I found this Oops in my logs: divide error: 0000 [#1] PREEMPT SMP=20 CPU 0=20 Modules linked in: lis3lv02d hp_wmi input_polldev [...] Pid: 390, comm: modprobe Tainted: G C 2.6.39-rc7-wl+=20 RIP: 0010:[] [] lis3lv02d_poweron+0x4e/0x94 [lis3lv02d] RSP: 0018:ffff8801d6407cf8 EFLAGS: 00010246 RAX: 0000000000000bb8 RBX: ffffffffa014e000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffea00066e4708 RDI: ffff8801df002700 RBP: ffff8801d6407d18 R08: ffffea00066c5a30 R09: ffffffff812498c9 R10: ffff8801d7bfcea0 R11: ffff8801d7bfce10 R12: 0000000000000bb8 R13: 00000000ffffffda R14: ffffffffa0154120 R15: ffffffffa0154030 =46S: 00007fc0705db700(0000) GS:ffff8801dfa00000(0000) knlGS:0 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f33549174f0 CR3: 00000001d65c9000 CR4: 00000000000406f0 Process modprobe (pid: 390, threadinfo ffff8801d6406000, task ffff8801d6b40= 000) Stack: ffffffffa0154120 62ffffffa0154030 ffffffffa014e000 00000000ffffffea ffff8801d6407d58 ffffffffa014bcc1 0000000000000000 0000000000000048 ffff8801d8bae800 00000000ffffffea 00000000ffffffda ffffffffa0154120 Call Trace: [] lis3lv02d_init_device+0x1ce/0x496 [lis3lv02d] [] lis3lv02d_add+0x10f/0x17c [hp_accel] [] acpi_device_probe+0x49/0x117 [...] Code: 3a 75 06 80 4d ef 50 eb 04 80 4d ef 40 0f b6 55 ef be 21 00 00 00 48 89 df ff 53 18 44 8b 63 6c e8 3e fc ff ff 89 c1 44 89 e0 99 f9 89 c7 e8 93 82 ef e0 48 83 7b 30 00 74 2d 45 31 e4 80 7b=20 RIP [] lis3lv02d_poweron+0x4e/0x94 [lis3lv02d] RSP >From my POV, it looks like the hardware is not working as expected and returns a bogus data rate. The driver doesn't check the result and directly uses it as some sort of divisor in some places: msleep(lis3->pwron_delay / lis3lv02d_get_odr()); Under this circumstances, this could very well cause the "divide by zero" exception from above. For now, I fixed it the easiest and most obvious way: Check if the result is sane and if it isn't use a sane default instead. I went for "100" in the latter case, simply because /sys/devices/platform/lis3lv02d/rate returns it on a successful boot. Signed-off-by: Christian Lamparter Signed-off-by: Éric Piel Cc: Matthew Garrett Cc: Witold Pilat Cc: Lyall Pearce Cc: Malte Starostik Cc: Ilkka Koskinen Cc: Thadeu Lima de Souza Cascardo Cc: Christian Lamparter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c index 8b51cd6..1fc6715 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.c +++ b/drivers/misc/lis3lv02d/lis3lv02d.c @@ -206,6 +206,18 @@ static int lis3lv02d_get_odr(void) return lis3_dev.odrs[(ctrl >> shift)]; } +static int lis3lv02d_get_pwron_wait(struct lis3lv02d *lis3) +{ + int div = lis3lv02d_get_odr(); + + if (WARN_ONCE(div == 0, "device returned spurious data")) + return -ENXIO; + + /* LIS3 power on delay is quite long */ + msleep(lis3->pwron_delay / div); + return 0; +} + static int lis3lv02d_set_odr(int rate) { u8 ctrl; @@ -266,7 +278,9 @@ static int lis3lv02d_selftest(struct lis3lv02d *lis3, s16 results[3]) lis3->read(lis3, ctlreg, ®); lis3->write(lis3, ctlreg, (reg | selftest)); - msleep(lis3->pwron_delay / lis3lv02d_get_odr()); + ret = lis3lv02d_get_pwron_wait(lis3); + if (ret) + goto fail; /* Read directly to avoid axis remap */ x = lis3->read_data(lis3, OUTX); @@ -275,7 +289,9 @@ static int lis3lv02d_selftest(struct lis3lv02d *lis3, s16 results[3]) /* back to normal settings */ lis3->write(lis3, ctlreg, reg); - msleep(lis3->pwron_delay / lis3lv02d_get_odr()); + ret = lis3lv02d_get_pwron_wait(lis3); + if (ret) + goto fail; results[0] = x - lis3->read_data(lis3, OUTX); results[1] = y - lis3->read_data(lis3, OUTY); @@ -363,8 +379,9 @@ void lis3lv02d_poweroff(struct lis3lv02d *lis3) } EXPORT_SYMBOL_GPL(lis3lv02d_poweroff); -void lis3lv02d_poweron(struct lis3lv02d *lis3) +int lis3lv02d_poweron(struct lis3lv02d *lis3) { + int err; u8 reg; lis3->init(lis3); @@ -384,11 +401,14 @@ void lis3lv02d_poweron(struct lis3lv02d *lis3) lis3->write(lis3, CTRL_REG2, reg); } - /* LIS3 power on delay is quite long */ - msleep(lis3->pwron_delay / lis3lv02d_get_odr()); + err = lis3lv02d_get_pwron_wait(lis3); + if (err) + return err; if (lis3->reg_ctrl) lis3_context_restore(lis3); + + return 0; } EXPORT_SYMBOL_GPL(lis3lv02d_poweron); @@ -928,7 +948,11 @@ int lis3lv02d_init_device(struct lis3lv02d *dev) atomic_set(&dev->wake_thread, 0); lis3lv02d_add_fs(dev); - lis3lv02d_poweron(dev); + err = lis3lv02d_poweron(dev); + if (err) { + lis3lv02d_remove_fs(dev); + return err; + } if (dev->pm_dev) { pm_runtime_set_active(dev->pm_dev); diff --git a/drivers/misc/lis3lv02d/lis3lv02d.h b/drivers/misc/lis3lv02d/lis3lv02d.h index a193958..57c64bb 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.h +++ b/drivers/misc/lis3lv02d/lis3lv02d.h @@ -285,7 +285,7 @@ int lis3lv02d_init_device(struct lis3lv02d *lis3); int lis3lv02d_joystick_enable(void); void lis3lv02d_joystick_disable(void); void lis3lv02d_poweroff(struct lis3lv02d *lis3); -void lis3lv02d_poweron(struct lis3lv02d *lis3); +int lis3lv02d_poweron(struct lis3lv02d *lis3); int lis3lv02d_remove_fs(struct lis3lv02d *lis3); extern struct lis3lv02d lis3_dev; diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c index 1b52d00..891e71f 100644 --- a/drivers/platform/x86/hp_accel.c +++ b/drivers/platform/x86/hp_accel.c @@ -354,8 +354,7 @@ static int lis3lv02d_suspend(struct acpi_device *device, pm_message_t state) static int lis3lv02d_resume(struct acpi_device *device) { - lis3lv02d_poweron(&lis3_dev); - return 0; + return lis3lv02d_poweron(&lis3_dev); } #else #define lis3lv02d_suspend NULL -- cgit v0.10.2 From bd35665f0a16051dbe538ae3122d4baa4659b1f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Piel?= Date: Mon, 31 Oct 2011 17:10:41 -0700 Subject: lis3: update maintainer information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the move of the lis3 driver, the hp_accel.c file got dropped from the MAINTAINER file. Make it explicit again that this file is tied to lis3 again. Signed-off-by: Éric Piel Cc: Matthew Garrett Cc: Witold Pilat Cc: Lyall Pearce Cc: Malte Starostik Cc: Ilkka Koskinen Cc: Thadeu Lima de Souza Cascardo Cc: Christian Lamparter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index 2014c1f..23b89e0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3995,6 +3995,7 @@ M: Eric Piel S: Maintained F: Documentation/misc-devices/lis3lv02d F: drivers/misc/lis3lv02d/ +F: drivers/platform/x86/hp_accel.c LLC (802.2) M: Arnaldo Carvalho de Melo -- cgit v0.10.2 From a17b81beeb76a7cc3551fc17d7aa73b303dee52c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Piel?= Date: Mon, 31 Oct 2011 17:10:43 -0700 Subject: lis3: add support for HP EliteBook 2730p MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add axis correction for HP EliteBook 2730p. Tested-by: Witold Pilat Signed-off-by: Éric Piel Cc: Matthew Garrett Cc: Lyall Pearce Cc: Malte Starostik Cc: Ilkka Koskinen Cc: Thadeu Lima de Souza Cascardo Cc: Christian Lamparter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c index 891e71f..76b24e6 100644 --- a/drivers/platform/x86/hp_accel.c +++ b/drivers/platform/x86/hp_accel.c @@ -209,6 +209,7 @@ static struct dmi_system_id lis3lv02d_dmi_ids[] = { AXIS_DMI_MATCH("NC6715x", "HP Compaq 6715", y_inverted), AXIS_DMI_MATCH("NC693xx", "HP EliteBook 693", xy_rotated_right), AXIS_DMI_MATCH("NC693xx", "HP EliteBook 853", xy_swap), + AXIS_DMI_MATCH("NC273xx", "HP EliteBook 273", y_inverted), /* Intel-based HP Pavilion dv5 */ AXIS_DMI_MATCH2("HPDV5_I", PRODUCT_NAME, "HP Pavilion dv5", -- cgit v0.10.2 From cdeaf62255d171dd159f0bdc453efd7ff31c6916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Piel?= Date: Mon, 31 Oct 2011 17:10:46 -0700 Subject: lis3: add support for HP EliteBook 8540w MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add axis correction for HP EliteBook 8540w. Reported-by: Lyall Pearce Signed-off-by: Éric Piel Cc: Matthew Garrett Cc: Witold Pilat Cc: Malte Starostik Cc: Ilkka Koskinen Cc: Thadeu Lima de Souza Cascardo Cc: Christian Lamparter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c index 76b24e6..626b162 100644 --- a/drivers/platform/x86/hp_accel.c +++ b/drivers/platform/x86/hp_accel.c @@ -209,6 +209,7 @@ static struct dmi_system_id lis3lv02d_dmi_ids[] = { AXIS_DMI_MATCH("NC6715x", "HP Compaq 6715", y_inverted), AXIS_DMI_MATCH("NC693xx", "HP EliteBook 693", xy_rotated_right), AXIS_DMI_MATCH("NC693xx", "HP EliteBook 853", xy_swap), + AXIS_DMI_MATCH("NC854xx", "HP EliteBook 854", y_inverted), AXIS_DMI_MATCH("NC273xx", "HP EliteBook 273", y_inverted), /* Intel-based HP Pavilion dv5 */ AXIS_DMI_MATCH2("HPDV5_I", -- cgit v0.10.2 From d0b6a971ed034054897df42f3e3000b4f57cccc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Piel?= Date: Mon, 31 Oct 2011 17:10:50 -0700 Subject: hp_accel: add HP ProBook 655x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add axis correction for HP ProBook 6555b. Signed-off-by: Malte Starostik Signed-off-by: Éric Piel Cc: Matthew Garrett Cc: Witold Pilat Cc: Lyall Pearce Cc: Ilkka Koskinen Cc: Thadeu Lima de Souza Cascardo Cc: Christian Lamparter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c index 626b162..89c5cb5 100644 --- a/drivers/platform/x86/hp_accel.c +++ b/drivers/platform/x86/hp_accel.c @@ -229,6 +229,7 @@ static struct dmi_system_id lis3lv02d_dmi_ids[] = { AXIS_DMI_MATCH("HPB452x", "HP ProBook 452", y_inverted), AXIS_DMI_MATCH("HPB522x", "HP ProBook 522", xy_swap), AXIS_DMI_MATCH("HPB532x", "HP ProBook 532", y_inverted), + AXIS_DMI_MATCH("HPB655x", "HP ProBook 655", xy_swap_inverted), AXIS_DMI_MATCH("Mini510x", "HP Mini 510", xy_rotated_left_usd), { NULL, } /* Laptop models without axis info (yet): -- cgit v0.10.2 From 0021586b958d7eb5d73dbb9c42a0b4f19ebe3a9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Piel?= Date: Mon, 31 Oct 2011 17:10:54 -0700 Subject: lis3: free regulators if probe() fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilkka Koskinen Signed-off-by: Éric Piel Cc: Matthew Garrett Cc: Witold Pilat Cc: Lyall Pearce Cc: Malte Starostik Cc: Thadeu Lima de Souza Cascardo Cc: Christian Lamparter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c index b20dfb4..ea9159c 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c +++ b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c @@ -161,8 +161,13 @@ static int __devinit lis3lv02d_i2c_probe(struct i2c_client *client, if (lis3_dev.reg_ctrl) lis3_reg_ctrl(&lis3_dev, LIS3_REG_OFF); - if (ret == 0) - return 0; + if (ret) + goto fail2; + return 0; + +fail2: + regulator_bulk_free(ARRAY_SIZE(lis3_dev.regulators), + lis3_dev.regulators); fail: if (pdata && pdata->release_resources) pdata->release_resources(); -- cgit v0.10.2 From d7f81d4299cdc8cf06fc9562ec3dafce528bd6ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Piel?= Date: Mon, 31 Oct 2011 17:10:58 -0700 Subject: lis3: use consistent naming of variables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilkka Koskinen Signed-off-by: Éric Piel Cc: Matthew Garrett Cc: Witold Pilat Cc: Lyall Pearce Cc: Malte Starostik Cc: Thadeu Lima de Souza Cascardo Cc: Christian Lamparter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c index 1fc6715..eaef96bb 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.c +++ b/drivers/misc/lis3lv02d/lis3lv02d.c @@ -833,23 +833,23 @@ int lis3lv02d_remove_fs(struct lis3lv02d *lis3) } EXPORT_SYMBOL_GPL(lis3lv02d_remove_fs); -static void lis3lv02d_8b_configure(struct lis3lv02d *dev, +static void lis3lv02d_8b_configure(struct lis3lv02d *lis3, struct lis3lv02d_platform_data *p) { int err; int ctrl2 = p->hipass_ctrl; if (p->click_flags) { - dev->write(dev, CLICK_CFG, p->click_flags); - dev->write(dev, CLICK_TIMELIMIT, p->click_time_limit); - dev->write(dev, CLICK_LATENCY, p->click_latency); - dev->write(dev, CLICK_WINDOW, p->click_window); - dev->write(dev, CLICK_THSZ, p->click_thresh_z & 0xf); - dev->write(dev, CLICK_THSY_X, + lis3->write(lis3, CLICK_CFG, p->click_flags); + lis3->write(lis3, CLICK_TIMELIMIT, p->click_time_limit); + lis3->write(lis3, CLICK_LATENCY, p->click_latency); + lis3->write(lis3, CLICK_WINDOW, p->click_window); + lis3->write(lis3, CLICK_THSZ, p->click_thresh_z & 0xf); + lis3->write(lis3, CLICK_THSY_X, (p->click_thresh_x & 0xf) | (p->click_thresh_y << 4)); - if (dev->idev) { + if (lis3->idev) { struct input_dev *input_dev = lis3_dev.idev->input; input_set_capability(input_dev, EV_KEY, BTN_X); input_set_capability(input_dev, EV_KEY, BTN_Y); @@ -858,22 +858,22 @@ static void lis3lv02d_8b_configure(struct lis3lv02d *dev, } if (p->wakeup_flags) { - dev->write(dev, FF_WU_CFG_1, p->wakeup_flags); - dev->write(dev, FF_WU_THS_1, p->wakeup_thresh & 0x7f); + lis3->write(lis3, FF_WU_CFG_1, p->wakeup_flags); + lis3->write(lis3, FF_WU_THS_1, p->wakeup_thresh & 0x7f); /* pdata value + 1 to keep this backward compatible*/ - dev->write(dev, FF_WU_DURATION_1, p->duration1 + 1); + lis3->write(lis3, FF_WU_DURATION_1, p->duration1 + 1); ctrl2 ^= HP_FF_WU1; /* Xor to keep compatible with old pdata*/ } if (p->wakeup_flags2) { - dev->write(dev, FF_WU_CFG_2, p->wakeup_flags2); - dev->write(dev, FF_WU_THS_2, p->wakeup_thresh2 & 0x7f); + lis3->write(lis3, FF_WU_CFG_2, p->wakeup_flags2); + lis3->write(lis3, FF_WU_THS_2, p->wakeup_thresh2 & 0x7f); /* pdata value + 1 to keep this backward compatible*/ - dev->write(dev, FF_WU_DURATION_2, p->duration2 + 1); + lis3->write(lis3, FF_WU_DURATION_2, p->duration2 + 1); ctrl2 ^= HP_FF_WU2; /* Xor to keep compatible with old pdata*/ } /* Configure hipass filters */ - dev->write(dev, CTRL_REG2, ctrl2); + lis3->write(lis3, CTRL_REG2, ctrl2); if (p->irq2) { err = request_threaded_irq(p->irq2, @@ -891,72 +891,72 @@ static void lis3lv02d_8b_configure(struct lis3lv02d *dev, * Initialise the accelerometer and the various subsystems. * Should be rather independent of the bus system. */ -int lis3lv02d_init_device(struct lis3lv02d *dev) +int lis3lv02d_init_device(struct lis3lv02d *lis3) { int err; irq_handler_t thread_fn; int irq_flags = 0; - dev->whoami = lis3lv02d_read_8(dev, WHO_AM_I); + lis3->whoami = lis3lv02d_read_8(lis3, WHO_AM_I); - switch (dev->whoami) { + switch (lis3->whoami) { case WAI_12B: pr_info("12 bits sensor found\n"); - dev->read_data = lis3lv02d_read_12; - dev->mdps_max_val = 2048; - dev->pwron_delay = LIS3_PWRON_DELAY_WAI_12B; - dev->odrs = lis3_12_rates; - dev->odr_mask = CTRL1_DF0 | CTRL1_DF1; - dev->scale = LIS3_SENSITIVITY_12B; - dev->regs = lis3_wai12_regs; - dev->regs_size = ARRAY_SIZE(lis3_wai12_regs); + lis3->read_data = lis3lv02d_read_12; + lis3->mdps_max_val = 2048; + lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_12B; + lis3->odrs = lis3_12_rates; + lis3->odr_mask = CTRL1_DF0 | CTRL1_DF1; + lis3->scale = LIS3_SENSITIVITY_12B; + lis3->regs = lis3_wai12_regs; + lis3->regs_size = ARRAY_SIZE(lis3_wai12_regs); break; case WAI_8B: pr_info("8 bits sensor found\n"); - dev->read_data = lis3lv02d_read_8; - dev->mdps_max_val = 128; - dev->pwron_delay = LIS3_PWRON_DELAY_WAI_8B; - dev->odrs = lis3_8_rates; - dev->odr_mask = CTRL1_DR; - dev->scale = LIS3_SENSITIVITY_8B; - dev->regs = lis3_wai8_regs; - dev->regs_size = ARRAY_SIZE(lis3_wai8_regs); + lis3->read_data = lis3lv02d_read_8; + lis3->mdps_max_val = 128; + lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_8B; + lis3->odrs = lis3_8_rates; + lis3->odr_mask = CTRL1_DR; + lis3->scale = LIS3_SENSITIVITY_8B; + lis3->regs = lis3_wai8_regs; + lis3->regs_size = ARRAY_SIZE(lis3_wai8_regs); break; case WAI_3DC: pr_info("8 bits 3DC sensor found\n"); - dev->read_data = lis3lv02d_read_8; - dev->mdps_max_val = 128; - dev->pwron_delay = LIS3_PWRON_DELAY_WAI_8B; - dev->odrs = lis3_3dc_rates; - dev->odr_mask = CTRL1_ODR0|CTRL1_ODR1|CTRL1_ODR2|CTRL1_ODR3; - dev->scale = LIS3_SENSITIVITY_8B; + lis3->read_data = lis3lv02d_read_8; + lis3->mdps_max_val = 128; + lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_8B; + lis3->odrs = lis3_3dc_rates; + lis3->odr_mask = CTRL1_ODR0|CTRL1_ODR1|CTRL1_ODR2|CTRL1_ODR3; + lis3->scale = LIS3_SENSITIVITY_8B; break; default: - pr_err("unknown sensor type 0x%X\n", dev->whoami); + pr_err("unknown sensor type 0x%X\n", lis3->whoami); return -EINVAL; } - dev->reg_cache = kzalloc(max(sizeof(lis3_wai8_regs), + lis3->reg_cache = kzalloc(max(sizeof(lis3_wai8_regs), sizeof(lis3_wai12_regs)), GFP_KERNEL); - if (dev->reg_cache == NULL) { + if (lis3->reg_cache == NULL) { printk(KERN_ERR DRIVER_NAME "out of memory\n"); return -ENOMEM; } - mutex_init(&dev->mutex); - atomic_set(&dev->wake_thread, 0); + mutex_init(&lis3->mutex); + atomic_set(&lis3->wake_thread, 0); - lis3lv02d_add_fs(dev); - err = lis3lv02d_poweron(dev); + lis3lv02d_add_fs(lis3); + err = lis3lv02d_poweron(lis3); if (err) { - lis3lv02d_remove_fs(dev); + lis3lv02d_remove_fs(lis3); return err; } - if (dev->pm_dev) { - pm_runtime_set_active(dev->pm_dev); - pm_runtime_enable(dev->pm_dev); + if (lis3->pm_dev) { + pm_runtime_set_active(lis3->pm_dev); + pm_runtime_enable(lis3->pm_dev); } if (lis3lv02d_joystick_enable()) @@ -964,24 +964,24 @@ int lis3lv02d_init_device(struct lis3lv02d *dev) /* passing in platform specific data is purely optional and only * used by the SPI transport layer at the moment */ - if (dev->pdata) { - struct lis3lv02d_platform_data *p = dev->pdata; + if (lis3->pdata) { + struct lis3lv02d_platform_data *p = lis3->pdata; - if (dev->whoami == WAI_8B) - lis3lv02d_8b_configure(dev, p); + if (lis3->whoami == WAI_8B) + lis3lv02d_8b_configure(lis3, p); irq_flags = p->irq_flags1 & IRQF_TRIGGER_MASK; - dev->irq_cfg = p->irq_cfg; + lis3->irq_cfg = p->irq_cfg; if (p->irq_cfg) - dev->write(dev, CTRL_REG3, p->irq_cfg); + lis3->write(lis3, CTRL_REG3, p->irq_cfg); if (p->default_rate) lis3lv02d_set_odr(p->default_rate); } /* bail if we did not get an IRQ from the bus layer */ - if (!dev->irq) { + if (!lis3->irq) { pr_debug("No IRQ. Disabling /dev/freefall\n"); goto out; } @@ -997,12 +997,12 @@ int lis3lv02d_init_device(struct lis3lv02d *dev) * io-apic is not configurable (and generates a warning) but I keep it * in case of support for other hardware. */ - if (dev->pdata && dev->whoami == WAI_8B) + if (lis3->pdata && lis3->whoami == WAI_8B) thread_fn = lis302dl_interrupt_thread1_8b; else thread_fn = NULL; - err = request_threaded_irq(dev->irq, lis302dl_interrupt, + err = request_threaded_irq(lis3->irq, lis302dl_interrupt, thread_fn, IRQF_TRIGGER_RISING | IRQF_ONESHOT | irq_flags, -- cgit v0.10.2 From e1e5687d75ef0ea5cbae63df48ff2fdcb5306f66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Piel?= Date: Mon, 31 Oct 2011 17:11:02 -0700 Subject: lis3: change exported function to use passed parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change exported functions to use the device given as parameter instead of the global one. Signed-off-by: Ilkka Koskinen Signed-off-by: Éric Piel Cc: Matthew Garrett Cc: Witold Pilat Cc: Lyall Pearce Cc: Malte Starostik Cc: Thadeu Lima de Souza Cascardo Cc: Christian Lamparter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c index eaef96bb..57249a0 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.c +++ b/drivers/misc/lis3lv02d/lis3lv02d.c @@ -631,7 +631,7 @@ static struct miscdevice lis3lv02d_misc_device = { .fops = &lis3lv02d_misc_fops, }; -int lis3lv02d_joystick_enable(void) +int lis3lv02d_joystick_enable(struct lis3lv02d *lis3) { struct input_dev *input_dev; int err; @@ -689,7 +689,7 @@ int lis3lv02d_joystick_enable(void) } EXPORT_SYMBOL_GPL(lis3lv02d_joystick_enable); -void lis3lv02d_joystick_disable(void) +void lis3lv02d_joystick_disable(struct lis3lv02d *lis3) { if (lis3_dev.irq) free_irq(lis3_dev.irq, &lis3_dev); @@ -959,7 +959,7 @@ int lis3lv02d_init_device(struct lis3lv02d *lis3) pm_runtime_enable(lis3->pm_dev); } - if (lis3lv02d_joystick_enable()) + if (lis3lv02d_joystick_enable(lis3)) pr_err("joystick initialization failed\n"); /* passing in platform specific data is purely optional and only diff --git a/drivers/misc/lis3lv02d/lis3lv02d.h b/drivers/misc/lis3lv02d/lis3lv02d.h index 57c64bb..b94e723 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.h +++ b/drivers/misc/lis3lv02d/lis3lv02d.h @@ -282,8 +282,8 @@ struct lis3lv02d { }; int lis3lv02d_init_device(struct lis3lv02d *lis3); -int lis3lv02d_joystick_enable(void); -void lis3lv02d_joystick_disable(void); +int lis3lv02d_joystick_enable(struct lis3lv02d *lis3); +void lis3lv02d_joystick_disable(struct lis3lv02d *lis3); void lis3lv02d_poweroff(struct lis3lv02d *lis3); int lis3lv02d_poweron(struct lis3lv02d *lis3); int lis3lv02d_remove_fs(struct lis3lv02d *lis3); diff --git a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c index ea9159c..6cdc38f 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c +++ b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c @@ -182,7 +182,7 @@ static int __devexit lis3lv02d_i2c_remove(struct i2c_client *client) if (pdata && pdata->release_resources) pdata->release_resources(); - lis3lv02d_joystick_disable(); + lis3lv02d_joystick_disable(lis3); lis3lv02d_remove_fs(&lis3_dev); if (lis3_dev.reg_ctrl) diff --git a/drivers/misc/lis3lv02d/lis3lv02d_spi.c b/drivers/misc/lis3lv02d/lis3lv02d_spi.c index c1f8a8f..b2c1be1 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d_spi.c +++ b/drivers/misc/lis3lv02d/lis3lv02d_spi.c @@ -83,7 +83,7 @@ static int __devinit lis302dl_spi_probe(struct spi_device *spi) static int __devexit lis302dl_spi_remove(struct spi_device *spi) { struct lis3lv02d *lis3 = spi_get_drvdata(spi); - lis3lv02d_joystick_disable(); + lis3lv02d_joystick_disable(lis3); lis3lv02d_poweroff(lis3); return lis3lv02d_remove_fs(&lis3_dev); diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c index 89c5cb5..f93a47b 100644 --- a/drivers/platform/x86/hp_accel.c +++ b/drivers/platform/x86/hp_accel.c @@ -323,7 +323,7 @@ static int lis3lv02d_add(struct acpi_device *device) INIT_WORK(&hpled_led.work, delayed_set_status_worker); ret = led_classdev_register(NULL, &hpled_led.led_classdev); if (ret) { - lis3lv02d_joystick_disable(); + lis3lv02d_joystick_disable(&lis3_dev); lis3lv02d_poweroff(&lis3_dev); flush_work(&hpled_led.work); return ret; @@ -337,7 +337,7 @@ static int lis3lv02d_remove(struct acpi_device *device, int type) if (!device) return -EINVAL; - lis3lv02d_joystick_disable(); + lis3lv02d_joystick_disable(&lis3_dev); lis3lv02d_poweroff(&lis3_dev); led_classdev_unregister(&hpled_led.led_classdev); -- cgit v0.10.2 From 895c156c044a736d8dc2239020f4530bb6245675 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Piel?= Date: Mon, 31 Oct 2011 17:11:05 -0700 Subject: lis3: remove the references to the global variable in core driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ilkka.koskinen@nokia.com: fix arg to lis3->read()] Signed-off-by: Ilkka Koskinen Signed-off-by: Éric Piel Cc: Matthew Garrett Cc: Witold Pilat Cc: Lyall Pearce Cc: Malte Starostik Cc: Thadeu Lima de Souza Cascardo Cc: Christian Lamparter Subject: lis3-remove-the-references-to-the-global-variable-in-core-driver-fix Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c index 57249a0..29d12a7 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.c +++ b/drivers/misc/lis3lv02d/lis3lv02d.c @@ -163,7 +163,7 @@ static void lis3lv02d_get_xyz(struct lis3lv02d *lis3, int *x, int *y, int *z) int i; if (lis3->blkread) { - if (lis3_dev.whoami == WAI_12B) { + if (lis3->whoami == WAI_12B) { u16 data[3]; lis3->blkread(lis3, OUTX_L, 6, (u8 *)data); for (i = 0; i < 3; i++) @@ -195,20 +195,20 @@ static int lis3_8_rates[2] = {100, 400}; static int lis3_3dc_rates[16] = {0, 1, 10, 25, 50, 100, 200, 400, 1600, 5000}; /* ODR is Output Data Rate */ -static int lis3lv02d_get_odr(void) +static int lis3lv02d_get_odr(struct lis3lv02d *lis3) { u8 ctrl; int shift; - lis3_dev.read(&lis3_dev, CTRL_REG1, &ctrl); - ctrl &= lis3_dev.odr_mask; - shift = ffs(lis3_dev.odr_mask) - 1; - return lis3_dev.odrs[(ctrl >> shift)]; + lis3->read(lis3, CTRL_REG1, &ctrl); + ctrl &= lis3->odr_mask; + shift = ffs(lis3->odr_mask) - 1; + return lis3->odrs[(ctrl >> shift)]; } static int lis3lv02d_get_pwron_wait(struct lis3lv02d *lis3) { - int div = lis3lv02d_get_odr(); + int div = lis3lv02d_get_odr(lis3); if (WARN_ONCE(div == 0, "device returned spurious data")) return -ENXIO; @@ -218,7 +218,7 @@ static int lis3lv02d_get_pwron_wait(struct lis3lv02d *lis3) return 0; } -static int lis3lv02d_set_odr(int rate) +static int lis3lv02d_set_odr(struct lis3lv02d *lis3, int rate) { u8 ctrl; int i, len, shift; @@ -226,14 +226,14 @@ static int lis3lv02d_set_odr(int rate) if (!rate) return -EINVAL; - lis3_dev.read(&lis3_dev, CTRL_REG1, &ctrl); - ctrl &= ~lis3_dev.odr_mask; - len = 1 << hweight_long(lis3_dev.odr_mask); /* # of possible values */ - shift = ffs(lis3_dev.odr_mask) - 1; + lis3->read(lis3, CTRL_REG1, &ctrl); + ctrl &= ~lis3->odr_mask; + len = 1 << hweight_long(lis3->odr_mask); /* # of possible values */ + shift = ffs(lis3->odr_mask) - 1; for (i = 0; i < len; i++) - if (lis3_dev.odrs[i] == rate) { - lis3_dev.write(&lis3_dev, CTRL_REG1, + if (lis3->odrs[i] == rate) { + lis3->write(lis3, CTRL_REG1, ctrl | (i << shift)); return 0; } @@ -252,12 +252,12 @@ static int lis3lv02d_selftest(struct lis3lv02d *lis3, s16 results[3]) mutex_lock(&lis3->mutex); irq_cfg = lis3->irq_cfg; - if (lis3_dev.whoami == WAI_8B) { + if (lis3->whoami == WAI_8B) { lis3->data_ready_count[IRQ_LINE0] = 0; lis3->data_ready_count[IRQ_LINE1] = 0; /* Change interrupt cfg to data ready for selftest */ - atomic_inc(&lis3_dev.wake_thread); + atomic_inc(&lis3->wake_thread); lis3->irq_cfg = LIS3_IRQ1_DATA_READY | LIS3_IRQ2_DATA_READY; lis3->read(lis3, CTRL_REG3, &ctrl_reg_data); lis3->write(lis3, CTRL_REG3, (ctrl_reg_data & @@ -265,12 +265,12 @@ static int lis3lv02d_selftest(struct lis3lv02d *lis3, s16 results[3]) (LIS3_IRQ1_DATA_READY | LIS3_IRQ2_DATA_READY)); } - if (lis3_dev.whoami == WAI_3DC) { + if (lis3->whoami == WAI_3DC) { ctlreg = CTRL_REG4; selftest = CTRL4_ST0; } else { ctlreg = CTRL_REG1; - if (lis3_dev.whoami == WAI_12B) + if (lis3->whoami == WAI_12B) selftest = CTRL1_ST; else selftest = CTRL1_STP; @@ -299,9 +299,9 @@ static int lis3lv02d_selftest(struct lis3lv02d *lis3, s16 results[3]) ret = 0; - if (lis3_dev.whoami == WAI_8B) { + if (lis3->whoami == WAI_8B) { /* Restore original interrupt configuration */ - atomic_dec(&lis3_dev.wake_thread); + atomic_dec(&lis3->wake_thread); lis3->write(lis3, CTRL_REG3, ctrl_reg_data); lis3->irq_cfg = irq_cfg; @@ -415,24 +415,27 @@ EXPORT_SYMBOL_GPL(lis3lv02d_poweron); static void lis3lv02d_joystick_poll(struct input_polled_dev *pidev) { + struct lis3lv02d *lis3 = pidev->private; int x, y, z; - mutex_lock(&lis3_dev.mutex); - lis3lv02d_get_xyz(&lis3_dev, &x, &y, &z); + mutex_lock(&lis3->mutex); + lis3lv02d_get_xyz(lis3, &x, &y, &z); input_report_abs(pidev->input, ABS_X, x); input_report_abs(pidev->input, ABS_Y, y); input_report_abs(pidev->input, ABS_Z, z); input_sync(pidev->input); - mutex_unlock(&lis3_dev.mutex); + mutex_unlock(&lis3->mutex); } static void lis3lv02d_joystick_open(struct input_polled_dev *pidev) { - if (lis3_dev.pm_dev) - pm_runtime_get_sync(lis3_dev.pm_dev); + struct lis3lv02d *lis3 = pidev->private; - if (lis3_dev.pdata && lis3_dev.whoami == WAI_8B && lis3_dev.idev) - atomic_set(&lis3_dev.wake_thread, 1); + if (lis3->pm_dev) + pm_runtime_get_sync(lis3->pm_dev); + + if (lis3->pdata && lis3->whoami == WAI_8B && lis3->idev) + atomic_set(&lis3->wake_thread, 1); /* * Update coordinates for the case where poll interval is 0 and * the chip in running purely under interrupt control @@ -442,14 +445,18 @@ static void lis3lv02d_joystick_open(struct input_polled_dev *pidev) static void lis3lv02d_joystick_close(struct input_polled_dev *pidev) { - atomic_set(&lis3_dev.wake_thread, 0); - if (lis3_dev.pm_dev) - pm_runtime_put(lis3_dev.pm_dev); + struct lis3lv02d *lis3 = pidev->private; + + atomic_set(&lis3->wake_thread, 0); + if (lis3->pm_dev) + pm_runtime_put(lis3->pm_dev); } -static irqreturn_t lis302dl_interrupt(int irq, void *dummy) +static irqreturn_t lis302dl_interrupt(int irq, void *data) { - if (!test_bit(0, &lis3_dev.misc_opened)) + struct lis3lv02d *lis3 = data; + + if (!test_bit(0, &lis3->misc_opened)) goto out; /* @@ -457,12 +464,12 @@ static irqreturn_t lis302dl_interrupt(int irq, void *dummy) * the lid is closed. This leads to interrupts as soon as a little move * is done. */ - atomic_inc(&lis3_dev.count); + atomic_inc(&lis3->count); - wake_up_interruptible(&lis3_dev.misc_wait); - kill_fasync(&lis3_dev.async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&lis3->misc_wait); + kill_fasync(&lis3->async_queue, SIGIO, POLL_IN); out: - if (atomic_read(&lis3_dev.wake_thread)) + if (atomic_read(&lis3->wake_thread)) return IRQ_WAKE_THREAD; return IRQ_HANDLED; } @@ -534,28 +541,37 @@ static irqreturn_t lis302dl_interrupt_thread2_8b(int irq, void *data) static int lis3lv02d_misc_open(struct inode *inode, struct file *file) { - if (test_and_set_bit(0, &lis3_dev.misc_opened)) + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + + if (test_and_set_bit(0, &lis3->misc_opened)) return -EBUSY; /* already open */ - if (lis3_dev.pm_dev) - pm_runtime_get_sync(lis3_dev.pm_dev); + if (lis3->pm_dev) + pm_runtime_get_sync(lis3->pm_dev); - atomic_set(&lis3_dev.count, 0); + atomic_set(&lis3->count, 0); return 0; } static int lis3lv02d_misc_release(struct inode *inode, struct file *file) { - fasync_helper(-1, file, 0, &lis3_dev.async_queue); - clear_bit(0, &lis3_dev.misc_opened); /* release the device */ - if (lis3_dev.pm_dev) - pm_runtime_put(lis3_dev.pm_dev); + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + + fasync_helper(-1, file, 0, &lis3->async_queue); + clear_bit(0, &lis3->misc_opened); /* release the device */ + if (lis3->pm_dev) + pm_runtime_put(lis3->pm_dev); return 0; } static ssize_t lis3lv02d_misc_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + DECLARE_WAITQUEUE(wait, current); u32 data; unsigned char byte_data; @@ -564,10 +580,10 @@ static ssize_t lis3lv02d_misc_read(struct file *file, char __user *buf, if (count < 1) return -EINVAL; - add_wait_queue(&lis3_dev.misc_wait, &wait); + add_wait_queue(&lis3->misc_wait, &wait); while (true) { set_current_state(TASK_INTERRUPTIBLE); - data = atomic_xchg(&lis3_dev.count, 0); + data = atomic_xchg(&lis3->count, 0); if (data) break; @@ -597,22 +613,28 @@ static ssize_t lis3lv02d_misc_read(struct file *file, char __user *buf, out: __set_current_state(TASK_RUNNING); - remove_wait_queue(&lis3_dev.misc_wait, &wait); + remove_wait_queue(&lis3->misc_wait, &wait); return retval; } static unsigned int lis3lv02d_misc_poll(struct file *file, poll_table *wait) { - poll_wait(file, &lis3_dev.misc_wait, wait); - if (atomic_read(&lis3_dev.count)) + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + + poll_wait(file, &lis3->misc_wait, wait); + if (atomic_read(&lis3->count)) return POLLIN | POLLRDNORM; return 0; } static int lis3lv02d_misc_fasync(int fd, struct file *file, int on) { - return fasync_helper(fd, file, on, &lis3_dev.async_queue); + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + + return fasync_helper(fd, file, on, &lis3->async_queue); } static const struct file_operations lis3lv02d_misc_fops = { @@ -625,12 +647,6 @@ static const struct file_operations lis3lv02d_misc_fops = { .fasync = lis3lv02d_misc_fasync, }; -static struct miscdevice lis3lv02d_misc_device = { - .minor = MISC_DYNAMIC_MINOR, - .name = "freefall", - .fops = &lis3lv02d_misc_fops, -}; - int lis3lv02d_joystick_enable(struct lis3lv02d *lis3) { struct input_dev *input_dev; @@ -638,51 +654,52 @@ int lis3lv02d_joystick_enable(struct lis3lv02d *lis3) int max_val, fuzz, flat; int btns[] = {BTN_X, BTN_Y, BTN_Z}; - if (lis3_dev.idev) + if (lis3->idev) return -EINVAL; - lis3_dev.idev = input_allocate_polled_device(); - if (!lis3_dev.idev) + lis3->idev = input_allocate_polled_device(); + if (!lis3->idev) return -ENOMEM; - lis3_dev.idev->poll = lis3lv02d_joystick_poll; - lis3_dev.idev->open = lis3lv02d_joystick_open; - lis3_dev.idev->close = lis3lv02d_joystick_close; - lis3_dev.idev->poll_interval = MDPS_POLL_INTERVAL; - lis3_dev.idev->poll_interval_min = MDPS_POLL_MIN; - lis3_dev.idev->poll_interval_max = MDPS_POLL_MAX; - input_dev = lis3_dev.idev->input; + lis3->idev->poll = lis3lv02d_joystick_poll; + lis3->idev->open = lis3lv02d_joystick_open; + lis3->idev->close = lis3lv02d_joystick_close; + lis3->idev->poll_interval = MDPS_POLL_INTERVAL; + lis3->idev->poll_interval_min = MDPS_POLL_MIN; + lis3->idev->poll_interval_max = MDPS_POLL_MAX; + lis3->idev->private = lis3; + input_dev = lis3->idev->input; input_dev->name = "ST LIS3LV02DL Accelerometer"; input_dev->phys = DRIVER_NAME "/input0"; input_dev->id.bustype = BUS_HOST; input_dev->id.vendor = 0; - input_dev->dev.parent = &lis3_dev.pdev->dev; + input_dev->dev.parent = &lis3->pdev->dev; set_bit(EV_ABS, input_dev->evbit); - max_val = (lis3_dev.mdps_max_val * lis3_dev.scale) / LIS3_ACCURACY; - if (lis3_dev.whoami == WAI_12B) { + max_val = (lis3->mdps_max_val * lis3->scale) / LIS3_ACCURACY; + if (lis3->whoami == WAI_12B) { fuzz = LIS3_DEFAULT_FUZZ_12B; flat = LIS3_DEFAULT_FLAT_12B; } else { fuzz = LIS3_DEFAULT_FUZZ_8B; flat = LIS3_DEFAULT_FLAT_8B; } - fuzz = (fuzz * lis3_dev.scale) / LIS3_ACCURACY; - flat = (flat * lis3_dev.scale) / LIS3_ACCURACY; + fuzz = (fuzz * lis3->scale) / LIS3_ACCURACY; + flat = (flat * lis3->scale) / LIS3_ACCURACY; input_set_abs_params(input_dev, ABS_X, -max_val, max_val, fuzz, flat); input_set_abs_params(input_dev, ABS_Y, -max_val, max_val, fuzz, flat); input_set_abs_params(input_dev, ABS_Z, -max_val, max_val, fuzz, flat); - lis3_dev.mapped_btns[0] = lis3lv02d_get_axis(abs(lis3_dev.ac.x), btns); - lis3_dev.mapped_btns[1] = lis3lv02d_get_axis(abs(lis3_dev.ac.y), btns); - lis3_dev.mapped_btns[2] = lis3lv02d_get_axis(abs(lis3_dev.ac.z), btns); + lis3->mapped_btns[0] = lis3lv02d_get_axis(abs(lis3->ac.x), btns); + lis3->mapped_btns[1] = lis3lv02d_get_axis(abs(lis3->ac.y), btns); + lis3->mapped_btns[2] = lis3lv02d_get_axis(abs(lis3->ac.z), btns); - err = input_register_polled_device(lis3_dev.idev); + err = input_register_polled_device(lis3->idev); if (err) { - input_free_polled_device(lis3_dev.idev); - lis3_dev.idev = NULL; + input_free_polled_device(lis3->idev); + lis3->idev = NULL; } return err; @@ -691,19 +708,19 @@ EXPORT_SYMBOL_GPL(lis3lv02d_joystick_enable); void lis3lv02d_joystick_disable(struct lis3lv02d *lis3) { - if (lis3_dev.irq) - free_irq(lis3_dev.irq, &lis3_dev); - if (lis3_dev.pdata && lis3_dev.pdata->irq2) - free_irq(lis3_dev.pdata->irq2, &lis3_dev); + if (lis3->irq) + free_irq(lis3->irq, lis3); + if (lis3->pdata && lis3->pdata->irq2) + free_irq(lis3->pdata->irq2, lis3); - if (!lis3_dev.idev) + if (!lis3->idev) return; - if (lis3_dev.irq) - misc_deregister(&lis3lv02d_misc_device); - input_unregister_polled_device(lis3_dev.idev); - input_free_polled_device(lis3_dev.idev); - lis3_dev.idev = NULL; + if (lis3->irq) + misc_deregister(&lis3->miscdev); + input_unregister_polled_device(lis3->idev); + input_free_polled_device(lis3->idev); + lis3->idev = NULL; } EXPORT_SYMBOL_GPL(lis3lv02d_joystick_disable); @@ -728,6 +745,7 @@ static void lis3lv02d_sysfs_poweron(struct lis3lv02d *lis3) static ssize_t lis3lv02d_selftest_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct lis3lv02d *lis3 = dev_get_drvdata(dev); s16 values[3]; static const char ok[] = "OK"; @@ -735,8 +753,8 @@ static ssize_t lis3lv02d_selftest_show(struct device *dev, static const char irq[] = "FAIL_IRQ"; const char *res; - lis3lv02d_sysfs_poweron(&lis3_dev); - switch (lis3lv02d_selftest(&lis3_dev, values)) { + lis3lv02d_sysfs_poweron(lis3); + switch (lis3lv02d_selftest(lis3, values)) { case SELFTEST_FAIL: res = fail; break; @@ -755,33 +773,37 @@ static ssize_t lis3lv02d_selftest_show(struct device *dev, static ssize_t lis3lv02d_position_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct lis3lv02d *lis3 = dev_get_drvdata(dev); int x, y, z; - lis3lv02d_sysfs_poweron(&lis3_dev); - mutex_lock(&lis3_dev.mutex); - lis3lv02d_get_xyz(&lis3_dev, &x, &y, &z); - mutex_unlock(&lis3_dev.mutex); + lis3lv02d_sysfs_poweron(lis3); + mutex_lock(&lis3->mutex); + lis3lv02d_get_xyz(lis3, &x, &y, &z); + mutex_unlock(&lis3->mutex); return sprintf(buf, "(%d,%d,%d)\n", x, y, z); } static ssize_t lis3lv02d_rate_show(struct device *dev, struct device_attribute *attr, char *buf) { - lis3lv02d_sysfs_poweron(&lis3_dev); - return sprintf(buf, "%d\n", lis3lv02d_get_odr()); + struct lis3lv02d *lis3 = dev_get_drvdata(dev); + + lis3lv02d_sysfs_poweron(lis3); + return sprintf(buf, "%d\n", lis3lv02d_get_odr(lis3)); } static ssize_t lis3lv02d_rate_set(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { + struct lis3lv02d *lis3 = dev_get_drvdata(dev); unsigned long rate; if (strict_strtoul(buf, 0, &rate)) return -EINVAL; - lis3lv02d_sysfs_poweron(&lis3_dev); - if (lis3lv02d_set_odr(rate)) + lis3lv02d_sysfs_poweron(lis3); + if (lis3lv02d_set_odr(lis3, rate)) return -EINVAL; return count; @@ -810,6 +832,7 @@ static int lis3lv02d_add_fs(struct lis3lv02d *lis3) if (IS_ERR(lis3->pdev)) return PTR_ERR(lis3->pdev); + platform_set_drvdata(lis3->pdev, lis3); return sysfs_create_group(&lis3->pdev->dev.kobj, &lis3lv02d_attribute_group); } @@ -823,7 +846,7 @@ int lis3lv02d_remove_fs(struct lis3lv02d *lis3) /* SYSFS may have left chip running. Turn off if necessary */ if (!pm_runtime_suspended(lis3->pm_dev)) - lis3lv02d_poweroff(&lis3_dev); + lis3lv02d_poweroff(lis3); pm_runtime_disable(lis3->pm_dev); pm_runtime_set_suspended(lis3->pm_dev); @@ -850,7 +873,7 @@ static void lis3lv02d_8b_configure(struct lis3lv02d *lis3, (p->click_thresh_y << 4)); if (lis3->idev) { - struct input_dev *input_dev = lis3_dev.idev->input; + struct input_dev *input_dev = lis3->idev->input; input_set_capability(input_dev, EV_KEY, BTN_X); input_set_capability(input_dev, EV_KEY, BTN_Y); input_set_capability(input_dev, EV_KEY, BTN_Z); @@ -881,7 +904,7 @@ static void lis3lv02d_8b_configure(struct lis3lv02d *lis3, lis302dl_interrupt_thread2_8b, IRQF_TRIGGER_RISING | IRQF_ONESHOT | (p->irq_flags2 & IRQF_TRIGGER_MASK), - DRIVER_NAME, &lis3_dev); + DRIVER_NAME, lis3); if (err < 0) pr_err("No second IRQ. Limited functionality\n"); } @@ -977,7 +1000,7 @@ int lis3lv02d_init_device(struct lis3lv02d *lis3) lis3->write(lis3, CTRL_REG3, p->irq_cfg); if (p->default_rate) - lis3lv02d_set_odr(p->default_rate); + lis3lv02d_set_odr(lis3, p->default_rate); } /* bail if we did not get an IRQ from the bus layer */ @@ -1006,14 +1029,18 @@ int lis3lv02d_init_device(struct lis3lv02d *lis3) thread_fn, IRQF_TRIGGER_RISING | IRQF_ONESHOT | irq_flags, - DRIVER_NAME, &lis3_dev); + DRIVER_NAME, lis3); if (err < 0) { pr_err("Cannot get IRQ\n"); goto out; } - if (misc_register(&lis3lv02d_misc_device)) + lis3->miscdev.minor = MISC_DYNAMIC_MINOR; + lis3->miscdev.name = "freefall"; + lis3->miscdev.fops = &lis3lv02d_misc_fops; + + if (misc_register(&lis3->miscdev)) pr_err("misc_register failed\n"); out: return 0; diff --git a/drivers/misc/lis3lv02d/lis3lv02d.h b/drivers/misc/lis3lv02d/lis3lv02d.h index b94e723..2b1482a 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.h +++ b/drivers/misc/lis3lv02d/lis3lv02d.h @@ -21,6 +21,7 @@ #include #include #include +#include /* * This driver tries to support the "digital" accelerometer chips from @@ -273,6 +274,8 @@ struct lis3lv02d { struct fasync_struct *async_queue; /* queue for the misc device */ wait_queue_head_t misc_wait; /* Wait queue for the misc device */ unsigned long misc_opened; /* bit0: whether the device is open */ + struct miscdevice miscdev; + int data_ready_count[2]; atomic_t wake_thread; unsigned char irq_cfg; -- cgit v0.10.2 From ec400c9fab99d16a491cea17d27d0c6a5780b97c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 31 Oct 2011 17:11:07 -0700 Subject: lis3lv02d: make regulator API usage unconditional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The regulator API contains a range of features for stubbing itself out when not in use and for transparently restricting the actual effect of regulator API calls where they can't be supported on a particular system so that drivers don't need to individually implement this. Simplify the driver slightly by making use of this idiom. The only in tree user is ecovec24 which does not use the regulator API. Signed-off-by: Mark Brown Cc: Éric Piel Cc: Ilkka Koskinen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c index 6cdc38f..c02fea0 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c +++ b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c @@ -79,8 +79,7 @@ static int lis3_i2c_init(struct lis3lv02d *lis3) u8 reg; int ret; - if (lis3->reg_ctrl) - lis3_reg_ctrl(lis3, LIS3_REG_ON); + lis3_reg_ctrl(lis3, LIS3_REG_ON); lis3->read(lis3, WHO_AM_I, ®); if (reg != lis3->whoami) @@ -106,10 +105,6 @@ static int __devinit lis3lv02d_i2c_probe(struct i2c_client *client, struct lis3lv02d_platform_data *pdata = client->dev.platform_data; if (pdata) { - /* Regulator control is optional */ - if (pdata->driver_features & LIS3_USE_REGULATOR_CTRL) - lis3_dev.reg_ctrl = lis3_reg_ctrl; - if ((pdata->driver_features & LIS3_USE_BLOCK_READ) && (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_I2C_BLOCK))) @@ -131,15 +126,13 @@ static int __devinit lis3lv02d_i2c_probe(struct i2c_client *client, goto fail; } - if (lis3_dev.reg_ctrl) { - lis3_dev.regulators[0].supply = reg_vdd; - lis3_dev.regulators[1].supply = reg_vdd_io; - ret = regulator_bulk_get(&client->dev, - ARRAY_SIZE(lis3_dev.regulators), - lis3_dev.regulators); - if (ret < 0) - goto fail; - } + lis3_dev.regulators[0].supply = reg_vdd; + lis3_dev.regulators[1].supply = reg_vdd_io; + ret = regulator_bulk_get(&client->dev, + ARRAY_SIZE(lis3_dev.regulators), + lis3_dev.regulators); + if (ret < 0) + goto fail; lis3_dev.pdata = pdata; lis3_dev.bus_priv = client; @@ -153,13 +146,11 @@ static int __devinit lis3lv02d_i2c_probe(struct i2c_client *client, i2c_set_clientdata(client, &lis3_dev); /* Provide power over the init call */ - if (lis3_dev.reg_ctrl) - lis3_reg_ctrl(&lis3_dev, LIS3_REG_ON); + lis3_reg_ctrl(&lis3_dev, LIS3_REG_ON); ret = lis3lv02d_init_device(&lis3_dev); - if (lis3_dev.reg_ctrl) - lis3_reg_ctrl(&lis3_dev, LIS3_REG_OFF); + lis3_reg_ctrl(&lis3_dev, LIS3_REG_OFF); if (ret) goto fail2; @@ -185,9 +176,8 @@ static int __devexit lis3lv02d_i2c_remove(struct i2c_client *client) lis3lv02d_joystick_disable(lis3); lis3lv02d_remove_fs(&lis3_dev); - if (lis3_dev.reg_ctrl) - regulator_bulk_free(ARRAY_SIZE(lis3->regulators), - lis3_dev.regulators); + regulator_bulk_free(ARRAY_SIZE(lis3->regulators), + lis3_dev.regulators); return 0; } diff --git a/include/linux/lis3lv02d.h b/include/linux/lis3lv02d.h index d4292c8..f1664c6 100644 --- a/include/linux/lis3lv02d.h +++ b/include/linux/lis3lv02d.h @@ -113,7 +113,6 @@ struct lis3lv02d_platform_data { s8 axis_x; s8 axis_y; s8 axis_z; -#define LIS3_USE_REGULATOR_CTRL 0x01 #define LIS3_USE_BLOCK_READ 0x02 u16 driver_features; int default_rate; -- cgit v0.10.2 From e58a0f89b25a2299374176d5c37aa89b326681e6 Mon Sep 17 00:00:00 2001 From: Jonghwan Choi Date: Mon, 31 Oct 2011 17:11:09 -0700 Subject: driver/misc/fsa9480.c fix potential null-pointer dereference Signed-off-by: Jonghwan Choi Cc: Donggeun Kim Cc: Kyungmin Park Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/misc/fsa9480.c b/drivers/misc/fsa9480.c index 27dc0d2..f6586d5 100644 --- a/drivers/misc/fsa9480.c +++ b/drivers/misc/fsa9480.c @@ -400,7 +400,8 @@ static int fsa9480_irq_init(struct fsa9480_usbsw *usbsw) return ret; } - device_init_wakeup(&client->dev, pdata->wakeup); + if (pdata) + device_init_wakeup(&client->dev, pdata->wakeup); } return 0; -- cgit v0.10.2 From aaaa287b89a09f22573dc57fcbb6a7dc7d25c8ed Mon Sep 17 00:00:00 2001 From: Peter Korsgaard Date: Mon, 31 Oct 2011 17:11:12 -0700 Subject: drivers/misc/ad525x_dpot-i2c.c: add i2c support for AD5161 Commit 6c536e4ce8e ("ad525x_dpot: add support for SPI parts") added support for the AD5161 through SPI, but the device supports both I2C and SPI (depending on the DIS pin), so add it to -i2c as well. Signed-off-by: Peter Korsgaard Acked-by: Mike Frysinger Acked-by: Michael Hennerich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/misc/ad525x_dpot-i2c.c b/drivers/misc/ad525x_dpot-i2c.c index 4ff73c2..a39e055 100644 --- a/drivers/misc/ad525x_dpot-i2c.c +++ b/drivers/misc/ad525x_dpot-i2c.c @@ -98,6 +98,7 @@ static const struct i2c_device_id ad_dpot_id[] = { {"ad5282", AD5282_ID}, {"adn2860", ADN2860_ID}, {"ad5273", AD5273_ID}, + {"ad5161", AD5161_ID}, {"ad5171", AD5171_ID}, {"ad5170", AD5170_ID}, {"ad5172", AD5172_ID}, -- cgit v0.10.2 From f445027e4e692bd885118460b292d08027fd5501 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 31 Oct 2011 17:11:15 -0700 Subject: stop_machine: make stop_machine safe and efficient to call early Make stop_machine() safe to call early in boot, before SMP has been set up, by simply calling the callback function directly if there's only one CPU online. [ Fixes from AKPM: - add comment - local_irq_flags, not save_flags - also call hard_irq_disable() for systems which need it Tejun suggested using an explicit flag rather than just looking at the online cpu count. ] Cc: Tejun Heo Acked-by: Rusty Russell Cc: Peter Zijlstra Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Steven Rostedt Acked-by: Tejun Heo Cc: Konrad Rzeszutek Wilk Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index ba5070c..5b0951a 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -41,6 +41,7 @@ struct cpu_stopper { }; static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper); +static bool stop_machine_initialized = false; static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo) { @@ -386,6 +387,8 @@ static int __init cpu_stop_init(void) cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu); register_cpu_notifier(&cpu_stop_cpu_notifier); + stop_machine_initialized = true; + return 0; } early_initcall(cpu_stop_init); @@ -485,6 +488,25 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) .num_threads = num_online_cpus(), .active_cpus = cpus }; + if (!stop_machine_initialized) { + /* + * Handle the case where stop_machine() is called + * early in boot before stop_machine() has been + * initialized. + */ + unsigned long flags; + int ret; + + WARN_ON_ONCE(smdata.num_threads != 1); + + local_irq_save(flags); + hard_irq_disable(); + ret = (*fn)(data); + local_irq_restore(flags); + + return ret; + } + /* Set the initial state and stop all online cpus. */ set_state(&smdata, STOPMACHINE_PREPARE); return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata); -- cgit v0.10.2 From 4ff819515b203f937cc6c8a0215a37a68d1ee71f Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 31 Oct 2011 17:11:18 -0700 Subject: watchdog: move watchdog_*_all_cpus under CONFIG_SYSCTL Fix compilation warnings for CONFIG_SYSCTL=n: fixed compilation warnings in case of disabled CONFIG_SYSCTL kernel/watchdog.c:483:13: warning: `watchdog_enable_all_cpus' defined but not used kernel/watchdog.c:500:13: warning: `watchdog_disable_all_cpus' defined but not used these functions are static and are used only in sysctl handler, so move them inside #ifdef CONFIG_SYSCTL too Signed-off-by: Vasily Averin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/watchdog.c b/kernel/watchdog.c index d680381..1d7bca7 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -481,6 +481,8 @@ static void watchdog_disable(int cpu) } } +/* sysctl functions */ +#ifdef CONFIG_SYSCTL static void watchdog_enable_all_cpus(void) { int cpu; @@ -510,8 +512,6 @@ static void watchdog_disable_all_cpus(void) } -/* sysctl functions */ -#ifdef CONFIG_SYSCTL /* * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh */ -- cgit v0.10.2 From 73efc0394e148d0e15583e13712637831f926720 Mon Sep 17 00:00:00 2001 From: Dan Ballard Date: Mon, 31 Oct 2011 17:11:20 -0700 Subject: kernel/sysctl.c: add cap_last_cap to /proc/sys/kernel Userspace needs to know the highest valid capability of the running kernel, which right now cannot reliably be retrieved from the header files only. The fact that this value cannot be determined properly right now creates various problems for libraries compiled on newer header files which are run on older kernels. They assume capabilities are available which actually aren't. libcap-ng is one example. And we ran into the same problem with systemd too. Now the capability is exported in /proc/sys/kernel/cap_last_cap. [akpm@linux-foundation.org: make cap_last_cap const, per Ulrich] Signed-off-by: Dan Ballard Cc: Randy Dunlap Cc: Ingo Molnar Cc: Lennart Poettering Cc: Kay Sievers Cc: Ulrich Drepper Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 704e474..1f24636 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -24,6 +24,7 @@ show up in /proc/sys/kernel: - bootloader_type [ X86 only ] - bootloader_version [ X86 only ] - callhome [ S390 only ] +- cap_last_cap - core_pattern - core_pipe_limit - core_uses_pid @@ -155,6 +156,13 @@ on has a service contract with IBM. ============================================================== +cap_last_cap + +Highest valid capability of the running kernel. Exports +CAP_LAST_CAP from the kernel. + +============================================================== + core_pattern: core_pattern is used to specify a core dumpfile pattern name. diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2d2ecdc..c49d666 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include @@ -134,6 +135,7 @@ static int minolduid; static int min_percpu_pagelist_fract = 8; static int ngroups_max = NGROUPS_MAX; +static const int cap_last_cap = CAP_LAST_CAP; #ifdef CONFIG_INOTIFY_USER #include @@ -740,6 +742,13 @@ static struct ctl_table kern_table[] = { .mode = 0444, .proc_handler = proc_dointvec, }, + { + .procname = "cap_last_cap", + .data = (void *)&cap_last_cap, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, #if defined(CONFIG_LOCKUP_DETECTOR) { .procname = "watchdog", -- cgit v0.10.2 From 0eca6b7c78fd997e02bd9850e608102382b7822e Mon Sep 17 00:00:00 2001 From: Yanmin Zhang Date: Mon, 31 Oct 2011 17:11:25 -0700 Subject: printk: add module parameter ignore_loglevel to control ignore_loglevel We are enabling some power features on medfield. To test suspend-2-RAM conveniently, we need turn on/off ignore_loglevel frequently without rebooting. Add a module parameter, so users can change it by: /sys/module/printk/parameters/ignore_loglevel Signed-off-by: Yanmin Zhang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 27e0488..106efe1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -973,6 +973,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ignore_loglevel [KNL] Ignore loglevel setting - this will print /all/ kernel messages to the console. Useful for debugging. + We also add it as printk module parameter, so users + could change it dynamically, usually by + /sys/module/printk/parameters/ignore_loglevel. ihash_entries= [KNL] Set number of hash buckets for inode cache. diff --git a/kernel/printk.c b/kernel/printk.c index b7da183..e62f949 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -532,6 +532,9 @@ static int __init ignore_loglevel_setup(char *str) } early_param("ignore_loglevel", ignore_loglevel_setup); +module_param_named(ignore_loglevel, ignore_loglevel, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to" + "print all kernel messages to the console."); /* * Write out chars from start to end - 1 inclusive -- cgit v0.10.2 From 134620f7a865b3bc9e3d56d460603592b70ede21 Mon Sep 17 00:00:00 2001 From: Yanmin Zhang Date: Mon, 31 Oct 2011 17:11:27 -0700 Subject: printk: add console_suspend module parameter We are enabling some power features on medfield. To test suspend-2-RAM conveniently, we need turn on/off console_suspend_enabled frequently. Add a module parameter, so users could change it by: /sys/module/printk/parameters/console_suspend Signed-off-by: Yanmin Zhang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 106efe1..c2efe28 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1669,6 +1669,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. debugging driver suspend/resume hooks). This may not work reliably with all consoles, but is known to work with serial and VGA consoles. + To facilitate more flexible debugging, we also add + console_suspend, a printk module parameter to control + it. Users could use console_suspend (usually + /sys/module/printk/parameters/console_suspend) to + turn on/off it dynamically. noaliencache [MM, NUMA, SLAB] Disables the allocation of alien caches in the slab allocator. Saves per-node memory, diff --git a/kernel/printk.c b/kernel/printk.c index e62f949..6d9dedd 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1111,6 +1111,10 @@ static int __init console_suspend_disable(char *str) return 1; } __setup("no_console_suspend", console_suspend_disable); +module_param_named(console_suspend, console_suspend_enabled, + bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(console_suspend, "suspend console during suspend" + " and hibernate operations"); /** * suspend_console - suspend the console subsystem -- cgit v0.10.2 From 48e41899e4a3592746e5263c14681bf5c1393563 Mon Sep 17 00:00:00 2001 From: William Douglas Date: Mon, 31 Oct 2011 17:11:29 -0700 Subject: printk: fix bounds checking for log_prefix Currently log_prefix is testing that the first character of the log level and facility is less than '0' and greater than '9' (which is always false). It should be testing to see if the character less than '0' or greater than '9' instead. This patch makes that change. The code being changed worked because strtoul bombs out (endp isn't updated) and 0 is returned anyway. Signed-off-by: William Douglas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/printk.c b/kernel/printk.c index 6d9dedd..286d2c7 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -595,7 +595,7 @@ static size_t log_prefix(const char *p, unsigned int *level, char *special) /* multi digit including the level and facility number */ char *endp = NULL; - if (p[1] < '0' && p[1] > '9') + if (p[1] < '0' || p[1] > '9') return 0; lev = (simple_strtoul(&p[1], &endp, 10) & 7); -- cgit v0.10.2 From ae29bc92da01a2e9d278a9a58c3b307d41cc0254 Mon Sep 17 00:00:00 2001 From: William Douglas Date: Mon, 31 Oct 2011 17:11:31 -0700 Subject: printk: remove bounds checking for log_prefix Currently log_prefix is testing that the first character of the log level and facility is less than '0' and greater than '9' (which is always false). Since the code being updated works because strtoul bombs out (endp isn't updated) and 0 is returned anyway just remove the check and don't change the behavior of the function. Signed-off-by: William Douglas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/printk.c b/kernel/printk.c index 286d2c7..1455a0d 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -595,9 +595,6 @@ static size_t log_prefix(const char *p, unsigned int *level, char *special) /* multi digit including the level and facility number */ char *endp = NULL; - if (p[1] < '0' || p[1] > '9') - return 0; - lev = (simple_strtoul(&p[1], &endp, 10) & 7); if (endp == NULL || endp[0] != '>') return 0; -- cgit v0.10.2 From b9075fa968a0a4347aef35e235e2995c0e57dddd Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 31 Oct 2011 17:11:33 -0700 Subject: treewide: use __printf not __attribute__((format(printf,...))) Standardize the style for compiler based printf format verification. Standardized the location of __printf too. Done via script and a little typing. $ grep -rPl --include=*.[ch] -w "__attribute__" * | \ grep -vP "^(tools|scripts|include/linux/compiler-gcc.h)" | \ xargs perl -n -i -e 'local $/; while (<>) { s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf\s*,\s*(.+)\s*,\s*(.+)\s*\)\s*\)\s*\)/__printf($1, $2)/g ; print; }' [akpm@linux-foundation.org: revert arch bits] Signed-off-by: Joe Perches Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/isdn/hisax/callc.c b/drivers/isdn/hisax/callc.c index 37e685e..c4897e1 100644 --- a/drivers/isdn/hisax/callc.c +++ b/drivers/isdn/hisax/callc.c @@ -65,7 +65,7 @@ hisax_findcard(int driverid) return (struct IsdnCardState *) 0; } -static __attribute__((format(printf, 3, 4))) void +static __printf(3, 4) void link_debug(struct Channel *chanp, int direction, char *fmt, ...) { va_list args; @@ -1068,7 +1068,7 @@ init_d_st(struct Channel *chanp) return 0; } -static __attribute__((format(printf, 2, 3))) void +static __printf(2, 3) void callc_debug(struct FsmInst *fi, char *fmt, ...) { va_list args; diff --git a/drivers/isdn/hisax/hisax.h b/drivers/isdn/hisax/hisax.h index 0a5c42a..aff45a1 100644 --- a/drivers/isdn/hisax/hisax.h +++ b/drivers/isdn/hisax/hisax.h @@ -1287,9 +1287,9 @@ int jiftime(char *s, long mark); int HiSax_command(isdn_ctrl * ic); int HiSax_writebuf_skb(int id, int chan, int ack, struct sk_buff *skb); -__attribute__((format(printf, 3, 4))) +__printf(3, 4) void HiSax_putstatus(struct IsdnCardState *cs, char *head, char *fmt, ...); -__attribute__((format(printf, 3, 0))) +__printf(3, 0) void VHiSax_putstatus(struct IsdnCardState *cs, char *head, char *fmt, va_list args); void HiSax_reportcard(int cardnr, int sel); int QuickHex(char *txt, u_char * p, int cnt); diff --git a/drivers/isdn/hisax/isdnl1.h b/drivers/isdn/hisax/isdnl1.h index 425d861..66ddcab 100644 --- a/drivers/isdn/hisax/isdnl1.h +++ b/drivers/isdn/hisax/isdnl1.h @@ -21,7 +21,7 @@ #define B_XMTBUFREADY 1 #define B_ACKPENDING 2 -__attribute__((format(printf, 2, 3))) +__printf(2, 3) void debugl1(struct IsdnCardState *cs, char *fmt, ...); void DChannel_proc_xmt(struct IsdnCardState *cs); void DChannel_proc_rcv(struct IsdnCardState *cs); diff --git a/drivers/isdn/hisax/isdnl3.c b/drivers/isdn/hisax/isdnl3.c index ad291f2..1c24e44 100644 --- a/drivers/isdn/hisax/isdnl3.c +++ b/drivers/isdn/hisax/isdnl3.c @@ -66,7 +66,7 @@ static char *strL3Event[] = "EV_TIMEOUT", }; -static __attribute__((format(printf, 2, 3))) void +static __printf(2, 3) void l3m_debug(struct FsmInst *fi, char *fmt, ...) { va_list args; diff --git a/drivers/isdn/hisax/st5481_d.c b/drivers/isdn/hisax/st5481_d.c index 4408263..db247b7 100644 --- a/drivers/isdn/hisax/st5481_d.c +++ b/drivers/isdn/hisax/st5481_d.c @@ -167,7 +167,7 @@ static struct FsmNode L1FnList[] __initdata = {ST_L1_F8, EV_IND_RSY, l1_ignore}, }; -static __attribute__((format(printf, 2, 3))) +static __printf(2, 3) void l1m_debug(struct FsmInst *fi, char *fmt, ...) { va_list args; @@ -270,7 +270,7 @@ static char *strDoutEvent[] = "EV_DOUT_UNDERRUN", }; -static __attribute__((format(printf, 2, 3))) +static __printf(2, 3) void dout_debug(struct FsmInst *fi, char *fmt, ...) { va_list args; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index fca6616..8fda331c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -581,8 +581,9 @@ extern const struct ethtool_ops mlx4_en_ethtool_ops; * printk / logging functions */ +__printf(3, 4) int en_print(const char *level, const struct mlx4_en_priv *priv, - const char *format, ...) __attribute__ ((format (printf, 3, 4))); + const char *format, ...); #define en_dbg(mlevel, priv, format, arg...) \ do { \ diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h index 908fdbc..0f9ee46 100644 --- a/drivers/net/wireless/ath/ath.h +++ b/drivers/net/wireless/ath/ath.h @@ -173,8 +173,7 @@ bool ath_hw_keyreset(struct ath_common *common, u16 entry); void ath_hw_cycle_counters_update(struct ath_common *common); int32_t ath_hw_get_listen_time(struct ath_common *common); -extern __attribute__((format (printf, 2, 3))) -void ath_printk(const char *level, const char *fmt, ...); +extern __printf(2, 3) void ath_printk(const char *level, const char *fmt, ...); #define _ath_printk(level, common, fmt, ...) \ do { \ @@ -258,7 +257,7 @@ do { \ #else -static inline __attribute__((format (printf, 3, 4))) +static inline __attribute__ ((format (printf, 3, 4))) void ath_dbg(struct ath_common *common, enum ATH_DEBUG dbg_mask, const char *fmt, ...) { diff --git a/drivers/net/wireless/ath/ath5k/debug.h b/drivers/net/wireless/ath/ath5k/debug.h index 7f37df3..0a3f916 100644 --- a/drivers/net/wireless/ath/ath5k/debug.h +++ b/drivers/net/wireless/ath/ath5k/debug.h @@ -141,10 +141,10 @@ ath5k_debug_printtxbuf(struct ath5k_hw *ah, struct ath5k_buf *bf); #include -static inline void __attribute__ ((format (printf, 3, 4))) +static inline __printf(3, 4) void ATH5K_DBG(struct ath5k_hw *ah, unsigned int m, const char *fmt, ...) {} -static inline void __attribute__ ((format (printf, 3, 4))) +static inline __printf(3, 4) void ATH5K_DBG_UNLIMIT(struct ath5k_hw *ah, unsigned int m, const char *fmt, ...) {} diff --git a/drivers/net/wireless/ath/ath6kl/debug.h b/drivers/net/wireless/ath/ath6kl/debug.h index 9288a3c..7b7675f 100644 --- a/drivers/net/wireless/ath/ath6kl/debug.h +++ b/drivers/net/wireless/ath/ath6kl/debug.h @@ -44,8 +44,8 @@ enum ATH6K_DEBUG_MASK { }; extern unsigned int debug_mask; -extern int ath6kl_printk(const char *level, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); +extern __printf(2, 3) +int ath6kl_printk(const char *level, const char *fmt, ...); #define ath6kl_info(fmt, ...) \ ath6kl_printk(KERN_INFO, fmt, ##__VA_ARGS__) diff --git a/drivers/net/wireless/b43/b43.h b/drivers/net/wireless/b43/b43.h index 447a230..37110df 100644 --- a/drivers/net/wireless/b43/b43.h +++ b/drivers/net/wireless/b43/b43.h @@ -1011,14 +1011,10 @@ static inline bool b43_using_pio_transfers(struct b43_wldev *dev) } /* Message printing */ -void b43info(struct b43_wl *wl, const char *fmt, ...) - __attribute__ ((format(printf, 2, 3))); -void b43err(struct b43_wl *wl, const char *fmt, ...) - __attribute__ ((format(printf, 2, 3))); -void b43warn(struct b43_wl *wl, const char *fmt, ...) - __attribute__ ((format(printf, 2, 3))); -void b43dbg(struct b43_wl *wl, const char *fmt, ...) - __attribute__ ((format(printf, 2, 3))); +__printf(2, 3) void b43info(struct b43_wl *wl, const char *fmt, ...); +__printf(2, 3) void b43err(struct b43_wl *wl, const char *fmt, ...); +__printf(2, 3) void b43warn(struct b43_wl *wl, const char *fmt, ...); +__printf(2, 3) void b43dbg(struct b43_wl *wl, const char *fmt, ...); /* A WARN_ON variant that vanishes when b43 debugging is disabled. diff --git a/drivers/net/wireless/b43legacy/b43legacy.h b/drivers/net/wireless/b43legacy/b43legacy.h index 12b5182..1d4fc9d 100644 --- a/drivers/net/wireless/b43legacy/b43legacy.h +++ b/drivers/net/wireless/b43legacy/b43legacy.h @@ -810,15 +810,15 @@ struct b43legacy_lopair *b43legacy_get_lopair(struct b43legacy_phy *phy, /* Message printing */ -void b43legacyinfo(struct b43legacy_wl *wl, const char *fmt, ...) - __attribute__((format(printf, 2, 3))); -void b43legacyerr(struct b43legacy_wl *wl, const char *fmt, ...) - __attribute__((format(printf, 2, 3))); -void b43legacywarn(struct b43legacy_wl *wl, const char *fmt, ...) - __attribute__((format(printf, 2, 3))); +__printf(2, 3) +void b43legacyinfo(struct b43legacy_wl *wl, const char *fmt, ...); +__printf(2, 3) +void b43legacyerr(struct b43legacy_wl *wl, const char *fmt, ...); +__printf(2, 3) +void b43legacywarn(struct b43legacy_wl *wl, const char *fmt, ...); #if B43legacy_DEBUG -void b43legacydbg(struct b43legacy_wl *wl, const char *fmt, ...) - __attribute__((format(printf, 2, 3))); +__printf(2, 3) +void b43legacydbg(struct b43legacy_wl *wl, const char *fmt, ...); #else /* DEBUG */ # define b43legacydbg(wl, fmt...) do { /* nothing */ } while (0) #endif /* DEBUG */ diff --git a/drivers/staging/iio/trigger.h b/drivers/staging/iio/trigger.h index 598fcb3..5cc42a6 100644 --- a/drivers/staging/iio/trigger.h +++ b/drivers/staging/iio/trigger.h @@ -115,8 +115,7 @@ void iio_trigger_poll_chained(struct iio_trigger *trig, s64 time); irqreturn_t iio_trigger_generic_data_rdy_poll(int irq, void *private); -struct iio_trigger *iio_allocate_trigger(const char *fmt, ...) - __attribute__((format(printf, 1, 2))); +__printf(1, 2) struct iio_trigger *iio_allocate_trigger(const char *fmt, ...); void iio_free_trigger(struct iio_trigger *trig); #endif /* _IIO_TRIGGER_H_ */ diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index b36c557..54481a3 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -514,7 +514,7 @@ ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt) #define ecryptfs_printk(type, fmt, arg...) \ __ecryptfs_printk(type "%s: " fmt, __func__, ## arg); -__attribute__ ((format(printf, 1, 2))) +__printf(1, 2) void __ecryptfs_printk(const char *fmt, ...); extern const struct file_operations ecryptfs_main_fops; diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index af9fc89..9a4e5e2 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -135,10 +135,10 @@ extern long ext2_compat_ioctl(struct file *, unsigned int, unsigned long); struct dentry *ext2_get_parent(struct dentry *child); /* super.c */ -extern void ext2_error (struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); -extern void ext2_msg(struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); +extern __printf(3, 4) +void ext2_error(struct super_block *, const char *, const char *, ...); +extern __printf(3, 4) +void ext2_msg(struct super_block *, const char *, const char *, ...); extern void ext2_update_dynamic_rev (struct super_block *sb); extern void ext2_write_super (struct super_block *); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b7d7bd0..cec3145 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1878,40 +1878,40 @@ extern int ext4_group_extend(struct super_block *sb, extern void *ext4_kvmalloc(size_t size, gfp_t flags); extern void *ext4_kvzalloc(size_t size, gfp_t flags); extern void ext4_kvfree(void *ptr); -extern void __ext4_error(struct super_block *, const char *, unsigned int, - const char *, ...) - __attribute__ ((format (printf, 4, 5))); +extern __printf(4, 5) +void __ext4_error(struct super_block *, const char *, unsigned int, + const char *, ...); #define ext4_error(sb, message...) __ext4_error(sb, __func__, \ __LINE__, ## message) -extern void ext4_error_inode(struct inode *, const char *, unsigned int, - ext4_fsblk_t, const char *, ...) - __attribute__ ((format (printf, 5, 6))); -extern void ext4_error_file(struct file *, const char *, unsigned int, - ext4_fsblk_t, const char *, ...) - __attribute__ ((format (printf, 5, 6))); +extern __printf(5, 6) +void ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t, + const char *, ...); +extern __printf(5, 6) +void ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t, + const char *, ...); extern void __ext4_std_error(struct super_block *, const char *, unsigned int, int); -extern void __ext4_abort(struct super_block *, const char *, unsigned int, - const char *, ...) - __attribute__ ((format (printf, 4, 5))); +extern __printf(4, 5) +void __ext4_abort(struct super_block *, const char *, unsigned int, + const char *, ...); #define ext4_abort(sb, message...) __ext4_abort(sb, __func__, \ __LINE__, ## message) -extern void __ext4_warning(struct super_block *, const char *, unsigned int, - const char *, ...) - __attribute__ ((format (printf, 4, 5))); +extern __printf(4, 5) +void __ext4_warning(struct super_block *, const char *, unsigned int, + const char *, ...); #define ext4_warning(sb, message...) __ext4_warning(sb, __func__, \ __LINE__, ## message) -extern void ext4_msg(struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); +extern __printf(3, 4) +void ext4_msg(struct super_block *, const char *, const char *, ...); extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp, const char *, unsigned int, const char *); #define dump_mmp_msg(sb, mmp, msg) __dump_mmp_msg(sb, mmp, __func__, \ __LINE__, msg) -extern void __ext4_grp_locked_error(const char *, unsigned int, \ - struct super_block *, ext4_group_t, \ - unsigned long, ext4_fsblk_t, \ - const char *, ...) - __attribute__ ((format (printf, 7, 8))); +extern __printf(7, 8) +void __ext4_grp_locked_error(const char *, unsigned int, + struct super_block *, ext4_group_t, + unsigned long, ext4_fsblk_t, + const char *, ...); #define ext4_grp_locked_error(sb, grp, message...) \ __ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message) extern void ext4_update_dynamic_rev(struct super_block *sb); diff --git a/fs/fat/fat.h b/fs/fat/fat.h index a5d3853..1510a4d 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -326,15 +326,14 @@ extern int fat_fill_super(struct super_block *sb, void *data, int silent, extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2); /* fat/misc.c */ -extern void -__fat_fs_error(struct super_block *sb, int report, const char *fmt, ...) - __attribute__ ((format (printf, 3, 4))) __cold; +extern __printf(3, 4) __cold +void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...); #define fat_fs_error(sb, fmt, args...) \ __fat_fs_error(sb, 1, fmt , ## args) #define fat_fs_error_ratelimit(sb, fmt, args...) \ __fat_fs_error(sb, __ratelimit(&MSDOS_SB(sb)->ratelimit), fmt , ## args) -void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...) - __attribute__ ((format (printf, 3, 4))) __cold; +__printf(3, 4) __cold +void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...); extern int fat_clusters_flush(struct super_block *sb); extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 6670711..2553b85 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -201,7 +201,7 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); -__attribute__ ((format(printf, 2, 3))) +__printf(2, 3) void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); /** diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 331b5e2..de94617 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -311,8 +311,8 @@ static inline struct hpfs_sb_info *hpfs_sb(struct super_block *sb) /* super.c */ -void hpfs_error(struct super_block *, const char *, ...) - __attribute__((format (printf, 2, 3))); +__printf(2, 3) +void hpfs_error(struct super_block *, const char *, ...); int hpfs_stop_cycles(struct super_block *, int, int *, int *, char *); unsigned hpfs_count_one_bitmap(struct super_block *, secno); diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 255d5e1..3777d13 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -276,10 +276,10 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, /* super.c */ extern struct inode *nilfs_alloc_inode(struct super_block *); extern void nilfs_destroy_inode(struct inode *); -extern void nilfs_error(struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); -extern void nilfs_warning(struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); +extern __printf(3, 4) +void nilfs_error(struct super_block *, const char *, const char *, ...); +extern __printf(3, 4) +void nilfs_warning(struct super_block *, const char *, const char *, ...); extern struct nilfs_super_block * nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); extern int nilfs_store_magic_and_option(struct super_block *, diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h index 2142b1c..53c27ea 100644 --- a/fs/ntfs/debug.h +++ b/fs/ntfs/debug.h @@ -30,8 +30,9 @@ extern int debug_msgs; -extern void __ntfs_debug(const char *file, int line, const char *function, - const char *format, ...) __attribute__ ((format (printf, 4, 5))); +extern __printf(4, 5) +void __ntfs_debug(const char *file, int line, const char *function, + const char *format, ...); /** * ntfs_debug - write a debug level message to syslog * @f: a printf format string containing the message @@ -52,12 +53,14 @@ extern void ntfs_debug_dump_runlist(const runlist_element *rl); #endif /* !DEBUG */ -extern void __ntfs_warning(const char *function, const struct super_block *sb, - const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); +extern __printf(3, 4) +void __ntfs_warning(const char *function, const struct super_block *sb, + const char *fmt, ...); #define ntfs_warning(sb, f, a...) __ntfs_warning(__func__, sb, f, ##a) -extern void __ntfs_error(const char *function, const struct super_block *sb, - const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); +extern __printf(3, 4) +void __ntfs_error(const char *function, const struct super_block *sb, + const char *fmt, ...); #define ntfs_error(sb, f, a...) __ntfs_error(__func__, sb, f, ##a) #endif /* _LINUX_NTFS_DEBUG_H */ diff --git a/fs/ocfs2/super.h b/fs/ocfs2/super.h index 40c7de0..74ff74c 100644 --- a/fs/ocfs2/super.h +++ b/fs/ocfs2/super.h @@ -31,17 +31,15 @@ extern struct workqueue_struct *ocfs2_wq; int ocfs2_publish_get_mount_state(struct ocfs2_super *osb, int node_num); -void __ocfs2_error(struct super_block *sb, - const char *function, - const char *fmt, ...) - __attribute__ ((format (printf, 3, 4))); +__printf(3, 4) +void __ocfs2_error(struct super_block *sb, const char *function, + const char *fmt, ...); #define ocfs2_error(sb, fmt, args...) __ocfs2_error(sb, __PRETTY_FUNCTION__, fmt, ##args) -void __ocfs2_abort(struct super_block *sb, - const char *function, - const char *fmt, ...) - __attribute__ ((format (printf, 3, 4))); +__printf(3, 4) +void __ocfs2_abort(struct super_block *sb, const char *function, + const char *fmt, ...); #define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args) diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index af9fdf0..bd8ae78 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c @@ -49,18 +49,20 @@ #define ldm_error(f, a...) _ldm_printk (KERN_ERR, __func__, f, ##a) #define ldm_info(f, a...) _ldm_printk (KERN_INFO, __func__, f, ##a) -__attribute__ ((format (printf, 3, 4))) -static void _ldm_printk (const char *level, const char *function, - const char *fmt, ...) +static __printf(3, 4) +void _ldm_printk(const char *level, const char *function, const char *fmt, ...) { - static char buf[128]; + struct va_format vaf; va_list args; va_start (args, fmt); - vsnprintf (buf, sizeof (buf), fmt, args); - va_end (args); - printk ("%s%s(): %s\n", level, function, buf); + vaf.fmt = fmt; + vaf.va = &args; + + printk("%s%s(): %pV\n", level, function, &vaf); + + va_end(args); } /** diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index dbd52d4b..dc8a8dc 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -112,8 +112,8 @@ struct extent_position { /* super.c */ -__attribute__((format(printf, 3, 4))) -extern void udf_warning(struct super_block *, const char *, const char *, ...); +extern __printf(3, 4) void udf_warning(struct super_block *, const char *, + const char *, ...); static inline void udf_updated_lvid(struct super_block *sb) { struct buffer_head *bh = UDF_SB(sb)->s_lvid_bh; diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 5be2755..c26f2bc 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -117,9 +117,12 @@ extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buf extern const struct file_operations ufs_dir_operations; /* super.c */ -extern void ufs_warning (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); -extern void ufs_error (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); -extern void ufs_panic (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); +extern __printf(3, 4) +void ufs_warning(struct super_block *, const char *, const char *, ...); +extern __printf(3, 4) +void ufs_error(struct super_block *, const char *, const char *, ...); +extern __printf(3, 4) +void ufs_panic(struct super_block *, const char *, const char *, ...); /* symlink.c */ extern const struct inode_operations ufs_fast_symlink_inode_operations; diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h index 7fb7ea0..56dc0c1 100644 --- a/fs/xfs/xfs_message.h +++ b/fs/xfs/xfs_message.h @@ -3,31 +3,29 @@ struct xfs_mount; -extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void xfs_alert_tag(const struct xfs_mount *mp, int tag, - const char *fmt, ...) - __attribute__ ((format (printf, 3, 4))); -extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); +extern __printf(2, 3) +void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...); +extern __printf(2, 3) +void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...); +extern __printf(3, 4) +void xfs_alert_tag(const struct xfs_mount *mp, int tag, const char *fmt, ...); +extern __printf(2, 3) +void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...); +extern __printf(2, 3) +void xfs_err(const struct xfs_mount *mp, const char *fmt, ...); +extern __printf(2, 3) +void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...); +extern __printf(2, 3) +void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...); +extern __printf(2, 3) +void xfs_info(const struct xfs_mount *mp, const char *fmt, ...); #ifdef DEBUG -extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); +extern __printf(2, 3) +void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...); #else -static inline void -__attribute__ ((format (printf, 2, 3))) -xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) +static inline __printf(2, 3) +void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) { } #endif diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index dfb0ec6..84458b0 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -61,11 +61,12 @@ struct bug_entry { */ #ifndef __WARN_TAINT #ifndef __ASSEMBLY__ -extern void warn_slowpath_fmt(const char *file, const int line, - const char *fmt, ...) __attribute__((format(printf, 3, 4))); -extern void warn_slowpath_fmt_taint(const char *file, const int line, - unsigned taint, const char *fmt, ...) - __attribute__((format(printf, 4, 5))); +extern __printf(3, 4) +void warn_slowpath_fmt(const char *file, const int line, + const char *fmt, ...); +extern __printf(4, 5) +void warn_slowpath_fmt_taint(const char *file, const int line, unsigned taint, + const char *fmt, ...); extern void warn_slowpath_null(const char *file, const int line); #define WANT_WARN_ON_SLOWPATH #endif diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 43538b6..cf3b446 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -122,12 +122,12 @@ struct drm_device; * using the DRM_DEBUG_KMS and DRM_DEBUG. */ -extern __attribute__((format (printf, 4, 5))) +extern __printf(4, 5) void drm_ut_debug_printk(unsigned int request_level, - const char *prefix, - const char *function_name, - const char *format, ...); -extern __attribute__((format (printf, 2, 3))) + const char *prefix, + const char *function_name, + const char *format, ...); +extern __printf(2, 3) int drm_err(const char *func, const char *format, ...); /***********************************************************************/ diff --git a/include/linux/audit.h b/include/linux/audit.h index 0c80061..2f81c6f 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -584,14 +584,13 @@ extern int audit_signals; #ifdef CONFIG_AUDIT /* These are defined in audit.c */ /* Public API */ -extern void audit_log(struct audit_context *ctx, gfp_t gfp_mask, - int type, const char *fmt, ...) - __attribute__((format(printf,4,5))); +extern __printf(4, 5) +void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type, + const char *fmt, ...); extern struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type); -extern void audit_log_format(struct audit_buffer *ab, - const char *fmt, ...) - __attribute__((format(printf,2,3))); +extern __printf(2, 3) +void audit_log_format(struct audit_buffer *ab, const char *fmt, ...); extern void audit_log_end(struct audit_buffer *ab); extern int audit_string_contains_control(const char *string, size_t len); diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 8e9e4bc..4d1a074 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -170,7 +170,7 @@ extern void blk_trace_shutdown(struct request_queue *); extern int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct block_device *bdev, struct blk_user_trace_setup *buts); -extern __attribute__((format(printf, 2, 3))) +extern __printf(2, 3) void __trace_note_message(struct blk_trace *, const char *fmt, ...); /** diff --git a/include/linux/device.h b/include/linux/device.h index 85e78fc..e88abee 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -622,8 +622,8 @@ static inline const char *dev_name(const struct device *dev) return kobject_name(&dev->kobj); } -extern int dev_set_name(struct device *dev, const char *name, ...) - __attribute__((format(printf, 2, 3))); +extern __printf(2, 3) +int dev_set_name(struct device *dev, const char *name, ...); #ifdef CONFIG_NUMA static inline int dev_to_node(struct device *dev) @@ -753,10 +753,10 @@ extern struct device *device_create_vargs(struct class *cls, void *drvdata, const char *fmt, va_list vargs); -extern struct device *device_create(struct class *cls, struct device *parent, - dev_t devt, void *drvdata, - const char *fmt, ...) - __attribute__((format(printf, 5, 6))); +extern __printf(5, 6) +struct device *device_create(struct class *cls, struct device *parent, + dev_t devt, void *drvdata, + const char *fmt, ...); extern void device_destroy(struct class *cls, dev_t devt); /* @@ -800,64 +800,56 @@ extern const char *dev_driver_string(const struct device *dev); extern int __dev_printk(const char *level, const struct device *dev, struct va_format *vaf); -extern int dev_printk(const char *level, const struct device *dev, - const char *fmt, ...) - __attribute__ ((format (printf, 3, 4))); -extern int dev_emerg(const struct device *dev, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern int dev_alert(const struct device *dev, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern int dev_crit(const struct device *dev, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern int dev_err(const struct device *dev, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern int dev_warn(const struct device *dev, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern int dev_notice(const struct device *dev, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern int _dev_info(const struct device *dev, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); +extern __printf(3, 4) +int dev_printk(const char *level, const struct device *dev, + const char *fmt, ...) + ; +extern __printf(2, 3) +int dev_emerg(const struct device *dev, const char *fmt, ...); +extern __printf(2, 3) +int dev_alert(const struct device *dev, const char *fmt, ...); +extern __printf(2, 3) +int dev_crit(const struct device *dev, const char *fmt, ...); +extern __printf(2, 3) +int dev_err(const struct device *dev, const char *fmt, ...); +extern __printf(2, 3) +int dev_warn(const struct device *dev, const char *fmt, ...); +extern __printf(2, 3) +int dev_notice(const struct device *dev, const char *fmt, ...); +extern __printf(2, 3) +int _dev_info(const struct device *dev, const char *fmt, ...); #else static inline int __dev_printk(const char *level, const struct device *dev, struct va_format *vaf) - { return 0; } -static inline int dev_printk(const char *level, const struct device *dev, - const char *fmt, ...) - __attribute__ ((format (printf, 3, 4))); -static inline int dev_printk(const char *level, const struct device *dev, - const char *fmt, ...) - { return 0; } - -static inline int dev_emerg(const struct device *dev, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -static inline int dev_emerg(const struct device *dev, const char *fmt, ...) - { return 0; } -static inline int dev_crit(const struct device *dev, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -static inline int dev_crit(const struct device *dev, const char *fmt, ...) - { return 0; } -static inline int dev_alert(const struct device *dev, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -static inline int dev_alert(const struct device *dev, const char *fmt, ...) - { return 0; } -static inline int dev_err(const struct device *dev, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -static inline int dev_err(const struct device *dev, const char *fmt, ...) - { return 0; } -static inline int dev_warn(const struct device *dev, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -static inline int dev_warn(const struct device *dev, const char *fmt, ...) - { return 0; } -static inline int dev_notice(const struct device *dev, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -static inline int dev_notice(const struct device *dev, const char *fmt, ...) - { return 0; } -static inline int _dev_info(const struct device *dev, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -static inline int _dev_info(const struct device *dev, const char *fmt, ...) - { return 0; } +{ return 0; } +static inline __printf(3, 4) +int dev_printk(const char *level, const struct device *dev, + const char *fmt, ...) +{ return 0; } + +static inline __printf(2, 3) +int dev_emerg(const struct device *dev, const char *fmt, ...) +{ return 0; } +static inline __printf(2, 3) +int dev_crit(const struct device *dev, const char *fmt, ...) +{ return 0; } +static inline __printf(2, 3) +int dev_alert(const struct device *dev, const char *fmt, ...) +{ return 0; } +static inline __printf(2, 3) +int dev_err(const struct device *dev, const char *fmt, ...) +{ return 0; } +static inline __printf(2, 3) +int dev_warn(const struct device *dev, const char *fmt, ...) +{ return 0; } +static inline __printf(2, 3) +int dev_notice(const struct device *dev, const char *fmt, ...) +{ return 0; } +static inline __printf(2, 3) +int _dev_info(const struct device *dev, const char *fmt, ...) +{ return 0; } #endif diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 13aae80..0564e3c 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -37,22 +37,21 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, #if defined(CONFIG_DYNAMIC_DEBUG) extern int ddebug_remove_module(const char *mod_name); -extern int __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); +extern __printf(2, 3) +int __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...); struct device; -extern int __dynamic_dev_dbg(struct _ddebug *descriptor, - const struct device *dev, - const char *fmt, ...) - __attribute__ ((format (printf, 3, 4))); +extern __printf(3, 4) +int __dynamic_dev_dbg(struct _ddebug *descriptor, const struct device *dev, + const char *fmt, ...); struct net_device; -extern int __dynamic_netdev_dbg(struct _ddebug *descriptor, - const struct net_device *dev, - const char *fmt, ...) - __attribute__ ((format (printf, 3, 4))); +extern __printf(3, 4) +int __dynamic_netdev_dbg(struct _ddebug *descriptor, + const struct net_device *dev, + const char *fmt, ...); #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \ static struct _ddebug __used __aligned(8) \ diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 67a803a..81965cc 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -937,15 +937,15 @@ extern int ext3_group_extend(struct super_block *sb, ext3_fsblk_t n_blocks_count); /* super.c */ -extern void ext3_error (struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); +extern __printf(3, 4) +void ext3_error(struct super_block *, const char *, const char *, ...); extern void __ext3_std_error (struct super_block *, const char *, int); -extern void ext3_abort (struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); -extern void ext3_warning (struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); -extern void ext3_msg(struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); +extern __printf(3, 4) +void ext3_abort(struct super_block *, const char *, const char *, ...); +extern __printf(3, 4) +void ext3_warning(struct super_block *, const char *, const char *, ...); +extern __printf(3, 4) +void ext3_msg(struct super_block *, const char *, const char *, ...); extern void ext3_update_dynamic_rev (struct super_block *sb); #define ext3_std_error(sb, errno) \ diff --git a/include/linux/fs.h b/include/linux/fs.h index 87b4c6b..7a049fd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2635,8 +2635,8 @@ static const struct file_operations __fops = { \ .llseek = generic_file_llseek, \ }; -static inline void __attribute__((format(printf, 1, 2))) -__simple_attr_check_format(const char *fmt, ...) +static inline __printf(1, 2) +void __simple_attr_check_format(const char *fmt, ...) { /* don't do anything, just let the compiler check the arguments; */ } diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index af095b5..ce31408 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -492,10 +492,10 @@ static inline void fscache_end_io(struct fscache_retrieval *op, /* * out-of-line cache backend functions */ -extern void fscache_init_cache(struct fscache_cache *cache, - const struct fscache_cache_ops *ops, - const char *idfmt, - ...) __attribute__ ((format (printf, 3, 4))); +extern __printf(3, 4) +void fscache_init_cache(struct fscache_cache *cache, + const struct fscache_cache_ops *ops, + const char *idfmt, ...); extern int fscache_add_cache(struct fscache_cache *cache, struct fscache_object *fsdef, diff --git a/include/linux/gameport.h b/include/linux/gameport.h index b65a6f4..069ee41 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -78,8 +78,8 @@ static inline void gameport_register_port(struct gameport *gameport) void gameport_unregister_port(struct gameport *gameport); -void gameport_set_phys(struct gameport *gameport, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); +__printf(2, 3) +void gameport_set_phys(struct gameport *gameport, const char *fmt, ...); #else @@ -93,8 +93,8 @@ static inline void gameport_unregister_port(struct gameport *gameport) return; } -static inline void gameport_set_phys(struct gameport *gameport, - const char *fmt, ...) +static inline __printf(2, 3) +void gameport_set_phys(struct gameport *gameport, const char *fmt, ...) { return; } diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 0df513b..3875719 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -101,9 +101,8 @@ static inline int lookup_symbol_attrs(unsigned long addr, unsigned long *size, u #endif /*CONFIG_KALLSYMS*/ /* This macro allows us to keep printk typechecking */ -static void __check_printsym_format(const char *fmt, ...) -__attribute__((format(printf,1,2))); -static inline void __check_printsym_format(const char *fmt, ...) +static __printf(1, 2) +void __check_printsym_format(const char *fmt, ...) { } diff --git a/include/linux/kdb.h b/include/linux/kdb.h index 529d9a0..0647258 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -114,12 +114,9 @@ typedef enum { } kdb_reason_t; extern int kdb_trap_printk; -extern int vkdb_printf(const char *fmt, va_list args) - __attribute__ ((format (printf, 1, 0))); -extern int kdb_printf(const char *, ...) - __attribute__ ((format (printf, 1, 2))); -typedef int (*kdb_printf_t)(const char *, ...) - __attribute__ ((format (printf, 1, 2))); +extern __printf(1, 0) int vkdb_printf(const char *fmt, va_list args); +extern __printf(1, 2) int kdb_printf(const char *, ...); +typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...); extern void kdb_init(int level); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 8eefcf7..e40c950 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -296,20 +296,18 @@ extern long long simple_strtoll(const char *,char **,unsigned int); #define strict_strtoull kstrtoull #define strict_strtoll kstrtoll -extern int sprintf(char * buf, const char * fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern int vsprintf(char *buf, const char *, va_list) - __attribute__ ((format (printf, 2, 0))); -extern int snprintf(char * buf, size_t size, const char * fmt, ...) - __attribute__ ((format (printf, 3, 4))); -extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) - __attribute__ ((format (printf, 3, 0))); -extern int scnprintf(char * buf, size_t size, const char * fmt, ...) - __attribute__ ((format (printf, 3, 4))); -extern int vscnprintf(char *buf, size_t size, const char *fmt, va_list args) - __attribute__ ((format (printf, 3, 0))); -extern char *kasprintf(gfp_t gfp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); +extern __printf(2, 3) int sprintf(char *buf, const char * fmt, ...); +extern __printf(2, 0) int vsprintf(char *buf, const char *, va_list); +extern __printf(3, 4) +int snprintf(char *buf, size_t size, const char *fmt, ...); +extern __printf(3, 0) +int vsnprintf(char *buf, size_t size, const char *fmt, va_list args); +extern __printf(3, 4) +int scnprintf(char *buf, size_t size, const char *fmt, ...); +extern __printf(3, 0) +int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); +extern __printf(2, 3) +char *kasprintf(gfp_t gfp, const char *fmt, ...); extern char *kvasprintf(gfp_t gfp, const char *fmt, va_list args); extern int sscanf(const char *, const char *, ...) @@ -427,8 +425,8 @@ extern void tracing_start(void); extern void tracing_stop(void); extern void ftrace_off_permanent(void); -static inline void __attribute__ ((format (printf, 1, 2))) -____trace_printk_check_format(const char *fmt, ...) +static inline __printf(1, 2) +void ____trace_printk_check_format(const char *fmt, ...) { } #define __trace_printk_check_format(fmt, args...) \ @@ -467,13 +465,11 @@ do { \ __trace_printk(_THIS_IP_, fmt, ##args); \ } while (0) -extern int -__trace_bprintk(unsigned long ip, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); +extern __printf(2, 3) +int __trace_bprintk(unsigned long ip, const char *fmt, ...); -extern int -__trace_printk(unsigned long ip, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); +extern __printf(2, 3) +int __trace_printk(unsigned long ip, const char *fmt, ...); extern void trace_dump_stack(void); @@ -502,8 +498,8 @@ __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode); #else -static inline int -trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); +static inline __printf(1, 2) +int trace_printk(const char *fmt, ...); static inline void tracing_start(void) { } static inline void tracing_stop(void) { } diff --git a/include/linux/kexec.h b/include/linux/kexec.h index c2478a3..8944ebe 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -130,8 +130,8 @@ int kexec_should_crash(struct task_struct *); void crash_save_cpu(struct pt_regs *regs, int cpu); void crash_save_vmcoreinfo(void); void arch_crash_save_vmcoreinfo(void); -void vmcoreinfo_append_str(const char *fmt, ...) - __attribute__ ((format (printf, 1, 2))); +__printf(1, 2) +void vmcoreinfo_append_str(const char *fmt, ...); unsigned long paddr_vmcoreinfo_note(void); #define VMCOREINFO_OSRELEASE(value) \ diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 0da38cf..b16f653 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -32,8 +32,8 @@ extern char modprobe_path[]; /* for sysctl */ /* modprobe exit status on success, -ve on error. Return value * usually useless though. */ -extern int __request_module(bool wait, const char *name, ...) \ - __attribute__((format(printf, 2, 3))); +extern __printf(2, 3) +int __request_module(bool wait, const char *name, ...); #define request_module(mod...) __request_module(true, mod) #define request_module_nowait(mod...) __request_module(false, mod) #define try_then_request_module(x, mod...) \ diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 668729c..ad81e1c 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -72,8 +72,8 @@ struct kobject { unsigned int uevent_suppress:1; }; -extern int kobject_set_name(struct kobject *kobj, const char *name, ...) - __attribute__((format(printf, 2, 3))); +extern __printf(2, 3) +int kobject_set_name(struct kobject *kobj, const char *name, ...); extern int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list vargs); @@ -83,15 +83,13 @@ static inline const char *kobject_name(const struct kobject *kobj) } extern void kobject_init(struct kobject *kobj, struct kobj_type *ktype); -extern int __must_check kobject_add(struct kobject *kobj, - struct kobject *parent, - const char *fmt, ...) - __attribute__((format(printf, 3, 4))); -extern int __must_check kobject_init_and_add(struct kobject *kobj, - struct kobj_type *ktype, - struct kobject *parent, - const char *fmt, ...) - __attribute__((format(printf, 4, 5))); +extern __printf(3, 4) __must_check +int kobject_add(struct kobject *kobj, struct kobject *parent, + const char *fmt, ...); +extern __printf(4, 5) __must_check +int kobject_init_and_add(struct kobject *kobj, + struct kobj_type *ktype, struct kobject *parent, + const char *fmt, ...); extern void kobject_del(struct kobject *kobj); @@ -212,8 +210,8 @@ int kobject_uevent(struct kobject *kobj, enum kobject_action action); int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, char *envp[]); -int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...) - __attribute__((format (printf, 2, 3))); +__printf(2, 3) +int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...); int kobject_action_type(const char *buf, size_t count, enum kobject_action *type); @@ -226,7 +224,7 @@ static inline int kobject_uevent_env(struct kobject *kobj, char *envp[]) { return 0; } -static inline __attribute__((format(printf, 2, 3))) +static inline __printf(2, 3) int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...) { return 0; } diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 1e923e5..5cac19b 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -4,11 +4,11 @@ #include #include +__printf(4, 5) struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void *data, int node, - const char namefmt[], ...) - __attribute__((format(printf, 4, 5))); + const char namefmt[], ...); #define kthread_create(threadfn, data, namefmt, arg...) \ kthread_create_on_node(threadfn, data, -1, namefmt, ##arg) diff --git a/include/linux/libata.h b/include/linux/libata.h index 23fa829bf..cafc09a 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1256,13 +1256,13 @@ static inline int sata_srst_pmp(struct ata_link *link) /* * printk helpers */ -__attribute__((format (printf, 3, 4))) +__printf(3, 4) int ata_port_printk(const struct ata_port *ap, const char *level, const char *fmt, ...); -__attribute__((format (printf, 3, 4))) +__printf(3, 4) int ata_link_printk(const struct ata_link *link, const char *level, const char *fmt, ...); -__attribute__((format (printf, 3, 4))) +__printf(3, 4) int ata_dev_printk(const struct ata_device *dev, const char *level, const char *fmt, ...); @@ -1304,10 +1304,10 @@ void ata_print_version(const struct device *dev, const char *version); /* * ata_eh_info helpers */ -extern void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); +extern __printf(2, 3) +void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...); +extern __printf(2, 3) +void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...); extern void ata_ehi_clear_desc(struct ata_eh_info *ehi); static inline void ata_ehi_hotplugged(struct ata_eh_info *ehi) @@ -1321,8 +1321,8 @@ static inline void ata_ehi_hotplugged(struct ata_eh_info *ehi) /* * port description helpers */ -extern void ata_port_desc(struct ata_port *ap, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); +extern __printf(2, 3) +void ata_port_desc(struct ata_port *ap, const char *fmt, ...); #ifdef CONFIG_PCI extern void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset, const char *name); diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 97491f7..c5d5278 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -49,8 +49,7 @@ extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size, extern void mmiotrace_iounmap(volatile void __iomem *addr); /* For anyone to insert markers. Remember trailing newline. */ -extern int mmiotrace_printk(const char *fmt, ...) - __attribute__ ((format (printf, 1, 2))); +extern __printf(1, 2) int mmiotrace_printk(const char *fmt, ...); #else /* !CONFIG_MMIOTRACE: */ static inline int is_kmmio_active(void) { @@ -71,10 +70,7 @@ static inline void mmiotrace_iounmap(volatile void __iomem *addr) { } -static inline int mmiotrace_printk(const char *fmt, ...) - __attribute__ ((format (printf, 1, 0))); - -static inline int mmiotrace_printk(const char *fmt, ...) +static inline __printf(1, 2) int mmiotrace_printk(const char *fmt, ...) { return 0; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index df1c836..cbeb586 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2622,23 +2622,23 @@ static inline const char *netdev_name(const struct net_device *dev) extern int __netdev_printk(const char *level, const struct net_device *dev, struct va_format *vaf); -extern int netdev_printk(const char *level, const struct net_device *dev, - const char *format, ...) - __attribute__ ((format (printf, 3, 4))); -extern int netdev_emerg(const struct net_device *dev, const char *format, ...) - __attribute__ ((format (printf, 2, 3))); -extern int netdev_alert(const struct net_device *dev, const char *format, ...) - __attribute__ ((format (printf, 2, 3))); -extern int netdev_crit(const struct net_device *dev, const char *format, ...) - __attribute__ ((format (printf, 2, 3))); -extern int netdev_err(const struct net_device *dev, const char *format, ...) - __attribute__ ((format (printf, 2, 3))); -extern int netdev_warn(const struct net_device *dev, const char *format, ...) - __attribute__ ((format (printf, 2, 3))); -extern int netdev_notice(const struct net_device *dev, const char *format, ...) - __attribute__ ((format (printf, 2, 3))); -extern int netdev_info(const struct net_device *dev, const char *format, ...) - __attribute__ ((format (printf, 2, 3))); +extern __printf(3, 4) +int netdev_printk(const char *level, const struct net_device *dev, + const char *format, ...); +extern __printf(2, 3) +int netdev_emerg(const struct net_device *dev, const char *format, ...); +extern __printf(2, 3) +int netdev_alert(const struct net_device *dev, const char *format, ...); +extern __printf(2, 3) +int netdev_crit(const struct net_device *dev, const char *format, ...); +extern __printf(2, 3) +int netdev_err(const struct net_device *dev, const char *format, ...); +extern __printf(2, 3) +int netdev_warn(const struct net_device *dev, const char *format, ...); +extern __printf(2, 3) +int netdev_notice(const struct net_device *dev, const char *format, ...); +extern __printf(2, 3) +int netdev_info(const struct net_device *dev, const char *format, ...); #define MODULE_ALIAS_NETDEV(device) \ MODULE_ALIAS("netdev-" device) diff --git a/include/linux/printk.h b/include/linux/printk.h index 0101d55..f0e22f7 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -82,22 +82,22 @@ struct va_format { * Dummy printk for disabled debugging statements to use whilst maintaining * gcc's format and side-effect checking. */ -static inline __attribute__ ((format (printf, 1, 2))) +static inline __printf(1, 2) int no_printk(const char *fmt, ...) { return 0; } -extern asmlinkage __attribute__ ((format (printf, 1, 2))) +extern asmlinkage __printf(1, 2) void early_printk(const char *fmt, ...); extern int printk_needs_cpu(int cpu); extern void printk_tick(void); #ifdef CONFIG_PRINTK -asmlinkage __attribute__ ((format (printf, 1, 0))) +asmlinkage __printf(1, 0) int vprintk(const char *fmt, va_list args); -asmlinkage __attribute__ ((format (printf, 1, 2))) __cold +asmlinkage __printf(1, 2) __cold int printk(const char *fmt, ...); /* @@ -117,12 +117,12 @@ extern int kptr_restrict; void log_buf_kexec_setup(void); void __init setup_log_buf(int early); #else -static inline __attribute__ ((format (printf, 1, 0))) +static inline __printf(1, 0) int vprintk(const char *s, va_list args) { return 0; } -static inline __attribute__ ((format (printf, 1, 2))) __cold +static inline __printf(1, 2) __cold int printk(const char *s, ...) { return 0; diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 26f9e36..d93f95e 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -31,7 +31,7 @@ static inline bool is_quota_modification(struct inode *inode, struct iattr *ia) #define quota_error(sb, fmt, args...) \ __quota_error((sb), __func__, fmt , ## args) -extern __attribute__((format (printf, 3, 4))) +extern __printf(3, 4) void __quota_error(struct super_block *sb, const char *func, const char *fmt, ...); diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index be720cd..0b69a46 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -84,8 +84,7 @@ int seq_putc(struct seq_file *m, char c); int seq_puts(struct seq_file *m, const char *s); int seq_write(struct seq_file *seq, const void *data, size_t len); -int seq_printf(struct seq_file *, const char *, ...) - __attribute__ ((format (printf,2,3))); +__printf(2, 3) int seq_printf(struct seq_file *, const char *, ...); int seq_path(struct seq_file *, struct path *, char *); int seq_dentry(struct seq_file *, struct dentry *, char *); diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 5cf397c..7dadc3d 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -29,10 +29,10 @@ trace_seq_init(struct trace_seq *s) * Currently only defined when tracing is enabled. */ #ifdef CONFIG_TRACING -extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) - __attribute__ ((format (printf, 2, 0))); +extern __printf(2, 3) +int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); +extern __printf(2, 0) +int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args); extern int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); extern int trace_print_seq(struct seq_file *m, struct trace_seq *s); diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index e727555..e86af08 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -77,7 +77,7 @@ struct bt_power { #define BT_POWER_FORCE_ACTIVE_OFF 0 #define BT_POWER_FORCE_ACTIVE_ON 1 -__attribute__((format (printf, 2, 3))) +__printf(2, 3) int bt_printk(const char *level, const char *fmt, ...); #define BT_INFO(fmt, arg...) bt_printk(KERN_INFO, pr_fmt(fmt), ##arg) diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 920997f..e991bd0 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -53,12 +53,13 @@ int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger); void nf_log_unbind_pf(u_int8_t pf); /* Calls the registered backend logging function */ +__printf(7, 8) void nf_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *li, - const char *fmt, ...) __attribute__ ((format(printf,7,8))); + const char *fmt, ...); #endif /* _NF_LOG_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 5ac682f..c6658be 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -76,8 +76,8 @@ printk(KERN_DEBUG msg); } while (0) #else /* Validate arguments and do nothing */ -static inline void __attribute__ ((format (printf, 2, 3))) -SOCK_DEBUG(struct sock *sk, const char *msg, ...) +static inline __printf(2, 3) +void SOCK_DEBUG(struct sock *sk, const char *msg, ...) { } #endif diff --git a/include/sound/core.h b/include/sound/core.h index 1fa2407..91d5138 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -326,9 +326,9 @@ void release_and_free_resource(struct resource *res); /* --- */ #if defined(CONFIG_SND_DEBUG) || defined(CONFIG_SND_VERBOSE_PRINTK) +__printf(4, 5) void __snd_printk(unsigned int level, const char *file, int line, - const char *format, ...) - __attribute__ ((format (printf, 4, 5))); + const char *format, ...); #else #define __snd_printk(level, file, line, format, args...) \ printk(format, ##args) diff --git a/include/sound/info.h b/include/sound/info.h index 4e94cf1..5492cc4 100644 --- a/include/sound/info.h +++ b/include/sound/info.h @@ -110,8 +110,8 @@ void snd_card_info_read_oss(struct snd_info_buffer *buffer); static inline void snd_card_info_read_oss(struct snd_info_buffer *buffer) {} #endif -int snd_iprintf(struct snd_info_buffer *buffer, const char *fmt, ...) \ - __attribute__ ((format (printf, 2, 3))); +__printf(2, 3) +int snd_iprintf(struct snd_info_buffer *buffer, const char *fmt, ...); int snd_info_init(void); int snd_info_done(void); diff --git a/include/sound/seq_kernel.h b/include/sound/seq_kernel.h index 3d9afb6..f352a98 100644 --- a/include/sound/seq_kernel.h +++ b/include/sound/seq_kernel.h @@ -75,9 +75,9 @@ struct snd_seq_port_callback { }; /* interface for kernel client */ +__printf(3, 4) int snd_seq_create_kernel_client(struct snd_card *card, int client_index, - const char *name_fmt, ...) - __attribute__ ((format (printf, 3, 4))); + const char *name_fmt, ...); int snd_seq_delete_kernel_client(int client); int snd_seq_kernel_client_enqueue(int client, struct snd_seq_event *ev, int atomic, int hop); int snd_seq_kernel_client_dispatch(int client, struct snd_seq_event *ev, int atomic, int hop); diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h index 901724d..b62dfef 100644 --- a/include/xen/hvc-console.h +++ b/include/xen/hvc-console.h @@ -6,12 +6,12 @@ extern struct console xenboot_console; #ifdef CONFIG_HVC_XEN void xen_console_resume(void); void xen_raw_console_write(const char *str); -__attribute__((format(printf, 1, 2))) +__printf(1, 2) void xen_raw_printk(const char *fmt, ...); #else static inline void xen_console_resume(void) { } static inline void xen_raw_console_write(const char *str) { } -static inline __attribute__((format(printf, 1, 2))) +static inline __printf(1, 2) void xen_raw_printk(const char *fmt, ...) { } #endif diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index aceeca7..b9f9fb5 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -156,9 +156,9 @@ int xenbus_scanf(struct xenbus_transaction t, __attribute__((format(scanf, 4, 5))); /* Single printf and write: returns -errno or 0. */ +__printf(4, 5) int xenbus_printf(struct xenbus_transaction t, - const char *dir, const char *node, const char *fmt, ...) - __attribute__((format(printf, 4, 5))); + const char *dir, const char *node, const char *fmt, ...); /* Generic read function: NULL-terminated triples of name, * sprintf-style type string, and pointer. Returns 0 or errno.*/ @@ -200,11 +200,11 @@ int xenbus_watch_path(struct xenbus_device *dev, const char *path, struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, const char **, unsigned int)); +__printf(4, 5) int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, const char **, unsigned int), - const char *pathfmt, ...) - __attribute__ ((format (printf, 4, 5))); + const char *pathfmt, ...); int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state); int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn); @@ -223,9 +223,9 @@ int xenbus_free_evtchn(struct xenbus_device *dev, int port); enum xenbus_state xenbus_read_driver_state(const char *path); -__attribute__((format(printf, 3, 4))) +__printf(3, 4) void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...); -__attribute__((format(printf, 3, 4))) +__printf(3, 4) void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...); const char *xenbus_strstate(enum xenbus_state state); diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index b6753f4..d86583f 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -27,7 +27,7 @@ #include #include -__attribute__((format (printf, 2, 3))) +__printf(2, 3) int nfc_printk(const char *level, const char *fmt, ...); #define nfc_info(fmt, arg...) nfc_printk(KERN_INFO, fmt, ##arg) diff --git a/net/rds/rds.h b/net/rds/rds.h index da8adac..7eaba18 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -36,8 +36,8 @@ #define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args) #else /* sigh, pr_debug() causes unused variable warnings */ -static inline void __attribute__ ((format (printf, 1, 2))) -rdsdebug(char *fmt, ...) +static inline __printf(1, 2) +void rdsdebug(char *fmt, ...) { } #endif @@ -625,8 +625,8 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_lengths *lens, int (*visitor)(struct rds_connection *, void *), size_t item_len); -void __rds_conn_error(struct rds_connection *conn, const char *, ...) - __attribute__ ((format (printf, 2, 3))); +__printf(2, 3) +void __rds_conn_error(struct rds_connection *conn, const char *, ...); #define rds_conn_error(conn, fmt...) \ __rds_conn_error(conn, KERN_WARNING "RDS: " fmt) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 30d70ab..dd5cc00 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -971,9 +971,8 @@ static void svc_unregister(const struct svc_serv *serv) /* * Printk the given error with the address of the client that caused it. */ -static int -__attribute__ ((format (printf, 2, 3))) -svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) +static __printf(2, 3) +int svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) { va_list args; int r; diff --git a/sound/firewire/cmp.c b/sound/firewire/cmp.c index 14cacbc..76294f2 100644 --- a/sound/firewire/cmp.c +++ b/sound/firewire/cmp.c @@ -32,7 +32,7 @@ enum bus_reset_handling { SUCCEED_ON_BUS_RESET, }; -static __attribute__((format(printf, 2, 3))) +static __printf(2, 3) void cmp_error(struct cmp_connection *c, const char *fmt, ...) { va_list va; -- cgit v0.10.2 From c41d5a1883670e6949f01b06807ef91be67c27b4 Mon Sep 17 00:00:00 2001 From: Ike Panhc Date: Mon, 31 Oct 2011 17:11:37 -0700 Subject: MAINTAINERS: add new entry for ideapad-laptop Signed-off-by: Ike Panhc Cc: Matthew Garrett Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index 23b89e0..2ca4767 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3209,6 +3209,13 @@ F: Documentation/ide/ F: drivers/ide/ F: include/linux/ide.h +IDEAPAD LAPTOP EXTRAS DRIVER +M: Ike Panhc +L: platform-driver-x86@vger.kernel.org +W: http://launchpad.net/ideapad-laptop +S: Maintained +F: drivers/platform/x86/ideapad-laptop.c + IDE/ATAPI DRIVERS M: Borislav Petkov L: linux-ide@vger.kernel.org -- cgit v0.10.2 From 03535ab54a3f3ead011c14b98f2781a285f95025 Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Mon, 31 Oct 2011 17:11:38 -0700 Subject: MAINTAINERS: Linas has moved While ego surfing, I noticed an email address problem. Signed-off-by: Linas Vepstas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index 2ca4767..a9837f2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4957,7 +4957,7 @@ F: include/linux/i2c-algo-pca.h F: include/linux/i2c-pca-platform.h PCI ERROR RECOVERY -M: Linas Vepstas +M: Linas Vepstas L: linux-pci@vger.kernel.org S: Supported F: Documentation/PCI/pci-error-recovery.txt -- cgit v0.10.2 From 1330b0dcf876201bda0872ea32d64c190384defc Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 31 Oct 2011 17:11:41 -0700 Subject: MAINTAINERS: add ASLR maintainer Since achieving the full ASLR by merging the PIE randomization in commit cc503c1b43 ("x86: PIE executable randomization"), I have been dealing with most (if not all) of the bugreports reported against userspace address space randomization, so it might be a good idea to provide a decent contact point in MAINTAINERS. Signed-off-by: Jiri Kosina Cc: Josh Boyer Cc: Nicolas Pitre Cc: Ingo Molnar Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index a9837f2..3c17a20 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -316,6 +316,10 @@ W: http://wiki.analog.com/AD7879 S: Supported F: drivers/input/touchscreen/ad7879.c +ADDRESS SPACE LAYOUT RANDOMIZATION (ASLR) +M: Jiri Kosina +S: Maintained + ADM1025 HARDWARE MONITOR DRIVER M: Jean Delvare L: lm-sensors@lm-sensors.org -- cgit v0.10.2 From 2967b0ad3302d8627c0d99bfed4b8f167e21fdee Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 31 Oct 2011 17:11:44 -0700 Subject: video/backlight: remove obsolete cleanup for clientdata A few new i2c-drivers came into the kernel which clear the clientdata-pointer on exit or error. This is obsolete meanwhile, the core will do it. Signed-off-by: Wolfram Sang Cc: Richard Purdie Cc: Paul Mundt Acked-by: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/video/backlight/adp8870_bl.c b/drivers/video/backlight/adp8870_bl.c index 98e0304..6c68a68 100644 --- a/drivers/video/backlight/adp8870_bl.c +++ b/drivers/video/backlight/adp8870_bl.c @@ -931,7 +931,6 @@ out: out1: backlight_device_unregister(bl); out2: - i2c_set_clientdata(client, NULL); kfree(data); return ret; @@ -951,7 +950,6 @@ static int __devexit adp8870_remove(struct i2c_client *client) &adp8870_bl_attr_group); backlight_device_unregister(data->bl); - i2c_set_clientdata(client, NULL); kfree(data); return 0; -- cgit v0.10.2 From 0556dc340e5159cdff925a5ab7f3a72f49745661 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 31 Oct 2011 17:11:48 -0700 Subject: backlight: fix broken regulator API usage in l4f00242t03 The regulator support in the l4f00242t03 is very non-idiomatic. Rather than requesting the regulators based on the device name and the supply names used by the device the driver requires boards to pass system specific supply names around through platform data. The driver also conditionally requests the regulators based on this platform data, adding unneeded conditional code to the driver. Fix this by removing the platform data and converting to the standard idiom, also updating all in tree users of the driver. As no datasheet appears to be available for the LCD I'm guessing the names for the supplies based on the existing users and I've no ability to do anything more than compile test. The use of regulator_set_voltage() in the driver is also problematic, since fixed voltages are required the expectation would be that the voltages would be fixed in the constraints set by the machines rather than manually configured by the driver, but is less problematic. Signed-off-by: Mark Brown Tested-by: Fabio Estevam Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/arm/mach-imx/mach-mx27_3ds.c b/arch/arm/mach-imx/mach-mx27_3ds.c index 2eafbac..3772304 100644 --- a/arch/arm/mach-imx/mach-mx27_3ds.c +++ b/arch/arm/mach-imx/mach-mx27_3ds.c @@ -241,7 +241,7 @@ static struct regulator_init_data gpo_init = { }; static struct regulator_consumer_supply vmmc1_consumers[] = { - REGULATOR_SUPPLY("lcd_2v8", NULL), + REGULATOR_SUPPLY("vcore", "spi0.0"), }; static struct regulator_init_data vmmc1_init = { @@ -257,7 +257,7 @@ static struct regulator_init_data vmmc1_init = { }; static struct regulator_consumer_supply vgen_consumers[] = { - REGULATOR_SUPPLY("vdd_lcdio", NULL), + REGULATOR_SUPPLY("vdd", "spi0.0"), }; static struct regulator_init_data vgen_init = { @@ -348,8 +348,6 @@ static const struct imx_fb_platform_data mx27_3ds_fb_data __initconst = { static struct l4f00242t03_pdata mx27_3ds_lcd_pdata = { .reset_gpio = LCD_RESET, .data_enable_gpio = LCD_ENABLE, - .core_supply = "lcd_2v8", - .io_supply = "vdd_lcdio", }; static struct spi_board_info mx27_3ds_spi_devs[] __initdata = { diff --git a/arch/arm/mach-imx/mach-mx31_3ds.c b/arch/arm/mach-imx/mach-mx31_3ds.c index 589066f..6484db5 100644 --- a/arch/arm/mach-imx/mach-mx31_3ds.c +++ b/arch/arm/mach-imx/mach-mx31_3ds.c @@ -285,8 +285,6 @@ static struct mx3fb_platform_data mx3fb_pdata __initdata = { static struct l4f00242t03_pdata mx31_3ds_l4f00242t03_pdata = { .reset_gpio = IOMUX_TO_GPIO(MX31_PIN_LCS1), .data_enable_gpio = IOMUX_TO_GPIO(MX31_PIN_SER_RS), - .core_supply = "lcd_2v8", - .io_supply = "vdd_lcdio", }; /* @@ -411,7 +409,7 @@ static struct regulator_init_data vmmc2_init = { }; static struct regulator_consumer_supply vmmc1_consumers[] = { - REGULATOR_SUPPLY("lcd_2v8", NULL), + REGULATOR_SUPPLY("vcore", "spi0.0"), REGULATOR_SUPPLY("cmos_2v8", "soc-camera-pdrv.0"), }; @@ -428,7 +426,7 @@ static struct regulator_init_data vmmc1_init = { }; static struct regulator_consumer_supply vgen_consumers[] = { - REGULATOR_SUPPLY("vdd_lcdio", NULL), + REGULATOR_SUPPLY("vdd", "spi0.0"), }; static struct regulator_init_data vgen_init = { diff --git a/drivers/video/backlight/l4f00242t03.c b/drivers/video/backlight/l4f00242t03.c index 98ad3e5..77ec70b 100644 --- a/drivers/video/backlight/l4f00242t03.c +++ b/drivers/video/backlight/l4f00242t03.c @@ -52,15 +52,11 @@ static void l4f00242t03_lcd_init(struct spi_device *spi) dev_dbg(&spi->dev, "initializing LCD\n"); - if (priv->io_reg) { - regulator_set_voltage(priv->io_reg, 1800000, 1800000); - regulator_enable(priv->io_reg); - } + regulator_set_voltage(priv->io_reg, 1800000, 1800000); + regulator_enable(priv->io_reg); - if (priv->core_reg) { - regulator_set_voltage(priv->core_reg, 2800000, 2800000); - regulator_enable(priv->core_reg); - } + regulator_set_voltage(priv->core_reg, 2800000, 2800000); + regulator_enable(priv->core_reg); l4f00242t03_reset(pdata->reset_gpio); @@ -78,11 +74,8 @@ static void l4f00242t03_lcd_powerdown(struct spi_device *spi) gpio_set_value(pdata->data_enable_gpio, 0); - if (priv->io_reg) - regulator_disable(priv->io_reg); - - if (priv->core_reg) - regulator_disable(priv->core_reg); + regulator_disable(priv->io_reg); + regulator_disable(priv->core_reg); } static int l4f00242t03_lcd_power_get(struct lcd_device *ld) @@ -201,24 +194,18 @@ static int __devinit l4f00242t03_probe(struct spi_device *spi) if (ret) goto err3; - if (pdata->io_supply) { - priv->io_reg = regulator_get(NULL, pdata->io_supply); - - if (IS_ERR(priv->io_reg)) { - pr_err("%s: Unable to get the IO regulator\n", - __func__); - goto err3; - } + priv->io_reg = regulator_get(&spi->dev, "vdd"); + if (IS_ERR(priv->io_reg)) { + dev_err(&spi->dev, "%s: Unable to get the IO regulator\n", + __func__); + goto err3; } - if (pdata->core_supply) { - priv->core_reg = regulator_get(NULL, pdata->core_supply); - - if (IS_ERR(priv->core_reg)) { - pr_err("%s: Unable to get the core regulator\n", - __func__); - goto err4; - } + priv->core_reg = regulator_get(&spi->dev, "vcore"); + if (IS_ERR(priv->core_reg)) { + dev_err(&spi->dev, "%s: Unable to get the core regulator\n", + __func__); + goto err4; } priv->ld = lcd_device_register("l4f00242t03", @@ -238,11 +225,9 @@ static int __devinit l4f00242t03_probe(struct spi_device *spi) return 0; err5: - if (priv->core_reg) - regulator_put(priv->core_reg); + regulator_put(priv->core_reg); err4: - if (priv->io_reg) - regulator_put(priv->io_reg); + regulator_put(priv->io_reg); err3: gpio_free(pdata->data_enable_gpio); err2: @@ -266,10 +251,8 @@ static int __devexit l4f00242t03_remove(struct spi_device *spi) gpio_free(pdata->data_enable_gpio); gpio_free(pdata->reset_gpio); - if (priv->io_reg) - regulator_put(priv->io_reg); - if (priv->core_reg) - regulator_put(priv->core_reg); + regulator_put(priv->io_reg); + regulator_put(priv->core_reg); kfree(priv); diff --git a/include/linux/spi/l4f00242t03.h b/include/linux/spi/l4f00242t03.h index aee1dbd..bc8677c 100644 --- a/include/linux/spi/l4f00242t03.h +++ b/include/linux/spi/l4f00242t03.h @@ -24,8 +24,6 @@ struct l4f00242t03_pdata { unsigned int reset_gpio; unsigned int data_enable_gpio; - const char *io_supply; /* will be set to 1.8 V */ - const char *core_supply; /* will be set to 2.8 V */ }; #endif /* _INCLUDE_LINUX_SPI_L4F00242T03_H_ */ -- cgit v0.10.2 From 4f5a66b05971d6be08fd596139cd190758831686 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 31 Oct 2011 17:11:50 -0700 Subject: drivers/video/backlight/l4f00242t03.c: use gpio_request_one() to simplify error handling Using gpio_request_one can make the error handling simpler. Signed-off-by: Fabio Estevam Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/video/backlight/l4f00242t03.c b/drivers/video/backlight/l4f00242t03.c index 77ec70b..3543f1b 100644 --- a/drivers/video/backlight/l4f00242t03.c +++ b/drivers/video/backlight/l4f00242t03.c @@ -171,29 +171,22 @@ static int __devinit l4f00242t03_probe(struct spi_device *spi) priv->spi = spi; - ret = gpio_request(pdata->reset_gpio, "lcd l4f00242t03 reset"); + ret = gpio_request_one(pdata->reset_gpio, GPIOF_OUT_INIT_HIGH, + "lcd l4f00242t03 reset"); if (ret) { dev_err(&spi->dev, "Unable to get the lcd l4f00242t03 reset gpio.\n"); goto err; } - ret = gpio_direction_output(pdata->reset_gpio, 1); - if (ret) - goto err2; - - ret = gpio_request(pdata->data_enable_gpio, - "lcd l4f00242t03 data enable"); + ret = gpio_request_one(pdata->data_enable_gpio, GPIOF_OUT_INIT_LOW, + "lcd l4f00242t03 data enable"); if (ret) { dev_err(&spi->dev, "Unable to get the lcd l4f00242t03 data en gpio.\n"); goto err2; } - ret = gpio_direction_output(pdata->data_enable_gpio, 0); - if (ret) - goto err3; - priv->io_reg = regulator_get(&spi->dev, "vdd"); if (IS_ERR(priv->io_reg)) { dev_err(&spi->dev, "%s: Unable to get the IO regulator\n", -- cgit v0.10.2 From 1bff3a2093161d1b982a0958e4eb194bf3c8ce47 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 31 Oct 2011 17:11:52 -0700 Subject: backlight: rename corgibl_limit_intensity() to genericbl_limit_intensity() The rename of corgibl_limit_intensity is missed in commit d00ba726 ("backlight: Rename the corgi backlight driver to generic"). Let's fix it now. Signed-off-by: Axel Lin Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/video/backlight/generic_bl.c b/drivers/video/backlight/generic_bl.c index 8c6befd..adb1914 100644 --- a/drivers/video/backlight/generic_bl.c +++ b/drivers/video/backlight/generic_bl.c @@ -56,7 +56,7 @@ static int genericbl_get_intensity(struct backlight_device *bd) * Called when the battery is low to limit the backlight intensity. * If limit==0 clear any limit, otherwise limit the intensity */ -void corgibl_limit_intensity(int limit) +void genericbl_limit_intensity(int limit) { struct backlight_device *bd = generic_backlight_device; @@ -68,7 +68,7 @@ void corgibl_limit_intensity(int limit) backlight_update_status(generic_backlight_device); mutex_unlock(&bd->ops_lock); } -EXPORT_SYMBOL(corgibl_limit_intensity); +EXPORT_SYMBOL(genericbl_limit_intensity); static const struct backlight_ops genericbl_ops = { .options = BL_CORE_SUSPENDRESUME, -- cgit v0.10.2 From f59b6f9f323ff1b4567a69f9063cdd8bb57805e6 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 31 Oct 2011 17:11:55 -0700 Subject: leds: Renesas TPU LED driver Add V2 of the LED driver for a single timer channel for the TPU hardware block commonly found in Renesas SoCs. The driver has been written with optimal Power Management in mind, so to save power the LED is driven as a regular GPIO pin in case of maximum brightness and power off which allows the TPU hardware to be idle and which in turn allows the clocks to be stopped and the power domain to be turned off transparently. Any other brightness level requires use of the TPU hardware in PWM mode. TPU hardware device clocks and power are managed through Runtime PM. System suspend and resume is known to be working - during suspend the LED is set to off by the generic LED code. The TPU hardware timer is equipeed with a 16-bit counter together with an up-to-divide-by-64 prescaler which makes the hardware suitable for brightness control. Hardware blink is unsupported. The LED PWM waveform has been verified with a Fluke 123 Scope meter on a sh7372 Mackerel board. Tested with experimental sh7372 A3SP power domain patches. Platform device bind/unbind tested ok. V2 has been tested on the DS2 LED of the sh73a0-based AG5EVM. [axel.lin@gmail.com: include linux/module.h] Signed-off-by: Magnus Damm Cc: Paul Mundt Cc: Richard Purdie Cc: Grant Likely Signed-off-by: Axel Lin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index dc7caad..ff203a4 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -375,6 +375,18 @@ config LEDS_ASIC3 cannot be used. This driver supports hardware blinking with an on+off period from 62ms to 125s. Say Y to enable LEDs on the HP iPAQ hx4700. +config LEDS_RENESAS_TPU + bool "LED support for Renesas TPU" + depends on LEDS_CLASS && HAVE_CLK && GENERIC_GPIO + help + This option enables build of the LED TPU platform driver, + suitable to drive any TPU channel on newer Renesas SoCs. + The driver controls the GPIO pin connected to the LED via + the GPIO framework and expects the LED to be connected to + a pin that can be driven in both GPIO mode and using TPU + pin function. The latter to support brightness control. + Brightness control is supported but hardware blinking is not. + config LEDS_TRIGGERS bool "LED Trigger support" depends on LEDS_CLASS diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index a0a1b89..e4f6bf5 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_LEDS_MC13783) += leds-mc13783.o obj-$(CONFIG_LEDS_NS2) += leds-ns2.o obj-$(CONFIG_LEDS_NETXBIG) += leds-netxbig.o obj-$(CONFIG_LEDS_ASIC3) += leds-asic3.o +obj-$(CONFIG_LEDS_RENESAS_TPU) += leds-renesas-tpu.o # LED SPI Drivers obj-$(CONFIG_LEDS_DAC124S085) += leds-dac124s085.o diff --git a/drivers/leds/leds-renesas-tpu.c b/drivers/leds/leds-renesas-tpu.c new file mode 100644 index 0000000..d9a7d72 --- /dev/null +++ b/drivers/leds/leds-renesas-tpu.c @@ -0,0 +1,344 @@ +/* + * LED control using Renesas TPU + * + * Copyright (C) 2011 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +enum r_tpu_pin { R_TPU_PIN_UNUSED, R_TPU_PIN_GPIO, R_TPU_PIN_GPIO_FN }; +enum r_tpu_timer { R_TPU_TIMER_UNUSED, R_TPU_TIMER_ON }; + +struct r_tpu_priv { + struct led_classdev ldev; + void __iomem *mapbase; + struct clk *clk; + struct platform_device *pdev; + enum r_tpu_pin pin_state; + enum r_tpu_timer timer_state; + unsigned long min_rate; + unsigned int refresh_rate; +}; + +static DEFINE_SPINLOCK(r_tpu_lock); + +#define TSTR -1 /* Timer start register (shared register) */ +#define TCR 0 /* Timer control register (+0x00) */ +#define TMDR 1 /* Timer mode register (+0x04) */ +#define TIOR 2 /* Timer I/O control register (+0x08) */ +#define TIER 3 /* Timer interrupt enable register (+0x0c) */ +#define TSR 4 /* Timer status register (+0x10) */ +#define TCNT 5 /* Timer counter (+0x14) */ +#define TGRA 6 /* Timer general register A (+0x18) */ +#define TGRB 7 /* Timer general register B (+0x1c) */ +#define TGRC 8 /* Timer general register C (+0x20) */ +#define TGRD 9 /* Timer general register D (+0x24) */ + +static inline unsigned short r_tpu_read(struct r_tpu_priv *p, int reg_nr) +{ + struct led_renesas_tpu_config *cfg = p->pdev->dev.platform_data; + void __iomem *base = p->mapbase; + unsigned long offs = reg_nr << 2; + + if (reg_nr == TSTR) + return ioread16(base - cfg->channel_offset); + + return ioread16(base + offs); +} + +static inline void r_tpu_write(struct r_tpu_priv *p, int reg_nr, + unsigned short value) +{ + struct led_renesas_tpu_config *cfg = p->pdev->dev.platform_data; + void __iomem *base = p->mapbase; + unsigned long offs = reg_nr << 2; + + if (reg_nr == TSTR) { + iowrite16(value, base - cfg->channel_offset); + return; + } + + iowrite16(value, base + offs); +} + +static void r_tpu_start_stop_ch(struct r_tpu_priv *p, int start) +{ + struct led_renesas_tpu_config *cfg = p->pdev->dev.platform_data; + unsigned long flags, value; + + /* start stop register shared by multiple timer channels */ + spin_lock_irqsave(&r_tpu_lock, flags); + value = r_tpu_read(p, TSTR); + + if (start) + value |= 1 << cfg->timer_bit; + else + value &= ~(1 << cfg->timer_bit); + + r_tpu_write(p, TSTR, value); + spin_unlock_irqrestore(&r_tpu_lock, flags); +} + +static int r_tpu_enable(struct r_tpu_priv *p, enum led_brightness brightness) +{ + struct led_renesas_tpu_config *cfg = p->pdev->dev.platform_data; + int prescaler[] = { 1, 4, 16, 64 }; + int k, ret; + unsigned long rate, tmp; + + if (p->timer_state == R_TPU_TIMER_ON) + return 0; + + /* wake up device and enable clock */ + pm_runtime_get_sync(&p->pdev->dev); + ret = clk_enable(p->clk); + if (ret) { + dev_err(&p->pdev->dev, "cannot enable clock\n"); + return ret; + } + + /* make sure channel is disabled */ + r_tpu_start_stop_ch(p, 0); + + /* get clock rate after enabling it */ + rate = clk_get_rate(p->clk); + + /* pick the lowest acceptable rate */ + for (k = 0; k < ARRAY_SIZE(prescaler); k++) + if ((rate / prescaler[k]) < p->min_rate) + break; + + if (!k) { + dev_err(&p->pdev->dev, "clock rate mismatch\n"); + goto err0; + } + dev_dbg(&p->pdev->dev, "rate = %lu, prescaler %u\n", + rate, prescaler[k - 1]); + + /* clear TCNT on TGRB match, count on rising edge, set prescaler */ + r_tpu_write(p, TCR, 0x0040 | (k - 1)); + + /* output 0 until TGRA, output 1 until TGRB */ + r_tpu_write(p, TIOR, 0x0002); + + rate /= prescaler[k - 1] * p->refresh_rate; + r_tpu_write(p, TGRB, rate); + dev_dbg(&p->pdev->dev, "TRGB = 0x%04lx\n", rate); + + tmp = (cfg->max_brightness - brightness) * rate; + r_tpu_write(p, TGRA, tmp / cfg->max_brightness); + dev_dbg(&p->pdev->dev, "TRGA = 0x%04lx\n", tmp / cfg->max_brightness); + + /* PWM mode */ + r_tpu_write(p, TMDR, 0x0002); + + /* enable channel */ + r_tpu_start_stop_ch(p, 1); + + p->timer_state = R_TPU_TIMER_ON; + return 0; + err0: + clk_disable(p->clk); + pm_runtime_put_sync(&p->pdev->dev); + return -ENOTSUPP; +} + +static void r_tpu_disable(struct r_tpu_priv *p) +{ + if (p->timer_state == R_TPU_TIMER_UNUSED) + return; + + /* disable channel */ + r_tpu_start_stop_ch(p, 0); + + /* stop clock and mark device as idle */ + clk_disable(p->clk); + pm_runtime_put_sync(&p->pdev->dev); + + p->timer_state = R_TPU_TIMER_UNUSED; +} + +static void r_tpu_set_pin(struct r_tpu_priv *p, enum r_tpu_pin new_state, + enum led_brightness brightness) +{ + struct led_renesas_tpu_config *cfg = p->pdev->dev.platform_data; + + if (p->pin_state == new_state) { + if (p->pin_state == R_TPU_PIN_GPIO) + gpio_set_value(cfg->pin_gpio, brightness); + return; + } + + if (p->pin_state == R_TPU_PIN_GPIO) + gpio_free(cfg->pin_gpio); + + if (p->pin_state == R_TPU_PIN_GPIO_FN) + gpio_free(cfg->pin_gpio_fn); + + if (new_state == R_TPU_PIN_GPIO) { + gpio_request(cfg->pin_gpio, cfg->name); + gpio_direction_output(cfg->pin_gpio, !!brightness); + } + if (new_state == R_TPU_PIN_GPIO_FN) + gpio_request(cfg->pin_gpio_fn, cfg->name); + + p->pin_state = new_state; +} + +static void r_tpu_set_brightness(struct led_classdev *ldev, + enum led_brightness brightness) +{ + struct r_tpu_priv *p = container_of(ldev, struct r_tpu_priv, ldev); + + r_tpu_disable(p); + + /* off and maximum are handled as GPIO pins, in between PWM */ + if ((brightness == 0) || (brightness == ldev->max_brightness)) + r_tpu_set_pin(p, R_TPU_PIN_GPIO, brightness); + else { + r_tpu_set_pin(p, R_TPU_PIN_GPIO_FN, 0); + r_tpu_enable(p, brightness); + } +} + +static int __devinit r_tpu_probe(struct platform_device *pdev) +{ + struct led_renesas_tpu_config *cfg = pdev->dev.platform_data; + struct r_tpu_priv *p; + struct resource *res; + int ret = -ENXIO; + + if (!cfg) { + dev_err(&pdev->dev, "missing platform data\n"); + goto err0; + } + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) { + dev_err(&pdev->dev, "failed to allocate driver data\n"); + ret = -ENOMEM; + goto err0; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "failed to get I/O memory\n"); + goto err1; + } + + /* map memory, let mapbase point to our channel */ + p->mapbase = ioremap_nocache(res->start, resource_size(res)); + if (p->mapbase == NULL) { + dev_err(&pdev->dev, "failed to remap I/O memory\n"); + goto err1; + } + + /* get hold of clock */ + p->clk = clk_get(&pdev->dev, NULL); + if (IS_ERR(p->clk)) { + dev_err(&pdev->dev, "cannot get clock\n"); + ret = PTR_ERR(p->clk); + goto err2; + } + + p->pdev = pdev; + p->pin_state = R_TPU_PIN_UNUSED; + p->timer_state = R_TPU_TIMER_UNUSED; + p->refresh_rate = cfg->refresh_rate ? cfg->refresh_rate : 100; + r_tpu_set_pin(p, R_TPU_PIN_GPIO, LED_OFF); + platform_set_drvdata(pdev, p); + + + p->ldev.name = cfg->name; + p->ldev.brightness = LED_OFF; + p->ldev.max_brightness = cfg->max_brightness; + p->ldev.brightness_set = r_tpu_set_brightness; + p->ldev.flags |= LED_CORE_SUSPENDRESUME; + ret = led_classdev_register(&pdev->dev, &p->ldev); + if (ret < 0) + goto err3; + + /* max_brightness may be updated by the LED core code */ + p->min_rate = p->ldev.max_brightness * p->refresh_rate; + + pm_runtime_enable(&pdev->dev); + return 0; + + err3: + r_tpu_set_pin(p, R_TPU_PIN_UNUSED, LED_OFF); + clk_put(p->clk); + err2: + iounmap(p->mapbase); + err1: + kfree(p); + err0: + return ret; +} + +static int __devexit r_tpu_remove(struct platform_device *pdev) +{ + struct r_tpu_priv *p = platform_get_drvdata(pdev); + + r_tpu_set_brightness(&p->ldev, LED_OFF); + led_classdev_unregister(&p->ldev); + r_tpu_disable(p); + r_tpu_set_pin(p, R_TPU_PIN_UNUSED, LED_OFF); + + pm_runtime_disable(&pdev->dev); + clk_put(p->clk); + + iounmap(p->mapbase); + kfree(p); + return 0; +} + +static struct platform_driver r_tpu_device_driver = { + .probe = r_tpu_probe, + .remove = __devexit_p(r_tpu_remove), + .driver = { + .name = "leds-renesas-tpu", + } +}; + +static int __init r_tpu_init(void) +{ + return platform_driver_register(&r_tpu_device_driver); +} + +static void __exit r_tpu_exit(void) +{ + platform_driver_unregister(&r_tpu_device_driver); +} + +module_init(r_tpu_init); +module_exit(r_tpu_exit); + +MODULE_AUTHOR("Magnus Damm"); +MODULE_DESCRIPTION("Renesas TPU LED Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/leds-renesas-tpu.h b/include/linux/leds-renesas-tpu.h new file mode 100644 index 0000000..0553870 --- /dev/null +++ b/include/linux/leds-renesas-tpu.h @@ -0,0 +1,14 @@ +#ifndef __LEDS_RENESAS_TPU_H__ +#define __LEDS_RENESAS_TPU_H__ + +struct led_renesas_tpu_config { + char *name; + unsigned pin_gpio_fn; + unsigned pin_gpio; + unsigned int channel_offset; + unsigned int timer_bit; + unsigned int max_brightness; + unsigned int refresh_rate; +}; + +#endif /* __LEDS_RENESAS_TPU_H__ */ -- cgit v0.10.2 From cba4c2ac080e170732a030afc27664b00f838e03 Mon Sep 17 00:00:00 2001 From: Masakazu Mokuno Date: Mon, 31 Oct 2011 17:11:59 -0700 Subject: drivers/leds/led-triggers.c: fix memory leak The memory for struct led_trigger should be kfreed in the led_trigger_register() error path. Also this function should return NULL on error. Signed-off-by: Masakazu Mokuno Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c index 4bebae7..6f1ff93 100644 --- a/drivers/leds/led-triggers.c +++ b/drivers/leds/led-triggers.c @@ -261,9 +261,12 @@ void led_trigger_register_simple(const char *name, struct led_trigger **tp) if (trigger) { trigger->name = name; err = led_trigger_register(trigger); - if (err < 0) + if (err < 0) { + kfree(trigger); + trigger = NULL; printk(KERN_WARNING "LED trigger %s failed to register" " (%d)\n", name, err); + } } else printk(KERN_WARNING "LED trigger %s failed to register" " (no memory)\n", name); -- cgit v0.10.2 From 3edc5804b7d27ec48975d7d7acbc0b3e1c62c064 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 31 Oct 2011 17:12:03 -0700 Subject: drivers/leds/leds-lm3530.c: remove obsolete cleanup for clientdata A few new i2c-drivers came into the kernel which clear the clientdata-pointer on exit or error. This is obsolete meanwhile, the core will do it. Signed-off-by: Wolfram Sang Cc: Richard Purdie Acked-by: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/leds/leds-lm3530.c b/drivers/leds/leds-lm3530.c index 3dd7090..d2d25e6 100644 --- a/drivers/leds/leds-lm3530.c +++ b/drivers/leds/leds-lm3530.c @@ -421,7 +421,6 @@ err_class_register: err_reg_init: regulator_put(drvdata->regulator); err_regulator_get: - i2c_set_clientdata(client, NULL); kfree(drvdata); err_out: return err; -- cgit v0.10.2 From 02c3294174e170a47cfd58956a739901160381a8 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 31 Oct 2011 17:12:06 -0700 Subject: drivers/leds/leds-renesas-tpu.c: update driver to use workqueue Use a workqueue in the Renesas TPU LED driver to allow the Runtime PM code to sleep. Signed-off-by: Magnus Damm Cc: Paul Mundt Cc: Richard Purdie Cc: Grant Likely Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/leds/leds-renesas-tpu.c b/drivers/leds/leds-renesas-tpu.c index d9a7d72..45dbdf5 100644 --- a/drivers/leds/leds-renesas-tpu.c +++ b/drivers/leds/leds-renesas-tpu.c @@ -31,6 +31,7 @@ #include #include #include +#include enum r_tpu_pin { R_TPU_PIN_UNUSED, R_TPU_PIN_GPIO, R_TPU_PIN_GPIO_FN }; enum r_tpu_timer { R_TPU_TIMER_UNUSED, R_TPU_TIMER_ON }; @@ -44,6 +45,8 @@ struct r_tpu_priv { enum r_tpu_timer timer_state; unsigned long min_rate; unsigned int refresh_rate; + struct work_struct work; + enum led_brightness new_brightness; }; static DEFINE_SPINLOCK(r_tpu_lock); @@ -211,15 +214,15 @@ static void r_tpu_set_pin(struct r_tpu_priv *p, enum r_tpu_pin new_state, p->pin_state = new_state; } -static void r_tpu_set_brightness(struct led_classdev *ldev, - enum led_brightness brightness) +static void r_tpu_work(struct work_struct *work) { - struct r_tpu_priv *p = container_of(ldev, struct r_tpu_priv, ldev); + struct r_tpu_priv *p = container_of(work, struct r_tpu_priv, work); + enum led_brightness brightness = p->new_brightness; r_tpu_disable(p); /* off and maximum are handled as GPIO pins, in between PWM */ - if ((brightness == 0) || (brightness == ldev->max_brightness)) + if ((brightness == 0) || (brightness == p->ldev.max_brightness)) r_tpu_set_pin(p, R_TPU_PIN_GPIO, brightness); else { r_tpu_set_pin(p, R_TPU_PIN_GPIO_FN, 0); @@ -227,6 +230,14 @@ static void r_tpu_set_brightness(struct led_classdev *ldev, } } +static void r_tpu_set_brightness(struct led_classdev *ldev, + enum led_brightness brightness) +{ + struct r_tpu_priv *p = container_of(ldev, struct r_tpu_priv, ldev); + p->new_brightness = brightness; + schedule_work(&p->work); +} + static int __devinit r_tpu_probe(struct platform_device *pdev) { struct led_renesas_tpu_config *cfg = pdev->dev.platform_data; @@ -274,6 +285,7 @@ static int __devinit r_tpu_probe(struct platform_device *pdev) r_tpu_set_pin(p, R_TPU_PIN_GPIO, LED_OFF); platform_set_drvdata(pdev, p); + INIT_WORK(&p->work, r_tpu_work); p->ldev.name = cfg->name; p->ldev.brightness = LED_OFF; @@ -307,6 +319,7 @@ static int __devexit r_tpu_remove(struct platform_device *pdev) r_tpu_set_brightness(&p->ldev, LED_OFF); led_classdev_unregister(&p->ldev); + cancel_work_sync(&p->work); r_tpu_disable(p); r_tpu_set_pin(p, R_TPU_PIN_UNUSED, LED_OFF); -- cgit v0.10.2 From 2b67c95b74f17c13c7b3a990540c9dd9b4a8480d Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 31 Oct 2011 17:12:09 -0700 Subject: drivers/leds/leds-renesas-tpu.c: move Renesas TPU LED driver platform data Use the platform_data include directory for the TPU LED driver, as suggested by Paul Mundt. Signed-off-by: Magnus Damm Cc: Paul Mundt Cc: Richard Purdie Cc: Grant Likely Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/leds/leds-renesas-tpu.c b/drivers/leds/leds-renesas-tpu.c index 45dbdf5..3ee540e 100644 --- a/drivers/leds/leds-renesas-tpu.c +++ b/drivers/leds/leds-renesas-tpu.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/leds-renesas-tpu.h b/include/linux/leds-renesas-tpu.h deleted file mode 100644 index 0553870..0000000 --- a/include/linux/leds-renesas-tpu.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef __LEDS_RENESAS_TPU_H__ -#define __LEDS_RENESAS_TPU_H__ - -struct led_renesas_tpu_config { - char *name; - unsigned pin_gpio_fn; - unsigned pin_gpio; - unsigned int channel_offset; - unsigned int timer_bit; - unsigned int max_brightness; - unsigned int refresh_rate; -}; - -#endif /* __LEDS_RENESAS_TPU_H__ */ diff --git a/include/linux/platform_data/leds-renesas-tpu.h b/include/linux/platform_data/leds-renesas-tpu.h new file mode 100644 index 0000000..0553870 --- /dev/null +++ b/include/linux/platform_data/leds-renesas-tpu.h @@ -0,0 +1,14 @@ +#ifndef __LEDS_RENESAS_TPU_H__ +#define __LEDS_RENESAS_TPU_H__ + +struct led_renesas_tpu_config { + char *name; + unsigned pin_gpio_fn; + unsigned pin_gpio; + unsigned int channel_offset; + unsigned int timer_bit; + unsigned int max_brightness; + unsigned int refresh_rate; +}; + +#endif /* __LEDS_RENESAS_TPU_H__ */ -- cgit v0.10.2 From fa0ea0e16f748e36d65931227188ccf07f2b77c7 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 31 Oct 2011 17:12:12 -0700 Subject: drivers/leds/leds-lp5521.c: avoid writing uninitialized value to LP5521_REG_OP_MODE register If lp5521_read fails, engine_state variable is not initialized. If lp5521_read fails, we should return error. This patch fixes below warning. CC drivers/leds/leds-lp5521.o drivers/leds/leds-lp5521.c: In function 'lp5521_set_engine_mode': drivers/leds/leds-lp5521.c:168: warning: 'engine_state' may be used uninitialized in this function [akpm@linux-foundation.org: remove unneeded "ret |="] Signed-off-by: Axel Lin Cc: Samu Onkalo Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c index 9fc122c..16f281b 100644 --- a/drivers/leds/leds-lp5521.c +++ b/drivers/leds/leds-lp5521.c @@ -175,14 +175,14 @@ static int lp5521_set_engine_mode(struct lp5521_engine *engine, u8 mode) mode = LP5521_CMD_DIRECT; ret = lp5521_read(client, LP5521_REG_OP_MODE, &engine_state); + if (ret < 0) + return ret; /* set mode only for this engine */ engine_state &= ~(engine->engine_mask); mode &= engine->engine_mask; engine_state |= mode; - ret |= lp5521_write(client, LP5521_REG_OP_MODE, engine_state); - - return ret; + return lp5521_write(client, LP5521_REG_OP_MODE, engine_state); } static int lp5521_load_program(struct lp5521_engine *eng, const u8 *pattern) -- cgit v0.10.2 From 93f539822ed60e18cd7fb417bff4efa1833af9f2 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 31 Oct 2011 17:12:15 -0700 Subject: drivers/leds/leds-lm3530.c: add __devexit_p where needed According to the comments in include/linux/init.h: "Pointers to __devexit functions must use __devexit_p(function_name), the wrapper will insert either the function_name or NULL, depending on the config options." We have __devexit annotation for lm3530_remove(), so add __devexit_p to the `struct i2c_driver'. Signed-off-by: Axel Lin Cc: Shreshtha Kumar SAHU Cc: Richard Purdie Acked-by: Linus Walleij Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/leds/leds-lm3530.c b/drivers/leds/leds-lm3530.c index d2d25e6..4dc510f 100644 --- a/drivers/leds/leds-lm3530.c +++ b/drivers/leds/leds-lm3530.c @@ -448,7 +448,7 @@ MODULE_DEVICE_TABLE(i2c, lm3530_id); static struct i2c_driver lm3530_i2c_driver = { .probe = lm3530_probe, - .remove = lm3530_remove, + .remove = __devexit_p(lm3530_remove), .id_table = lm3530_id, .driver = { .name = LM3530_NAME, -- cgit v0.10.2 From dabc69c24271bd6ac126e3dda4f2d72234da81df Mon Sep 17 00:00:00 2001 From: David Daney Date: Mon, 31 Oct 2011 17:12:17 -0700 Subject: drivers/leds/leds-gpio.c: use gpio_get_value_cansleep() when initializing I get the following warning: ------------[ cut here ]------------ WARNING: at drivers/gpio/gpiolib.c:1559 __gpio_get_value+0x90/0x98() Modules linked in: Call Trace: [] dump_stack+0x8/0x34 [] warn_slowpath_common+0x78/0xa0 [] __gpio_get_value+0x90/0x98 [] create_gpio_led+0xdc/0x194 [] gpio_led_probe+0x290/0x36c [] driver_probe_device+0x78/0x1b0 [] __driver_attach+0xc0/0xc8 [] bus_for_each_dev+0x64/0xb0 [] bus_add_driver+0x1c8/0x2a8 [] driver_register+0x90/0x180 [] do_one_initcall+0x38/0x160 ---[ end trace ee38723fbefcd65c ]--- My GPIOs are on an I2C port expander, so we must use the *_cansleep() variant of the GPIO functions. This is was not being done in create_gpio_led(). We can change gpio_get_value() to gpio_get_value_cansleep() because it is only called from the platform_driver probe function, which is a context where we can sleep. Only tested on my gpio_cansleep() system, but it seems safe for all systems. Signed-off-by: David Daney Cc: Richard Purdie Acked-by: Trent Piepho Cc: Grant Likely Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c index 3d8bc32..504cc26 100644 --- a/drivers/leds/leds-gpio.c +++ b/drivers/leds/leds-gpio.c @@ -121,7 +121,7 @@ static int __devinit create_gpio_led(const struct gpio_led *template, } led_dat->cdev.brightness_set = gpio_led_set; if (template->default_state == LEDS_GPIO_DEFSTATE_KEEP) - state = !!gpio_get_value(led_dat->gpio) ^ led_dat->active_low; + state = !!gpio_get_value_cansleep(led_dat->gpio) ^ led_dat->active_low; else state = (template->default_state == LEDS_GPIO_DEFSTATE_ON); led_dat->cdev.brightness = state ? LED_FULL : LED_OFF; -- cgit v0.10.2 From 6123b0e274503a0d3588e84fbe07c9aa01bfaf5d Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Mon, 31 Oct 2011 17:12:19 -0700 Subject: leds: save the delay values after a successful call to blink_set() When calling the hardware blinking function implemented by blink_set(), the delay_on and delay_off values are not preserved across calls. Fix that and make the "timer" trigger work as expected when hardware blinking is available. BEFORE the fix: $ cd /sys/class/leds/someled $ echo timer > trigger $ cat delay_on delay_off 0 0 $ echo 100 > delay_on $ cat delay_on delay_off 0 0 $ echo 100 > delay_off $ cat delay_on delay_off 0 0 AFTER the fix: $ cd /sys/class/leds/someled $ echo timer > trigger $ cat delay_on delay_off 0 0 $ echo 100 > delay_on $ cat delay_on delay_off 100 0 $ echo 100 > delay_off $ cat delay_on delay_off 100 100 Signed-off-by: Antonio Ospite Reviewed-by: Johannes Berg Cc: Richard Purdie Cc: Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index dc3d3d8..5c270ae 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -268,8 +268,11 @@ void led_blink_set(struct led_classdev *led_cdev, unsigned long *delay_off) { if (led_cdev->blink_set && - !led_cdev->blink_set(led_cdev, delay_on, delay_off)) + !led_cdev->blink_set(led_cdev, delay_on, delay_off)) { + led_cdev->blink_delay_on = *delay_on; + led_cdev->blink_delay_off = *delay_off; return; + } /* blink with 1 Hz as default if nothing specified */ if (!*delay_on && !*delay_off) -- cgit v0.10.2 From 488bc35bf40df89d37486c1826b178a2fba36ce7 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Mon, 31 Oct 2011 17:12:22 -0700 Subject: leds: turn the blink_timer off before starting to blink Depending on the implementation of the hardware blinking function in blink_set(), the led can support hardware blinking for some values of delay_on and delay_off and fall-back to software blinking for some other values. Turning off the blink_timer unconditionally before starting to blink make sure that a sequence like: OFF hardware blinking software blinking hardware blinking does not leave the software blinking timer active. Signed-off-by: Antonio Ospite Reviewed-by: Johannes Berg Cc: Richard Purdie Cc: Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 5c270ae..661b692 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -267,6 +267,8 @@ void led_blink_set(struct led_classdev *led_cdev, unsigned long *delay_on, unsigned long *delay_off) { + del_timer_sync(&led_cdev->blink_timer); + if (led_cdev->blink_set && !led_cdev->blink_set(led_cdev, delay_on, delay_off)) { led_cdev->blink_delay_on = *delay_on; -- cgit v0.10.2 From b3c49c05b737887443c894c66635ae68dcdf0027 Mon Sep 17 00:00:00 2001 From: Srinidhi KASAGAR Date: Mon, 31 Oct 2011 17:12:24 -0700 Subject: drivers/leds/leds-lp5521.c: check if reset is successful Make sure that the reset is successful by issuing a dummy read to R channel current register and check its default value. On some platforms, without this dummy read, any further access to {R/G/B}_EXEC will not have any impact. [akpm@linux-foundation.org: fix up code comment] Signed-off-by: srinidhi kasagar Tested-by: Naga Radhesh Acked-by: Linus Walleij Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c index 16f281b..cb641f1 100644 --- a/drivers/leds/leds-lp5521.c +++ b/drivers/leds/leds-lp5521.c @@ -97,6 +97,9 @@ /* Status */ #define LP5521_EXT_CLK_USED 0x08 +/* default R channel current register value */ +#define LP5521_REG_R_CURR_DEFAULT 0xAF + struct lp5521_engine { int id; u8 mode; @@ -643,6 +646,7 @@ static int __devinit lp5521_probe(struct i2c_client *client, struct lp5521_chip *chip; struct lp5521_platform_data *pdata; int ret, i, led; + u8 buf; chip = kzalloc(sizeof(*chip), GFP_KERNEL); if (!chip) @@ -681,6 +685,20 @@ static int __devinit lp5521_probe(struct i2c_client *client, * Exact value is not available. 10 - 20ms * appears to be enough for reset. */ + + /* + * Make sure that the chip is reset by reading back the r channel + * current reg. This is dummy read is required on some platforms - + * otherwise further access to the R G B channels in the + * LP5521_REG_ENABLE register will not have any effect - strange! + */ + lp5521_read(client, LP5521_REG_R_CURRENT, &buf); + if (buf != LP5521_REG_R_CURR_DEFAULT) { + dev_err(&client->dev, "error in reseting chip\n"); + goto fail2; + } + usleep_range(10000, 20000); + ret = lp5521_detect(client); if (ret) { -- cgit v0.10.2 From 1dff46d6987484eaa31f2fb1425216ba06418be3 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 31 Oct 2011 17:12:28 -0700 Subject: lib/kstrtox: common code between kstrto*() and simple_strto*() functions Currently termination logic (\0 or \n\0) is hardcoded in _kstrtoull(), avoid that for code reuse between kstrto*() and simple_strtoull(). Essentially, make them different only in termination logic. simple_strtoull() (and scanf(), BTW) ignores integer overflow, that's a bug we currently don't have guts to fix, making KSTRTOX_OVERFLOW hack necessary. Almost forgot: patch shrinks code size by about ~80 bytes on x86_64. Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/kstrtox.c b/lib/kstrtox.c index 5e06675..7a94c8f 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -18,26 +18,40 @@ #include #include #include +#include "kstrtox.h" -static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) +const char *_parse_integer_fixup_radix(const char *s, unsigned int *base) { - unsigned long long acc; - int ok; - - if (base == 0) { + if (*base == 0) { if (s[0] == '0') { if (_tolower(s[1]) == 'x' && isxdigit(s[2])) - base = 16; + *base = 16; else - base = 8; + *base = 8; } else - base = 10; + *base = 10; } - if (base == 16 && s[0] == '0' && _tolower(s[1]) == 'x') + if (*base == 16 && s[0] == '0' && _tolower(s[1]) == 'x') s += 2; + return s; +} - acc = 0; - ok = 0; +/* + * Convert non-negative integer string representation in explicitly given radix + * to an integer. + * Return number of characters consumed maybe or-ed with overflow bit. + * If overflow occurs, result integer (incorrect) is still returned. + * + * Don't you dare use this function. + */ +unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *res) +{ + unsigned int rv; + int overflow; + + *res = 0; + rv = 0; + overflow = 0; while (*s) { unsigned int val; @@ -45,23 +59,40 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) val = *s - '0'; else if ('a' <= _tolower(*s) && _tolower(*s) <= 'f') val = _tolower(*s) - 'a' + 10; - else if (*s == '\n' && *(s + 1) == '\0') - break; else - return -EINVAL; + break; if (val >= base) - return -EINVAL; - if (acc > div_u64(ULLONG_MAX - val, base)) - return -ERANGE; - acc = acc * base + val; - ok = 1; - + break; + if (*res > div_u64(ULLONG_MAX - val, base)) + overflow = 1; + *res = *res * base + val; + rv++; s++; } - if (!ok) + if (overflow) + rv |= KSTRTOX_OVERFLOW; + return rv; +} + +static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) +{ + unsigned long long _res; + unsigned int rv; + + s = _parse_integer_fixup_radix(s, &base); + rv = _parse_integer(s, base, &_res); + if (rv & KSTRTOX_OVERFLOW) + return -ERANGE; + rv &= ~KSTRTOX_OVERFLOW; + if (rv == 0) + return -EINVAL; + s += rv; + if (*s == '\n') + s++; + if (*s) return -EINVAL; - *res = acc; + *res = _res; return 0; } diff --git a/lib/kstrtox.h b/lib/kstrtox.h new file mode 100644 index 0000000..f13eeea --- /dev/null +++ b/lib/kstrtox.h @@ -0,0 +1,8 @@ +#ifndef _LIB_KSTRTOX_H +#define _LIB_KSTRTOX_H + +#define KSTRTOX_OVERFLOW (1U << 31) +const char *_parse_integer_fixup_radix(const char *s, unsigned int *base); +unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *res); + +#endif diff --git a/lib/vsprintf.c b/lib/vsprintf.c index d7222a9..c1a3927 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -31,17 +31,7 @@ #include #include /* for dereference_function_descriptor() */ -static unsigned int simple_guess_base(const char *cp) -{ - if (cp[0] == '0') { - if (_tolower(cp[1]) == 'x' && isxdigit(cp[2])) - return 16; - else - return 8; - } else { - return 10; - } -} +#include "kstrtox.h" /** * simple_strtoull - convert a string to an unsigned long long @@ -51,23 +41,14 @@ static unsigned int simple_guess_base(const char *cp) */ unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base) { - unsigned long long result = 0; + unsigned long long result; + unsigned int rv; - if (!base) - base = simple_guess_base(cp); + cp = _parse_integer_fixup_radix(cp, &base); + rv = _parse_integer(cp, base, &result); + /* FIXME */ + cp += (rv & ~KSTRTOX_OVERFLOW); - if (base == 16 && cp[0] == '0' && _tolower(cp[1]) == 'x') - cp += 2; - - while (isxdigit(*cp)) { - unsigned int value; - - value = isdigit(*cp) ? *cp - '0' : _tolower(*cp) - 'a' + 10; - if (value >= base) - break; - result = result * base + value; - cp++; - } if (endp) *endp = (char *)cp; -- cgit v0.10.2 From 4e101b0e6aa885f5786a058eefc1ce4b7cc7c44e Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 31 Oct 2011 17:12:29 -0700 Subject: lib/spinlock_debug.c: print owner on spinlock lockup When SPIN_BUG_ON is triggered, the lock owner information is reported. But it is omitted when spinlock lockup is detected. This information is useful especially on the architectures which don't implement trigger_all_cpu_backtrace() that is called just after detecting lockup. So report it and also avoid message format duplication. Signed-off-by: Akinobu Mita Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index 4755b98..5f3eacd 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c @@ -49,13 +49,10 @@ void __rwlock_init(rwlock_t *lock, const char *name, EXPORT_SYMBOL(__rwlock_init); -static void spin_bug(raw_spinlock_t *lock, const char *msg) +static void spin_dump(raw_spinlock_t *lock, const char *msg) { struct task_struct *owner = NULL; - if (!debug_locks_off()) - return; - if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT) owner = lock->owner; printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n", @@ -70,6 +67,14 @@ static void spin_bug(raw_spinlock_t *lock, const char *msg) dump_stack(); } +static void spin_bug(raw_spinlock_t *lock, const char *msg) +{ + if (!debug_locks_off()) + return; + + spin_dump(lock, msg); +} + #define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg) static inline void @@ -113,11 +118,7 @@ static void __spin_lock_debug(raw_spinlock_t *lock) /* lockup suspected: */ if (print_once) { print_once = 0; - printk(KERN_EMERG "BUG: spinlock lockup on CPU#%d, " - "%s/%d, %p\n", - raw_smp_processor_id(), current->comm, - task_pid_nr(current), lock); - dump_stack(); + spin_dump(lock, "lockup"); #ifdef CONFIG_SMP trigger_all_cpu_backtrace(); #endif -- cgit v0.10.2 From b9c321fd87b61f70888511d286500519d8b34141 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Mon, 31 Oct 2011 17:12:32 -0700 Subject: lib/bitmap.c: quiet sparse noise about address space __bitmap_parse() and __bitmap_parselist() both take a pointer to a kernel buffer as a parameter and then cast it to a pointer to user buffer for use in cases when the parameter is_user indicates that the buffer is actually located in user space. This casting, and the casts in the callers, results in sparse noise like the following: warning: incorrect type in initializer (different address spaces) expected char const [noderef] *ubuf got char const *buf warning: cast removes address space of expression Since these casts are intentional, use __force to quiet the noise. Signed-off-by: H Hartley Sweeten Cc: Len Brown Cc: Huang Ying Cc: Andy Shevchenko Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/bitmap.c b/lib/bitmap.c index 2f4412e..0d4a127 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -419,7 +419,7 @@ int __bitmap_parse(const char *buf, unsigned int buflen, { int c, old_c, totaldigits, ndigits, nchunks, nbits; u32 chunk; - const char __user *ubuf = buf; + const char __user __force *ubuf = (const char __user __force *)buf; bitmap_zero(maskp, nmaskbits); @@ -504,7 +504,9 @@ int bitmap_parse_user(const char __user *ubuf, { if (!access_ok(VERIFY_READ, ubuf, ulen)) return -EFAULT; - return __bitmap_parse((const char *)ubuf, ulen, 1, maskp, nmaskbits); + return __bitmap_parse((const char __force *)ubuf, + ulen, 1, maskp, nmaskbits); + } EXPORT_SYMBOL(bitmap_parse_user); @@ -594,7 +596,7 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, { unsigned a, b; int c, old_c, totaldigits; - const char __user *ubuf = buf; + const char __user __force *ubuf = (const char __user __force *)buf; int exp_digit, in_range; totaldigits = c = 0; @@ -694,7 +696,7 @@ int bitmap_parselist_user(const char __user *ubuf, { if (!access_ok(VERIFY_READ, ubuf, ulen)) return -EFAULT; - return __bitmap_parselist((const char *)ubuf, + return __bitmap_parselist((const char __force *)ubuf, ulen, 1, maskp, nmaskbits); } EXPORT_SYMBOL(bitmap_parselist_user); -- cgit v0.10.2 From 3a8495c739c1c773181a246dbb7c12b5b67a8325 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 31 Oct 2011 17:12:34 -0700 Subject: lib/percpu_counter.c: enclose hotplug only variables in hotplug ifdef These variables are only used when CONFIG_HOTPLUG_CPU is enabled, they are ifdef'ed everywhere else. So don't define them when CONFIG_HOTPLUG_CPU is not enabled. Signed-off-by: Glauber Costa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index f087105..f8a3f1a 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -10,8 +10,10 @@ #include #include +#ifdef CONFIG_HOTPLUG_CPU static LIST_HEAD(percpu_counters); static DEFINE_MUTEX(percpu_counters_lock); +#endif #ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER -- cgit v0.10.2 From e3816c5407c800e4fb055d08f668286db6b7113f Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Mon, 31 Oct 2011 17:12:36 -0700 Subject: lib/idr.c: fix comment for ida_get_new_above() Signed-off-by: Wang Sheng-Hui Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/idr.c b/lib/idr.c index 5acf9bb..bbf211a 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -767,8 +767,8 @@ EXPORT_SYMBOL(ida_pre_get); * @starting_id: id to start search at * @p_id: pointer to the allocated handle * - * Allocate new ID above or equal to @ida. It should be called with - * any required locks. + * Allocate new ID above or equal to @starting_id. It should be called + * with any required locks. * * If memory is required, it will return %-EAGAIN, you should unlock * and go back to the ida_pre_get() call. If the ida is full, it will -- cgit v0.10.2 From 66f6958e69d8055277356d3cc2e7a1d734db1755 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 31 Oct 2011 17:12:37 -0700 Subject: lib/string.c: fix strim() semantics for strings that have only blanks Commit 84c95c9acf0 ("string: on strstrip(), first remove leading spaces before running over str") improved the performance of the strim() function. Unfortunately this changed the semantics of strim() and broke my code. Before the patch it was possible to use strim() without using the return value for removing trailing spaces from strings that had either only blanks or only trailing blanks. Now this does not work any longer for strings that *only* have blanks. Before patch: " " -> "" (empty string) After patch: " " -> " " (no change) I think we should remove your patch to restore the old behavior. The description (lib/string.c): * Note that the first trailing whitespace is replaced with a %NUL-terminator => The first trailing whitespace of a string that only has whitespace characters is the first whitespace The patch restores the old strim() semantics. Signed-off-by: Michael Holzheu Cc: Andre Goddard Rosa Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/string.c b/lib/string.c index 11df543..dc4a863 100644 --- a/lib/string.c +++ b/lib/string.c @@ -360,7 +360,6 @@ char *strim(char *s) size_t size; char *end; - s = skip_spaces(s); size = strlen(s); if (!size) return s; @@ -370,7 +369,7 @@ char *strim(char *s) end--; *(end + 1) = '\0'; - return s; + return skip_spaces(s); } EXPORT_SYMBOL(strim); -- cgit v0.10.2 From 55036ba76b2d2fd53b5c00993fcec5ed56e83922 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 31 Oct 2011 17:12:41 -0700 Subject: lib: rename pack_hex_byte() to hex_byte_pack() As suggested by Andrew Morton in [1] there is better to have most significant part first in the function name. [1] https://lkml.org/lkml/2011/9/20/22 There is no functional change. Signed-off-by: Andy Shevchenko Cc: Jesper Nilsson Cc: David Howells Cc: Koichi Yasutake Cc: Jason Wessel Cc: Mimi Zohar Cc: James Morris Cc: OGAWA Hirofumi Cc: "John W. Linville" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e40c950..4824f26 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -372,13 +372,18 @@ extern const char hex_asc[]; #define hex_asc_lo(x) hex_asc[((x) & 0x0f)] #define hex_asc_hi(x) hex_asc[((x) & 0xf0) >> 4] -static inline char *pack_hex_byte(char *buf, u8 byte) +static inline char *hex_byte_pack(char *buf, u8 byte) { *buf++ = hex_asc_hi(byte); *buf++ = hex_asc_lo(byte); return buf; } +static inline char * __deprecated pack_hex_byte(char *buf, u8 byte) +{ + return hex_byte_pack(buf, byte); +} + extern int hex_to_bin(char ch); extern int __must_check hex2bin(u8 *dst, const char *src, size_t count); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index c1a3927..993599e 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -547,7 +547,7 @@ char *mac_address_string(char *buf, char *end, u8 *addr, } for (i = 0; i < 6; i++) { - p = pack_hex_byte(p, addr[i]); + p = hex_byte_pack(p, addr[i]); if (fmt[0] == 'M' && i != 5) *p++ = separator; } @@ -667,13 +667,13 @@ char *ip6_compressed_string(char *p, const char *addr) lo = word & 0xff; if (hi) { if (hi > 0x0f) - p = pack_hex_byte(p, hi); + p = hex_byte_pack(p, hi); else *p++ = hex_asc_lo(hi); - p = pack_hex_byte(p, lo); + p = hex_byte_pack(p, lo); } else if (lo > 0x0f) - p = pack_hex_byte(p, lo); + p = hex_byte_pack(p, lo); else *p++ = hex_asc_lo(lo); needcolon = true; @@ -695,8 +695,8 @@ char *ip6_string(char *p, const char *addr, const char *fmt) int i; for (i = 0; i < 8; i++) { - p = pack_hex_byte(p, *addr++); - p = pack_hex_byte(p, *addr++); + p = hex_byte_pack(p, *addr++); + p = hex_byte_pack(p, *addr++); if (fmt[0] == 'I' && i != 7) *p++ = ':'; } @@ -754,7 +754,7 @@ char *uuid_string(char *buf, char *end, const u8 *addr, } for (i = 0; i < 16; i++) { - p = pack_hex_byte(p, addr[index[i]]); + p = hex_byte_pack(p, addr[index[i]]); switch (i) { case 3: case 5: -- cgit v0.10.2 From 50e1499f468fd74c6db95deb2e1e6bfee578ae70 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 31 Oct 2011 17:12:51 -0700 Subject: kgdb: follow rename pack_hex_byte() to hex_byte_pack() There is no functional change. Signed-off-by: Andy Shevchenko Acked-by: Jesper Nilsson Cc: David Howells Cc: Koichi Yasutake Cc: Jason Wessel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/cris/arch-v10/kernel/kgdb.c b/arch/cris/arch-v10/kernel/kgdb.c index b9f9c8c..b579dd0 100644 --- a/arch/cris/arch-v10/kernel/kgdb.c +++ b/arch/cris/arch-v10/kernel/kgdb.c @@ -694,7 +694,7 @@ mem2hex(char *buf, unsigned char *mem, int count) /* Valid mem address. */ for (i = 0; i < count; i++) { ch = *mem++; - buf = pack_hex_byte(buf, ch); + buf = hex_byte_pack(buf, ch); } } @@ -868,7 +868,7 @@ stub_is_stopped(int sigval) /* Send trap type (converted to signal) */ *ptr++ = 'T'; - ptr = pack_hex_byte(ptr, sigval); + ptr = hex_byte_pack(ptr, sigval); /* Send register contents. We probably only need to send the * PC, frame pointer and stack pointer here. Other registers will be @@ -881,7 +881,7 @@ stub_is_stopped(int sigval) status = read_register (regno, ®_cont); if (status == SUCCESS) { - ptr = pack_hex_byte(ptr, regno); + ptr = hex_byte_pack(ptr, regno); *ptr++ = ':'; ptr = mem2hex(ptr, (unsigned char *)®_cont, diff --git a/arch/cris/arch-v32/kernel/kgdb.c b/arch/cris/arch-v32/kernel/kgdb.c index c0343c3..8c1d35c 100644 --- a/arch/cris/arch-v32/kernel/kgdb.c +++ b/arch/cris/arch-v32/kernel/kgdb.c @@ -677,7 +677,7 @@ mem2hex(char *buf, unsigned char *mem, int count) /* Valid mem address. */ for (i = 0; i < count; i++) { ch = *mem++; - buf = pack_hex_byte(buf, ch); + buf = hex_byte_pack(buf, ch); } } /* Terminate properly. */ @@ -695,7 +695,7 @@ mem2hex_nbo(char *buf, unsigned char *mem, int count) mem += count - 1; for (i = 0; i < count; i++) { ch = *mem--; - buf = pack_hex_byte(buf, ch); + buf = hex_byte_pack(buf, ch); } /* Terminate properly. */ @@ -880,7 +880,7 @@ stub_is_stopped(int sigval) /* Send trap type (converted to signal) */ *ptr++ = 'T'; - ptr = pack_hex_byte(ptr, sigval); + ptr = hex_byte_pack(ptr, sigval); if (((reg.exs & 0xff00) >> 8) == 0xc) { @@ -988,26 +988,26 @@ stub_is_stopped(int sigval) } /* Only send PC, frame and stack pointer. */ read_register(PC, ®_cont); - ptr = pack_hex_byte(ptr, PC); + ptr = hex_byte_pack(ptr, PC); *ptr++ = ':'; ptr = mem2hex(ptr, (unsigned char *)®_cont, register_size[PC]); *ptr++ = ';'; read_register(R8, ®_cont); - ptr = pack_hex_byte(ptr, R8); + ptr = hex_byte_pack(ptr, R8); *ptr++ = ':'; ptr = mem2hex(ptr, (unsigned char *)®_cont, register_size[R8]); *ptr++ = ';'; read_register(SP, ®_cont); - ptr = pack_hex_byte(ptr, SP); + ptr = hex_byte_pack(ptr, SP); *ptr++ = ':'; ptr = mem2hex(ptr, (unsigned char *)®_cont, register_size[SP]); *ptr++ = ';'; /* Send ERP as well; this will save us an entire register fetch in some cases. */ read_register(ERP, ®_cont); - ptr = pack_hex_byte(ptr, ERP); + ptr = hex_byte_pack(ptr, ERP); *ptr++ = ':'; ptr = mem2hex(ptr, (unsigned char *)®_cont, register_size[ERP]); *ptr++ = ';'; diff --git a/arch/frv/kernel/gdb-stub.c b/arch/frv/kernel/gdb-stub.c index a4dba6b..a6d5381 100644 --- a/arch/frv/kernel/gdb-stub.c +++ b/arch/frv/kernel/gdb-stub.c @@ -672,7 +672,7 @@ static unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fa if ((uint32_t)mem&1 && count>=1) { if (!gdbstub_read_byte(mem,ch)) return NULL; - buf = pack_hex_byte(buf, ch[0]); + buf = hex_byte_pack(buf, ch[0]); mem++; count--; } @@ -680,8 +680,8 @@ static unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fa if ((uint32_t)mem&3 && count>=2) { if (!gdbstub_read_word(mem,(uint16_t *)ch)) return NULL; - buf = pack_hex_byte(buf, ch[0]); - buf = pack_hex_byte(buf, ch[1]); + buf = hex_byte_pack(buf, ch[0]); + buf = hex_byte_pack(buf, ch[1]); mem += 2; count -= 2; } @@ -689,10 +689,10 @@ static unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fa while (count>=4) { if (!gdbstub_read_dword(mem,(uint32_t *)ch)) return NULL; - buf = pack_hex_byte(buf, ch[0]); - buf = pack_hex_byte(buf, ch[1]); - buf = pack_hex_byte(buf, ch[2]); - buf = pack_hex_byte(buf, ch[3]); + buf = hex_byte_pack(buf, ch[0]); + buf = hex_byte_pack(buf, ch[1]); + buf = hex_byte_pack(buf, ch[2]); + buf = hex_byte_pack(buf, ch[3]); mem += 4; count -= 4; } @@ -700,8 +700,8 @@ static unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fa if (count>=2) { if (!gdbstub_read_word(mem,(uint16_t *)ch)) return NULL; - buf = pack_hex_byte(buf, ch[0]); - buf = pack_hex_byte(buf, ch[1]); + buf = hex_byte_pack(buf, ch[0]); + buf = hex_byte_pack(buf, ch[1]); mem += 2; count -= 2; } @@ -709,7 +709,7 @@ static unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fa if (count>=1) { if (!gdbstub_read_byte(mem,ch)) return NULL; - buf = pack_hex_byte(buf, ch[0]); + buf = hex_byte_pack(buf, ch[0]); } *buf = 0; @@ -1498,21 +1498,21 @@ void gdbstub(int sigval) ptr = mem2hex(title, ptr, sizeof(title) - 1,0); hx = hex_asc_hi(brr >> 24); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); hx = hex_asc_lo(brr >> 24); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); hx = hex_asc_hi(brr >> 16); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); hx = hex_asc_lo(brr >> 16); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); hx = hex_asc_hi(brr >> 8); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); hx = hex_asc_lo(brr >> 8); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); hx = hex_asc_hi(brr); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); hx = hex_asc_lo(brr); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); ptr = mem2hex(crlf, ptr, sizeof(crlf) - 1, 0); *ptr = 0; @@ -1526,10 +1526,10 @@ void gdbstub(int sigval) /* Send trap type (converted to signal) */ *ptr++ = 'T'; - ptr = pack_hex_byte(ptr, sigval); + ptr = hex_byte_pack(ptr, sigval); /* Send Error PC */ - ptr = pack_hex_byte(ptr, GDB_REG_PC); + ptr = hex_byte_pack(ptr, GDB_REG_PC); *ptr++ = ':'; ptr = mem2hex(&__debug_frame->pc, ptr, 4, 0); *ptr++ = ';'; @@ -1537,7 +1537,7 @@ void gdbstub(int sigval) /* * Send frame pointer */ - ptr = pack_hex_byte(ptr, GDB_REG_FP); + ptr = hex_byte_pack(ptr, GDB_REG_FP); *ptr++ = ':'; ptr = mem2hex(&__debug_frame->fp, ptr, 4, 0); *ptr++ = ';'; @@ -1545,7 +1545,7 @@ void gdbstub(int sigval) /* * Send stack pointer */ - ptr = pack_hex_byte(ptr, GDB_REG_SP); + ptr = hex_byte_pack(ptr, GDB_REG_SP); *ptr++ = ':'; ptr = mem2hex(&__debug_frame->sp, ptr, 4, 0); *ptr++ = ';'; diff --git a/arch/mn10300/kernel/gdb-stub.c b/arch/mn10300/kernel/gdb-stub.c index 538266b..522eb8a 100644 --- a/arch/mn10300/kernel/gdb-stub.c +++ b/arch/mn10300/kernel/gdb-stub.c @@ -798,7 +798,7 @@ unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fault) if ((u32) mem & 1 && count >= 1) { if (gdbstub_read_byte(mem, ch) != 0) return 0; - buf = pack_hex_byte(buf, ch[0]); + buf = hex_byte_pack(buf, ch[0]); mem++; count--; } @@ -806,8 +806,8 @@ unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fault) if ((u32) mem & 3 && count >= 2) { if (gdbstub_read_word(mem, ch) != 0) return 0; - buf = pack_hex_byte(buf, ch[0]); - buf = pack_hex_byte(buf, ch[1]); + buf = hex_byte_pack(buf, ch[0]); + buf = hex_byte_pack(buf, ch[1]); mem += 2; count -= 2; } @@ -815,10 +815,10 @@ unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fault) while (count >= 4) { if (gdbstub_read_dword(mem, ch) != 0) return 0; - buf = pack_hex_byte(buf, ch[0]); - buf = pack_hex_byte(buf, ch[1]); - buf = pack_hex_byte(buf, ch[2]); - buf = pack_hex_byte(buf, ch[3]); + buf = hex_byte_pack(buf, ch[0]); + buf = hex_byte_pack(buf, ch[1]); + buf = hex_byte_pack(buf, ch[2]); + buf = hex_byte_pack(buf, ch[3]); mem += 4; count -= 4; } @@ -826,8 +826,8 @@ unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fault) if (count >= 2) { if (gdbstub_read_word(mem, ch) != 0) return 0; - buf = pack_hex_byte(buf, ch[0]); - buf = pack_hex_byte(buf, ch[1]); + buf = hex_byte_pack(buf, ch[0]); + buf = hex_byte_pack(buf, ch[1]); mem += 2; count -= 2; } @@ -835,7 +835,7 @@ unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fault) if (count >= 1) { if (gdbstub_read_byte(mem, ch) != 0) return 0; - buf = pack_hex_byte(buf, ch[0]); + buf = hex_byte_pack(buf, ch[0]); } *buf = 0; @@ -1273,13 +1273,13 @@ static int gdbstub(struct pt_regs *regs, enum exception_code excep) ptr = mem2hex(title, ptr, sizeof(title) - 1, 0); hx = hex_asc_hi(excep >> 8); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); hx = hex_asc_lo(excep >> 8); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); hx = hex_asc_hi(excep); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); hx = hex_asc_lo(excep); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); ptr = mem2hex(crlf, ptr, sizeof(crlf) - 1, 0); *ptr = 0; @@ -1291,21 +1291,21 @@ static int gdbstub(struct pt_regs *regs, enum exception_code excep) ptr = mem2hex(tbcberr, ptr, sizeof(tbcberr) - 1, 0); hx = hex_asc_hi(bcberr >> 24); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); hx = hex_asc_lo(bcberr >> 24); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); hx = hex_asc_hi(bcberr >> 16); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); hx = hex_asc_lo(bcberr >> 16); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); hx = hex_asc_hi(bcberr >> 8); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); hx = hex_asc_lo(bcberr >> 8); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); hx = hex_asc_hi(bcberr); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); hx = hex_asc_lo(bcberr); - ptr = pack_hex_byte(ptr, hx); + ptr = hex_byte_pack(ptr, hx); ptr = mem2hex(crlf, ptr, sizeof(crlf) - 1, 0); *ptr = 0; @@ -1321,12 +1321,12 @@ static int gdbstub(struct pt_regs *regs, enum exception_code excep) * Send trap type (converted to signal) */ *ptr++ = 'T'; - ptr = pack_hex_byte(ptr, sigval); + ptr = hex_byte_pack(ptr, sigval); /* * Send Error PC */ - ptr = pack_hex_byte(ptr, GDB_REGID_PC); + ptr = hex_byte_pack(ptr, GDB_REGID_PC); *ptr++ = ':'; ptr = mem2hex(®s->pc, ptr, 4, 0); *ptr++ = ';'; @@ -1334,7 +1334,7 @@ static int gdbstub(struct pt_regs *regs, enum exception_code excep) /* * Send frame pointer */ - ptr = pack_hex_byte(ptr, GDB_REGID_FP); + ptr = hex_byte_pack(ptr, GDB_REGID_FP); *ptr++ = ':'; ptr = mem2hex(®s->a3, ptr, 4, 0); *ptr++ = ';'; @@ -1343,7 +1343,7 @@ static int gdbstub(struct pt_regs *regs, enum exception_code excep) * Send stack pointer */ ssp = (unsigned long) (regs + 1); - ptr = pack_hex_byte(ptr, GDB_REGID_SP); + ptr = hex_byte_pack(ptr, GDB_REGID_SP); *ptr++ = ':'; ptr = mem2hex(&ssp, ptr, 4, 0); *ptr++ = ';'; diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index 3487248..c22d8c2 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -217,7 +217,7 @@ void gdbstub_msg_write(const char *s, int len) /* Pack in hex chars */ for (i = 0; i < wcount; i++) - bufptr = pack_hex_byte(bufptr, s[i]); + bufptr = hex_byte_pack(bufptr, s[i]); *bufptr = '\0'; /* Move up */ @@ -249,7 +249,7 @@ char *kgdb_mem2hex(char *mem, char *buf, int count) if (err) return NULL; while (count > 0) { - buf = pack_hex_byte(buf, *tmp); + buf = hex_byte_pack(buf, *tmp); tmp++; count--; } @@ -411,14 +411,14 @@ static char *pack_threadid(char *pkt, unsigned char *id) limit = id + (BUF_THREAD_ID_SIZE / 2); while (id < limit) { if (!lzero || *id != 0) { - pkt = pack_hex_byte(pkt, *id); + pkt = hex_byte_pack(pkt, *id); lzero = 0; } id++; } if (lzero) - pkt = pack_hex_byte(pkt, 0); + pkt = hex_byte_pack(pkt, 0); return pkt; } @@ -486,7 +486,7 @@ static void gdb_cmd_status(struct kgdb_state *ks) dbg_remove_all_break(); remcom_out_buffer[0] = 'S'; - pack_hex_byte(&remcom_out_buffer[1], ks->signo); + hex_byte_pack(&remcom_out_buffer[1], ks->signo); } static void gdb_get_regs_helper(struct kgdb_state *ks) @@ -954,7 +954,7 @@ int gdb_serial_stub(struct kgdb_state *ks) /* Reply to host that an exception has occurred */ ptr = remcom_out_buffer; *ptr++ = 'T'; - ptr = pack_hex_byte(ptr, ks->signo); + ptr = hex_byte_pack(ptr, ks->signo); ptr += strlen(strcpy(ptr, "thread:")); int_to_threadref(thref, shadow_pid(current->pid)); ptr = pack_threadid(ptr, thref); -- cgit v0.10.2 From 02473119bc54b0b239c2501064c7a37314347f87 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 31 Oct 2011 17:12:55 -0700 Subject: security: follow rename pack_hex_byte() to hex_byte_pack() There is no functional change. Signed-off-by: Andy Shevchenko Cc: Mimi Zohar Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index f33804c..dcc843c 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -293,7 +293,7 @@ static char *datablob_format(struct encrypted_key_payload *epayload, /* convert the hex encoded iv, encrypted-data and HMAC to ascii */ bufp = &ascii_buf[len]; for (i = 0; i < (asciiblob_len - len) / 2; i++) - bufp = pack_hex_byte(bufp, iv[i]); + bufp = hex_byte_pack(bufp, iv[i]); out: return ascii_buf; } diff --git a/security/keys/trusted.c b/security/keys/trusted.c index 0964fc2..0ed5fdf 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -1098,7 +1098,7 @@ static long trusted_read(const struct key *key, char __user *buffer, bufp = ascii_buf; for (i = 0; i < p->blob_len; i++) - bufp = pack_hex_byte(bufp, p->blob[i]); + bufp = hex_byte_pack(bufp, p->blob[i]); if ((copy_to_user(buffer, ascii_buf, 2 * p->blob_len)) != 0) { kfree(ascii_buf); return -EFAULT; -- cgit v0.10.2 From 0a90e0f1012e576500b551455b046013324826b9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 31 Oct 2011 17:12:57 -0700 Subject: fat: follow rename pack_hex_byte() to hex_byte_pack() There is no functional change. Signed-off-by: Andy Shevchenko Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 5efbd5d..aca191b 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -156,8 +156,8 @@ static int uni16_to_x8(struct super_block *sb, unsigned char *ascii, } else { if (uni_xlate == 1) { *op++ = ':'; - op = pack_hex_byte(op, ec >> 8); - op = pack_hex_byte(op, ec); + op = hex_byte_pack(op, ec >> 8); + op = hex_byte_pack(op, ec); len -= 5; } else { *op++ = '?'; -- cgit v0.10.2 From 67220a9ea3eb9cf61de7e384b6bcaaa78f680c9d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 31 Oct 2011 17:13:00 -0700 Subject: wireless: at76c50x: follow rename pack_hex_byte to hex_byte_pack There is no functional change. Signed-off-by: Andy Shevchenko Acked-by: John W. Linville Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c index 39322d4..4045e5a 100644 --- a/drivers/net/wireless/at76c50x-usb.c +++ b/drivers/net/wireless/at76c50x-usb.c @@ -517,7 +517,7 @@ static char *hex2str(void *buf, size_t len) goto exit; while (len--) { - obuf = pack_hex_byte(obuf, *ibuf++); + obuf = hex_byte_pack(obuf, *ibuf++); *obuf++ = '-'; } obuf--; -- cgit v0.10.2 From fc23af34b00ef444eec088f744983b9ca6c7f5d1 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 31 Oct 2011 17:13:08 -0700 Subject: llist-return-whether-list-is-empty-before-adding-in-llist_add-fix clarify comment Cc: Huang Ying Cc: Mathieu Desnoyers Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/llist.h b/include/linux/llist.h index 7287734..801b44b 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -148,7 +148,7 @@ static inline struct llist_node *llist_next(struct llist_node *node) * @new: new entry to be added * @head: the head for your lock-less list * - * Return whether list is empty before adding. + * Returns true if the list was empty prior to adding this entry. */ static inline bool llist_add(struct llist_node *new, struct llist_head *head) { -- cgit v0.10.2 From 67d0a0754455f89ef3946946159d8ec9e45ce33a Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 31 Oct 2011 17:13:10 -0700 Subject: kernel.h/checkpatch: mark strict_strto and simple_strto as obsolete Mark obsolete/deprecated strict_strto and simple_strto functions and macros as obsolete. Update checkpatch to warn about their use. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 4824f26..4c0d3b2 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -287,6 +287,8 @@ static inline int __must_check kstrtos32_from_user(const char __user *s, size_t return kstrtoint_from_user(s, count, base, res); } +/* Obsolete, do not use. Use kstrto instead */ + extern unsigned long simple_strtoul(const char *,char **,unsigned int); extern long simple_strtol(const char *,char **,unsigned int); extern unsigned long long simple_strtoull(const char *,char **,unsigned int); @@ -296,6 +298,8 @@ extern long long simple_strtoll(const char *,char **,unsigned int); #define strict_strtoull kstrtoull #define strict_strtoll kstrtoll +/* lib/printf utilities */ + extern __printf(2, 3) int sprintf(char *buf, const char * fmt, ...); extern __printf(2, 0) int vsprintf(char *buf, const char *, va_list); extern __printf(3, 4) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 0b3e35c..5ba679c 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3151,10 +3151,10 @@ sub process { "consider using a completion\n" . $herecurr); } -# recommend kstrto* over simple_strto* - if ($line =~ /\bsimple_(strto.*?)\s*\(/) { +# recommend kstrto* over simple_strto* and strict_strto* + if ($line =~ /\b((simple|strict)_(strto(l|ll|ul|ull)))\s*\(/) { WARN("CONSIDER_KSTRTO", - "consider using kstrto* in preference to simple_$1\n" . $herecurr); + "$1 is obsolete, use k$3 instead\n" . $herecurr); } # check for __initcall(), use device_initcall() explicitly please if ($line =~ /^.\s*__initcall\s*\(/) { -- cgit v0.10.2 From 15662b3e8644905032c2e26808401a487d4e90c1 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 31 Oct 2011 17:13:12 -0700 Subject: checkpatch: add a --strict check for utf-8 in commit logs Some find using utf-8 in commit logs inappropriate. Some patch commit logs contain unintended utf-8 characters when doing things like copy/pasting compilation output. Look for the start of any commit log by skipping initial lines that look like email headers and "From: " lines. Stop looking for utf-8 at the first signature line. Signed-off-by: Joe Perches Suggested-by: Andrew Morton Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 5ba679c..5e93342 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -240,9 +240,8 @@ our $NonptrType; our $Type; our $Declare; -our $UTF8 = qr { - [\x09\x0A\x0D\x20-\x7E] # ASCII - | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte +our $NON_ASCII_UTF8 = qr{ + [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates @@ -251,6 +250,11 @@ our $UTF8 = qr { | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 }x; +our $UTF8 = qr{ + [\x09\x0A\x0D\x20-\x7E] # ASCII + | $NON_ASCII_UTF8 +}x; + our $typeTypedefs = qr{(?x: (?:__)?(?:u|s|be|le)(?:8|16|32|64)| atomic_t @@ -1330,6 +1334,9 @@ sub process { my $signoff = 0; my $is_patch = 0; + my $in_header_lines = 1; + my $in_commit_log = 0; #Scanning lines before patch + our @report = (); our $cnt_lines = 0; our $cnt_error = 0; @@ -1497,7 +1504,6 @@ sub process { if ($line =~ /^diff --git.*?(\S+)$/) { $realfile = $1; $realfile =~ s@^([^/]*)/@@; - } elsif ($line =~ /^\+\+\+\s+(\S+)/) { $realfile = $1; $realfile =~ s@^([^/]*)/@@; @@ -1536,6 +1542,7 @@ sub process { # Check the patch for a signoff: if ($line =~ /^\s*signed-off-by:/i) { $signoff++; + $in_commit_log = 0; } # Check signature styles @@ -1613,6 +1620,21 @@ sub process { "Invalid UTF-8, patch and commit message should be encoded in UTF-8\n" . $hereptr); } +# Check if it's the start of a commit log +# (not a header line and we haven't seen the patch filename) + if ($in_header_lines && $realfile =~ /^$/ && + $rawline !~ /^(commit\b|from\b|\w+:).+$/i) { + $in_header_lines = 0; + $in_commit_log = 1; + } + +# Still not yet in a patch, check for any UTF-8 + if ($in_commit_log && $realfile =~ /^$/ && + $rawline =~ /$NON_ASCII_UTF8/) { + CHK("UTF8_BEFORE_PATCH", + "8-bit UTF-8 used in possible commit log\n" . $herecurr); + } + # ignore non-hunk lines and lines being removed next if (!$hunk_line || $line =~ /^-/); -- cgit v0.10.2 From d8805e633e054c816c47cb6e727c81f156d9253d Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Mon, 31 Oct 2011 17:13:14 -0700 Subject: epoll: fix spurious lockdep warnings epoll can acquire recursively acquire ep->mtx on multiple "struct eventpoll"s at once in the case where one epoll fd is monitoring another epoll fd. This is perfectly OK, since we're careful about the lock ordering, but it causes spurious lockdep warnings. Annotate the recursion using mutex_lock_nested, and add a comment explaining the nesting rules for good measure. Recent versions of systemd are triggering this, and it can also be demonstrated with the following trivial test program: --------------------8<-------------------- int main(void) { int e1, e2; struct epoll_event evt = { .events = EPOLLIN }; e1 = epoll_create1(0); e2 = epoll_create1(0); epoll_ctl(e1, EPOLL_CTL_ADD, e2, &evt); return 0; } --------------------8<-------------------- Reported-by: Paul Bolle Tested-by: Paul Bolle Signed-off-by: Nelson Elhage Acked-by: Jason Baron Cc: Dave Jones Cc: Davide Libenzi Cc: Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 9026fc9..828e750 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -70,6 +70,15 @@ * simultaneous inserts (A into B and B into A) from racing and * constructing a cycle without either insert observing that it is * going to. + * It is necessary to acquire multiple "ep->mtx"es at once in the + * case when one epoll fd is added to another. In this case, we + * always acquire the locks in the order of nesting (i.e. after + * epoll_ctl(e1, EPOLL_CTL_ADD, e2), e1->mtx will always be acquired + * before e2->mtx). Since we disallow cycles of epoll file + * descriptors, this ensures that the mutexes are well-ordered. In + * order to communicate this nesting to lockdep, when walking a tree + * of epoll file descriptors, we use the current recursion depth as + * the lockdep subkey. * It is possible to drop the "ep->mtx" and to use the global * mutex "epmutex" (together with "ep->lock") to have it working, * but having "ep->mtx" will make the interface more scalable. @@ -464,13 +473,15 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) * @ep: Pointer to the epoll private data structure. * @sproc: Pointer to the scan callback. * @priv: Private opaque data passed to the @sproc callback. + * @depth: The current depth of recursive f_op->poll calls. * * Returns: The same integer error code returned by the @sproc callback. */ static int ep_scan_ready_list(struct eventpoll *ep, int (*sproc)(struct eventpoll *, struct list_head *, void *), - void *priv) + void *priv, + int depth) { int error, pwake = 0; unsigned long flags; @@ -481,7 +492,7 @@ static int ep_scan_ready_list(struct eventpoll *ep, * We need to lock this because we could be hit by * eventpoll_release_file() and epoll_ctl(). */ - mutex_lock(&ep->mtx); + mutex_lock_nested(&ep->mtx, depth); /* * Steal the ready list, and re-init the original one to the @@ -670,7 +681,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests) { - return ep_scan_ready_list(priv, ep_read_events_proc, NULL); + return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1); } static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) @@ -737,7 +748,7 @@ void eventpoll_release_file(struct file *file) ep = epi->ep; list_del_init(&epi->fllink); - mutex_lock(&ep->mtx); + mutex_lock_nested(&ep->mtx, 0); ep_remove(ep, epi); mutex_unlock(&ep->mtx); } @@ -1134,7 +1145,7 @@ static int ep_send_events(struct eventpoll *ep, esed.maxevents = maxevents; esed.events = events; - return ep_scan_ready_list(ep, ep_send_events_proc, &esed); + return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0); } static inline struct timespec ep_set_mstimeout(long ms) @@ -1267,7 +1278,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) struct rb_node *rbp; struct epitem *epi; - mutex_lock(&ep->mtx); + mutex_lock_nested(&ep->mtx, call_nests + 1); for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); if (unlikely(is_file_epoll(epi->ffd.file))) { @@ -1409,7 +1420,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, } - mutex_lock(&ep->mtx); + mutex_lock_nested(&ep->mtx, 0); /* * Try to lookup the file inside our RB tree, Since we grabbed "mtx" -- cgit v0.10.2