From 677a0b5ac16337277139798ec43fc32ebaae1347 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 30 Apr 2013 15:26:44 -0700 Subject: drivers/usb/storage/realtek_cr.c: fix build Remove unused local `us', which broke the build. Also nuke an unneeded cast. Repairs commit 191648d03d20 ("usb: storage: Convert US_DEBUGP to usb_stor_dbg"). Cc: Joe Perches Acked-by: David Rientjes Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/usb/storage/realtek_cr.c b/drivers/usb/storage/realtek_cr.c index 4797228..8623577 100644 --- a/drivers/usb/storage/realtek_cr.c +++ b/drivers/usb/storage/realtek_cr.c @@ -933,14 +933,11 @@ static int realtek_cr_autosuspend_setup(struct us_data *us) static void realtek_cr_destructor(void *extra) { - struct rts51x_chip *chip = (struct rts51x_chip *)extra; - struct us_data *us; + struct rts51x_chip *chip = extra; if (!chip) return; - us = chip->us; - #ifdef CONFIG_REALTEK_AUTOPM if (ss_en) { del_timer(&chip->rts51x_suspend_timer); -- cgit v0.10.2 From ace6128d603d3f15238baba104d0b37ccf0b6c07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20Stehl=C3=A9?= Date: Tue, 30 Apr 2013 15:26:45 -0700 Subject: memory hotplug: fix warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following compilation warnings: mm/slab.c: In function `kmem_cache_init_late': mm/slab.c:1778:2: warning: statement with no effect [-Wunused-value] mm/page_cgroup.c: In function `page_cgroup_init': mm/page_cgroup.c:305:2: warning: statement with no effect [-Wunused-value] Signed-off-by: Vincent StehlĂ© Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/memory.h b/include/linux/memory.h index 73817af..85c31a8 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -137,7 +137,7 @@ enum mem_add_context { BOOT, HOTPLUG }; #define register_hotmemory_notifier(nb) register_memory_notifier(nb) #define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb) #else -#define hotplug_memory_notifier(fn, pri) (0) +#define hotplug_memory_notifier(fn, pri) ({ 0; }) /* These aren't inline functions due to a GCC bug. */ #define register_hotmemory_notifier(nb) ({ (void)(nb); 0; }) #define unregister_hotmemory_notifier(nb) ({ (void)(nb); }) -- cgit v0.10.2 From 04df32fa10ab9a6f0643db2949d42efc966bc844 Mon Sep 17 00:00:00 2001 From: Zhao Hongjiang Date: Tue, 30 Apr 2013 15:26:46 -0700 Subject: inotify: invalid mask should return a error number but not set it When we run the crackerjack testsuite, the inotify_add_watch test is stalled. This is caused by the invalid mask 0 - the task is waiting for the event but it never comes. inotify_add_watch() should return -EINVAL as it did before commit 676a0675cf92 ("inotify: remove broken mask checks causing unmount to be EINVAL"). That commit removes the invalid mask check, but that check is needed. Check the mask's ALL_INOTIFY_BITS before the inotify_arg_to_mask() call. If none are set, just return -EINVAL. Because IN_UNMOUNT is in ALL_INOTIFY_BITS, this change will not trigger the problem that above commit fixed. [akpm@linux-foundation.org: fix build] Signed-off-by: Zhao Hongjiang Acked-by: Jim Somerville Cc: Paul Gortmaker Cc: Jerome Marchand Cc: Eric Paris Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 8562bd3..c616a70 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -570,7 +570,6 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, int add = (arg & IN_MASK_ADD); int ret; - /* don't allow invalid bits: we don't want flags set */ mask = inotify_arg_to_mask(arg); fsn_mark = fsnotify_find_inode_mark(group, inode); @@ -621,7 +620,6 @@ static int inotify_new_watch(struct fsnotify_group *group, struct idr *idr = &group->inotify_data.idr; spinlock_t *idr_lock = &group->inotify_data.idr_lock; - /* don't allow invalid bits: we don't want flags set */ mask = inotify_arg_to_mask(arg); tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); @@ -747,6 +745,10 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, int ret; unsigned flags = 0; + /* don't allow invalid bits: we don't want flags set */ + if (unlikely(!(mask & ALL_INOTIFY_BITS))) + return -EINVAL; + f = fdget(fd); if (unlikely(!f.file)) return -EBADF; -- cgit v0.10.2 From b4ea2eaa11fa2588c241a80d6d1e59f0f68f7b29 Mon Sep 17 00:00:00 2001 From: Yan Hong Date: Tue, 30 Apr 2013 15:26:47 -0700 Subject: fs/block_dev.c: no need to check inode->i_bdev in bd_forget() Its only caller evict() has promised a non-NULL inode->i_bdev. Signed-off-by: Yan Hong Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/block_dev.c b/fs/block_dev.c index aae187a..ce08de7 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -617,11 +617,9 @@ void bd_forget(struct inode *inode) struct block_device *bdev = NULL; spin_lock(&bdev_lock); - if (inode->i_bdev) { - if (!sb_is_blkdev_sb(inode->i_sb)) - bdev = inode->i_bdev; - __bd_forget(inode); - } + if (!sb_is_blkdev_sb(inode->i_sb)) + bdev = inode->i_bdev; + __bd_forget(inode); spin_unlock(&bdev_lock); if (bdev) -- cgit v0.10.2 From 421348f1ca0bf17769dee0aed4d991845ae0536d Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Tue, 30 Apr 2013 15:26:48 -0700 Subject: fs/dcache.c: add cond_resched() to shrink_dcache_parent() Call cond_resched() in shrink_dcache_parent() to maintain interactivity. Before this patch: void shrink_dcache_parent(struct dentry * parent) { while ((found = select_parent(parent, &dispose)) != 0) shrink_dentry_list(&dispose); } select_parent() populates the dispose list with dentries which shrink_dentry_list() then deletes. select_parent() carefully uses need_resched() to avoid doing too much work at once. But neither shrink_dcache_parent() nor its called functions call cond_resched(). So once need_resched() is set select_parent() will return single dentry dispose list which is then deleted by shrink_dentry_list(). This is inefficient when there are a lot of dentry to process. This can cause softlockup and hurts interactivity on non preemptable kernels. This change adds cond_resched() in shrink_dcache_parent(). The benefit of this is that need_resched() is quickly cleared so that future calls to select_parent() are able to efficiently return a big batch of dentry. These additional cond_resched() do not seem to impact performance, at least for the workload below. Here is a program which can cause soft lockup if other system activity sets need_resched(). int main() { struct rlimit rlim; int i; int f[100000]; char buf[20]; struct timeval t1, t2; double diff; /* cleanup past run */ system("rm -rf x"); /* boost nfile rlimit */ rlim.rlim_cur = 200000; rlim.rlim_max = 200000; if (setrlimit(RLIMIT_NOFILE, &rlim)) err(1, "setrlimit"); /* make directory for files */ if (mkdir("x", 0700)) err(1, "mkdir"); if (gettimeofday(&t1, NULL)) err(1, "gettimeofday"); /* populate directory with open files */ for (i = 0; i < 100000; i++) { snprintf(buf, sizeof(buf), "x/%d", i); f[i] = open(buf, O_CREAT); if (f[i] == -1) err(1, "open"); } /* close some of the files */ for (i = 0; i < 85000; i++) close(f[i]); /* unlink all files, even open ones */ system("rm -rf x"); if (gettimeofday(&t2, NULL)) err(1, "gettimeofday"); diff = (((double)t2.tv_sec * 1000000 + t2.tv_usec) - ((double)t1.tv_sec * 1000000 + t1.tv_usec)); printf("done: %g elapsed\n", diff/1e6); return 0; } Signed-off-by: Greg Thelen Signed-off-by: Dave Chinner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/dcache.c b/fs/dcache.c index e8bc342..e689268 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1230,8 +1230,10 @@ void shrink_dcache_parent(struct dentry * parent) LIST_HEAD(dispose); int found; - while ((found = select_parent(parent, &dispose)) != 0) + while ((found = select_parent(parent, &dispose)) != 0) { shrink_dentry_list(&dispose); + cond_resched(); + } } EXPORT_SYMBOL(shrink_dcache_parent); -- cgit v0.10.2 From 905cd0e1bf9ffe82d6906a01fd974ea0f70be97a Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Tue, 30 Apr 2013 15:26:50 -0700 Subject: mm: frontswap: lazy initialization to allow tmem backends to build/run as modules With the goal of allowing tmem backends (zcache, ramster, Xen tmem) to be built/loaded as modules rather than built-in and enabled by a boot parameter, this patch provides "lazy initialization", allowing backends to register to frontswap even after swapon was run. Before a backend registers all calls to init are recorded and the creation of tmem_pools delayed until a backend registers or until a frontswap store is attempted. Signed-off-by: Stefan Hengelein Signed-off-by: Florian Schmaus Signed-off-by: Andor Daam Signed-off-by: Dan Magenheimer [v1: Fixes per Seth Jennings suggestions] [v2: Removed FRONTSWAP_HAS_.. ] [v3: Fix up per Bob Liu recommendations] [v4: Fix up per Andrew's comments] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Dan Magenheimer Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/frontswap.c b/mm/frontswap.c index 2890e67..cbd2b8a 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -80,6 +80,46 @@ static inline void inc_frontswap_succ_stores(void) { } static inline void inc_frontswap_failed_stores(void) { } static inline void inc_frontswap_invalidates(void) { } #endif + +/* + * Due to the asynchronous nature of the backends loading potentially + * _after_ the swap system has been activated, we have chokepoints + * on all frontswap functions to not call the backend until the backend + * has registered. + * + * Specifically when no backend is registered (nobody called + * frontswap_register_ops) all calls to frontswap_init (which is done via + * swapon -> enable_swap_info -> frontswap_init) are registered and remembered + * (via the setting of need_init bitmap) but fail to create tmem_pools. When a + * backend registers with frontswap at some later point the previous + * calls to frontswap_init are executed (by iterating over the need_init + * bitmap) to create tmem_pools and set the respective poolids. All of that is + * guarded by us using atomic bit operations on the 'need_init' bitmap. + * + * This would not guards us against the user deciding to call swapoff right as + * we are calling the backend to initialize (so swapon is in action). + * Fortunatly for us, the swapon_mutex has been taked by the callee so we are + * OK. The other scenario where calls to frontswap_store (called via + * swap_writepage) is racing with frontswap_invalidate_area (called via + * swapoff) is again guarded by the swap subsystem. + * + * While no backend is registered all calls to frontswap_[store|load| + * invalidate_area|invalidate_page] are ignored or fail. + * + * The time between the backend being registered and the swap file system + * calling the backend (via the frontswap_* functions) is indeterminate as + * backend_registered is not atomic_t (or a value guarded by a spinlock). + * That is OK as we are comfortable missing some of these calls to the newly + * registered backend. + * + * Obviously the opposite (unloading the backend) must be done after all + * the frontswap_[store|load|invalidate_area|invalidate_page] start + * ignorning or failing the requests - at which point backend_registered + * would have to be made in some fashion atomic. + */ +static DECLARE_BITMAP(need_init, MAX_SWAPFILES); +static bool backend_registered __read_mostly; + /* * Register operations for frontswap, returning previous thus allowing * detection of multiple backends and possible nesting. @@ -87,9 +127,22 @@ static inline void inc_frontswap_invalidates(void) { } struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops) { struct frontswap_ops old = frontswap_ops; + int i; frontswap_ops = *ops; frontswap_enabled = true; + + for (i = 0; i < MAX_SWAPFILES; i++) { + if (test_and_clear_bit(i, need_init)) + (*frontswap_ops.init)(i); + } + /* + * We MUST have backend_registered set _after_ the frontswap_init's + * have been called. Otherwise __frontswap_store might fail. Hence + * the barrier to make sure compiler does not re-order us. + */ + barrier(); + backend_registered = true; return old; } EXPORT_SYMBOL(frontswap_register_ops); @@ -119,10 +172,16 @@ void __frontswap_init(unsigned type) { struct swap_info_struct *sis = swap_info[type]; - BUG_ON(sis == NULL); - if (sis->frontswap_map == NULL) - return; - frontswap_ops.init(type); + if (backend_registered) { + BUG_ON(sis == NULL); + if (sis->frontswap_map == NULL) + return; + (*frontswap_ops.init)(type); + } else { + BUG_ON(type > MAX_SWAPFILES); + set_bit(type, need_init); + } + } EXPORT_SYMBOL(__frontswap_init); @@ -147,6 +206,11 @@ int __frontswap_store(struct page *page) struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); + if (!backend_registered) { + inc_frontswap_failed_stores(); + return ret; + } + BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) @@ -186,6 +250,9 @@ int __frontswap_load(struct page *page) struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); + if (!backend_registered) + return ret; + BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) @@ -209,6 +276,9 @@ void __frontswap_invalidate_page(unsigned type, pgoff_t offset) { struct swap_info_struct *sis = swap_info[type]; + if (!backend_registered) + return; + BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) { frontswap_ops.invalidate_page(type, offset); @@ -226,12 +296,15 @@ void __frontswap_invalidate_area(unsigned type) { struct swap_info_struct *sis = swap_info[type]; - BUG_ON(sis == NULL); - if (sis->frontswap_map == NULL) - return; - frontswap_ops.invalidate_area(type); - atomic_set(&sis->frontswap_pages, 0); - memset(sis->frontswap_map, 0, sis->max / sizeof(long)); + if (backend_registered) { + BUG_ON(sis == NULL); + if (sis->frontswap_map == NULL) + return; + (*frontswap_ops.invalidate_area)(type); + atomic_set(&sis->frontswap_pages, 0); + memset(sis->frontswap_map, 0, sis->max / sizeof(long)); + } + clear_bit(type, need_init); } EXPORT_SYMBOL(__frontswap_invalidate_area); @@ -364,6 +437,7 @@ static int __init init_frontswap(void) debugfs_create_u64("invalidates", S_IRUGO, root, &frontswap_invalidates); #endif + frontswap_enabled = 1; return 0; } -- cgit v0.10.2 From 1e01c968db3d0aebd48e31db15f24516b03128df Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 30 Apr 2013 15:26:51 -0700 Subject: frontswap: make frontswap_init use a pointer for the ops This simplifies the code in the frontswap - we can get rid of the 'backend_registered' test and instead check against frontswap_ops. [v1: Rebase on top of 703ba7fe5e0 (ramster->zcache move] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Andor Daam Cc: Dan Magenheimer Cc: Florian Schmaus Cc: Minchan Kim Cc: Stefan Hengelein Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c index e23d814..09c69c8 100644 --- a/drivers/staging/zcache/zcache-main.c +++ b/drivers/staging/zcache/zcache-main.c @@ -1707,9 +1707,9 @@ static struct frontswap_ops zcache_frontswap_ops = { .init = zcache_frontswap_init }; -struct frontswap_ops zcache_frontswap_register_ops(void) +struct frontswap_ops *zcache_frontswap_register_ops(void) { - struct frontswap_ops old_ops = + struct frontswap_ops *old_ops = frontswap_register_ops(&zcache_frontswap_ops); return old_ops; @@ -1874,7 +1874,7 @@ static int __init zcache_init(void) pr_warn("%s: cleancache_ops overridden\n", namestr); } if (zcache_enabled && !disable_frontswap) { - struct frontswap_ops old_ops; + struct frontswap_ops *old_ops; old_ops = zcache_frontswap_register_ops(); if (frontswap_has_exclusive_gets) @@ -1886,7 +1886,7 @@ static int __init zcache_init(void) namestr, frontswap_has_exclusive_gets, !disable_frontswap_ignore_nonactive); #endif - if (old_ops.init != NULL) + if (old_ops != NULL) pr_warn("%s: frontswap_ops overridden\n", namestr); } if (ramster_enabled) diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 3ee836d..7a01a5f 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -362,7 +362,7 @@ static int __init no_frontswap(char *s) } __setup("nofrontswap", no_frontswap); -static struct frontswap_ops __initdata tmem_frontswap_ops = { +static struct frontswap_ops tmem_frontswap_ops = { .store = tmem_frontswap_store, .load = tmem_frontswap_load, .invalidate_page = tmem_frontswap_flush_page, @@ -378,11 +378,11 @@ static int __init xen_tmem_init(void) #ifdef CONFIG_FRONTSWAP if (tmem_enabled && use_frontswap) { char *s = ""; - struct frontswap_ops old_ops = + struct frontswap_ops *old_ops = frontswap_register_ops(&tmem_frontswap_ops); tmem_frontswap_poolid = -1; - if (old_ops.init != NULL) + if (old_ops) s = " (WARNING: frontswap_ops overridden)"; printk(KERN_INFO "frontswap enabled, RAM provided by " "Xen Transcendent Memory%s\n", s); diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index 3044254..d4f2987 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -14,7 +14,7 @@ struct frontswap_ops { }; extern bool frontswap_enabled; -extern struct frontswap_ops +extern struct frontswap_ops * frontswap_register_ops(struct frontswap_ops *ops); extern void frontswap_shrink(unsigned long); extern unsigned long frontswap_curr_pages(void); diff --git a/mm/frontswap.c b/mm/frontswap.c index cbd2b8a..e44c9cb 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -24,7 +24,7 @@ * frontswap_ops is set by frontswap_register_ops to contain the pointers * to the frontswap "backend" implementation functions. */ -static struct frontswap_ops frontswap_ops __read_mostly; +static struct frontswap_ops *frontswap_ops __read_mostly; /* * This global enablement flag reduces overhead on systems where frontswap_ops @@ -108,41 +108,39 @@ static inline void inc_frontswap_invalidates(void) { } * * The time between the backend being registered and the swap file system * calling the backend (via the frontswap_* functions) is indeterminate as - * backend_registered is not atomic_t (or a value guarded by a spinlock). + * frontswap_ops is not atomic_t (or a value guarded by a spinlock). * That is OK as we are comfortable missing some of these calls to the newly * registered backend. * * Obviously the opposite (unloading the backend) must be done after all * the frontswap_[store|load|invalidate_area|invalidate_page] start - * ignorning or failing the requests - at which point backend_registered + * ignorning or failing the requests - at which point frontswap_ops * would have to be made in some fashion atomic. */ static DECLARE_BITMAP(need_init, MAX_SWAPFILES); -static bool backend_registered __read_mostly; /* * Register operations for frontswap, returning previous thus allowing * detection of multiple backends and possible nesting. */ -struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops) +struct frontswap_ops *frontswap_register_ops(struct frontswap_ops *ops) { - struct frontswap_ops old = frontswap_ops; + struct frontswap_ops *old = frontswap_ops; int i; - frontswap_ops = *ops; frontswap_enabled = true; for (i = 0; i < MAX_SWAPFILES; i++) { if (test_and_clear_bit(i, need_init)) - (*frontswap_ops.init)(i); + ops->init(i); } /* - * We MUST have backend_registered set _after_ the frontswap_init's + * We MUST have frontswap_ops set _after_ the frontswap_init's * have been called. Otherwise __frontswap_store might fail. Hence * the barrier to make sure compiler does not re-order us. */ barrier(); - backend_registered = true; + frontswap_ops = ops; return old; } EXPORT_SYMBOL(frontswap_register_ops); @@ -172,11 +170,11 @@ void __frontswap_init(unsigned type) { struct swap_info_struct *sis = swap_info[type]; - if (backend_registered) { + if (frontswap_ops) { BUG_ON(sis == NULL); if (sis->frontswap_map == NULL) return; - (*frontswap_ops.init)(type); + frontswap_ops->init(type); } else { BUG_ON(type > MAX_SWAPFILES); set_bit(type, need_init); @@ -206,7 +204,7 @@ int __frontswap_store(struct page *page) struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); - if (!backend_registered) { + if (!frontswap_ops) { inc_frontswap_failed_stores(); return ret; } @@ -215,7 +213,7 @@ int __frontswap_store(struct page *page) BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) dup = 1; - ret = frontswap_ops.store(type, offset, page); + ret = frontswap_ops->store(type, offset, page); if (ret == 0) { frontswap_set(sis, offset); inc_frontswap_succ_stores(); @@ -250,13 +248,13 @@ int __frontswap_load(struct page *page) struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); - if (!backend_registered) + if (!frontswap_ops) return ret; BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) - ret = frontswap_ops.load(type, offset, page); + ret = frontswap_ops->load(type, offset, page); if (ret == 0) { inc_frontswap_loads(); if (frontswap_tmem_exclusive_gets_enabled) { @@ -276,12 +274,12 @@ void __frontswap_invalidate_page(unsigned type, pgoff_t offset) { struct swap_info_struct *sis = swap_info[type]; - if (!backend_registered) + if (!frontswap_ops) return; BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) { - frontswap_ops.invalidate_page(type, offset); + frontswap_ops->invalidate_page(type, offset); __frontswap_clear(sis, offset); inc_frontswap_invalidates(); } @@ -296,11 +294,11 @@ void __frontswap_invalidate_area(unsigned type) { struct swap_info_struct *sis = swap_info[type]; - if (backend_registered) { + if (frontswap_ops) { BUG_ON(sis == NULL); if (sis->frontswap_map == NULL) return; - (*frontswap_ops.invalidate_area)(type); + frontswap_ops->invalidate_area(type); atomic_set(&sis->frontswap_pages, 0); memset(sis->frontswap_map, 0, sis->max / sizeof(long)); } -- cgit v0.10.2 From f066ea230a65f939afc354beae62716ab5f0e645 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Tue, 30 Apr 2013 15:26:53 -0700 Subject: mm: frontswap: cleanup code After allowing tmem backends to build/run as modules, frontswap_enabled always true if defined CONFIG_FRONTSWAP. But frontswap_test() depends on whether backend is registered, mv it into frontswap.c using fronstswap_ops to make the decision. frontswap_set/clear are not used outside frontswap, so don't export them. Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Andor Daam Cc: Dan Magenheimer Cc: Florian Schmaus Cc: Konrad Rzeszutek Wilk Cc: Minchan Kim Cc: Stefan Hengelein Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index d4f2987..6c49e1e 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -22,6 +22,7 @@ extern void frontswap_writethrough(bool); #define FRONTSWAP_HAS_EXCLUSIVE_GETS extern void frontswap_tmem_exclusive_gets(bool); +extern bool __frontswap_test(struct swap_info_struct *, pgoff_t); extern void __frontswap_init(unsigned type); extern int __frontswap_store(struct page *page); extern int __frontswap_load(struct page *page); @@ -29,26 +30,11 @@ extern void __frontswap_invalidate_page(unsigned, pgoff_t); extern void __frontswap_invalidate_area(unsigned); #ifdef CONFIG_FRONTSWAP +#define frontswap_enabled (1) static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) { - bool ret = false; - - if (frontswap_enabled && sis->frontswap_map) - ret = test_bit(offset, sis->frontswap_map); - return ret; -} - -static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) -{ - if (frontswap_enabled && sis->frontswap_map) - set_bit(offset, sis->frontswap_map); -} - -static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) -{ - if (frontswap_enabled && sis->frontswap_map) - clear_bit(offset, sis->frontswap_map); + return __frontswap_test(sis, offset); } static inline void frontswap_map_set(struct swap_info_struct *p, @@ -71,14 +57,6 @@ static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) return false; } -static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) -{ -} - -static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) -{ -} - static inline void frontswap_map_set(struct swap_info_struct *p, unsigned long *map) { diff --git a/mm/frontswap.c b/mm/frontswap.c index e44c9cb..2760b0f 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -27,14 +27,6 @@ static struct frontswap_ops *frontswap_ops __read_mostly; /* - * This global enablement flag reduces overhead on systems where frontswap_ops - * has not been registered, so is preferred to the slower alternative: a - * function call that checks a non-global. - */ -bool frontswap_enabled __read_mostly; -EXPORT_SYMBOL(frontswap_enabled); - -/* * If enabled, frontswap_store will return failure even on success. As * a result, the swap subsystem will always write the page to swap, in * effect converting frontswap into a writethrough cache. In this mode, @@ -128,8 +120,6 @@ struct frontswap_ops *frontswap_register_ops(struct frontswap_ops *ops) struct frontswap_ops *old = frontswap_ops; int i; - frontswap_enabled = true; - for (i = 0; i < MAX_SWAPFILES; i++) { if (test_and_clear_bit(i, need_init)) ops->init(i); @@ -183,9 +173,21 @@ void __frontswap_init(unsigned type) } EXPORT_SYMBOL(__frontswap_init); -static inline void __frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) +bool __frontswap_test(struct swap_info_struct *sis, + pgoff_t offset) +{ + bool ret = false; + + if (frontswap_ops && sis->frontswap_map) + ret = test_bit(offset, sis->frontswap_map); + return ret; +} +EXPORT_SYMBOL(__frontswap_test); + +static inline void __frontswap_clear(struct swap_info_struct *sis, + pgoff_t offset) { - frontswap_clear(sis, offset); + clear_bit(offset, sis->frontswap_map); atomic_dec(&sis->frontswap_pages); } @@ -204,18 +206,20 @@ int __frontswap_store(struct page *page) struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); - if (!frontswap_ops) { - inc_frontswap_failed_stores(); + /* + * Return if no backend registed. + * Don't need to inc frontswap_failed_stores here. + */ + if (!frontswap_ops) return ret; - } BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); - if (frontswap_test(sis, offset)) + if (__frontswap_test(sis, offset)) dup = 1; ret = frontswap_ops->store(type, offset, page); if (ret == 0) { - frontswap_set(sis, offset); + set_bit(offset, sis->frontswap_map); inc_frontswap_succ_stores(); if (!dup) atomic_inc(&sis->frontswap_pages); @@ -248,18 +252,18 @@ int __frontswap_load(struct page *page) struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); - if (!frontswap_ops) - return ret; - BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); - if (frontswap_test(sis, offset)) + /* + * __frontswap_test() will check whether there is backend registered + */ + if (__frontswap_test(sis, offset)) ret = frontswap_ops->load(type, offset, page); if (ret == 0) { inc_frontswap_loads(); if (frontswap_tmem_exclusive_gets_enabled) { SetPageDirty(page); - frontswap_clear(sis, offset); + __frontswap_clear(sis, offset); } } return ret; @@ -274,11 +278,11 @@ void __frontswap_invalidate_page(unsigned type, pgoff_t offset) { struct swap_info_struct *sis = swap_info[type]; - if (!frontswap_ops) - return; - BUG_ON(sis == NULL); - if (frontswap_test(sis, offset)) { + /* + * __frontswap_test() will check whether there is backend registered + */ + if (__frontswap_test(sis, offset)) { frontswap_ops->invalidate_page(type, offset); __frontswap_clear(sis, offset); inc_frontswap_invalidates(); @@ -435,7 +439,6 @@ static int __init init_frontswap(void) debugfs_create_u64("invalidates", S_IRUGO, root, &frontswap_invalidates); #endif - frontswap_enabled = 1; return 0; } -- cgit v0.10.2 From 4f89849da22db9d0edb378acea65e23fcd546173 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 30 Apr 2013 15:26:54 -0700 Subject: frontswap: get rid of swap_lock dependency Frontswap initialization routine depends on swap_lock, which want to be atomic about frontswap's first appearance. IOW, frontswap is not present and will fail all calls OR frontswap is fully functional but if new swap_info_struct isn't registered by enable_swap_info, swap subsystem doesn't start I/O so there is no race between init procedure and page I/O working on frontswap. So let's remove unnecessary swap_lock dependency. Cc: Dan Magenheimer Signed-off-by: Minchan Kim [v1: Rebased on my branch, reworked to work with backends loading late] [v2: Added a check for !map] [v3: Made the invalidate path follow the init path] [v4: Address comments by Wanpeng Li ] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Andor Daam Cc: Florian Schmaus Cc: Stefan Hengelein Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index 6c49e1e..8293262 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -23,7 +23,7 @@ extern void frontswap_writethrough(bool); extern void frontswap_tmem_exclusive_gets(bool); extern bool __frontswap_test(struct swap_info_struct *, pgoff_t); -extern void __frontswap_init(unsigned type); +extern void __frontswap_init(unsigned type, unsigned long *map); extern int __frontswap_store(struct page *page); extern int __frontswap_load(struct page *page); extern void __frontswap_invalidate_page(unsigned, pgoff_t); @@ -98,10 +98,10 @@ static inline void frontswap_invalidate_area(unsigned type) __frontswap_invalidate_area(type); } -static inline void frontswap_init(unsigned type) +static inline void frontswap_init(unsigned type, unsigned long *map) { if (frontswap_enabled) - __frontswap_init(type); + __frontswap_init(type, map); } #endif /* _LINUX_FRONTSWAP_H */ diff --git a/mm/frontswap.c b/mm/frontswap.c index 2760b0f..538367e 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -121,8 +121,13 @@ struct frontswap_ops *frontswap_register_ops(struct frontswap_ops *ops) int i; for (i = 0; i < MAX_SWAPFILES; i++) { - if (test_and_clear_bit(i, need_init)) + if (test_and_clear_bit(i, need_init)) { + struct swap_info_struct *sis = swap_info[i]; + /* __frontswap_init _should_ have set it! */ + if (!sis->frontswap_map) + return ERR_PTR(-EINVAL); ops->init(i); + } } /* * We MUST have frontswap_ops set _after_ the frontswap_init's @@ -156,20 +161,30 @@ EXPORT_SYMBOL(frontswap_tmem_exclusive_gets); /* * Called when a swap device is swapon'd. */ -void __frontswap_init(unsigned type) +void __frontswap_init(unsigned type, unsigned long *map) { struct swap_info_struct *sis = swap_info[type]; - if (frontswap_ops) { - BUG_ON(sis == NULL); - if (sis->frontswap_map == NULL) - return; + BUG_ON(sis == NULL); + + /* + * p->frontswap is a bitmap that we MUST have to figure out which page + * has gone in frontswap. Without it there is no point of continuing. + */ + if (WARN_ON(!map)) + return; + /* + * Irregardless of whether the frontswap backend has been loaded + * before this function or it will be later, we _MUST_ have the + * p->frontswap set to something valid to work properly. + */ + frontswap_map_set(sis, map); + if (frontswap_ops) frontswap_ops->init(type); - } else { + else { BUG_ON(type > MAX_SWAPFILES); set_bit(type, need_init); } - } EXPORT_SYMBOL(__frontswap_init); diff --git a/mm/swapfile.c b/mm/swapfile.c index d417efd..6c340d9 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1509,8 +1509,7 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span) } static void _enable_swap_info(struct swap_info_struct *p, int prio, - unsigned char *swap_map, - unsigned long *frontswap_map) + unsigned char *swap_map) { int i, prev; @@ -1519,7 +1518,6 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio, else p->prio = --least_priority; p->swap_map = swap_map; - frontswap_map_set(p, frontswap_map); p->flags |= SWP_WRITEOK; atomic_long_add(p->pages, &nr_swap_pages); total_swap_pages += p->pages; @@ -1542,10 +1540,10 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, unsigned char *swap_map, unsigned long *frontswap_map) { + frontswap_init(p->type, frontswap_map); spin_lock(&swap_lock); spin_lock(&p->lock); - _enable_swap_info(p, prio, swap_map, frontswap_map); - frontswap_init(p->type); + _enable_swap_info(p, prio, swap_map); spin_unlock(&p->lock); spin_unlock(&swap_lock); } @@ -1554,7 +1552,7 @@ static void reinsert_swap_info(struct swap_info_struct *p) { spin_lock(&swap_lock); spin_lock(&p->lock); - _enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p)); + _enable_swap_info(p, p->prio, p->swap_map); spin_unlock(&p->lock); spin_unlock(&swap_lock); } @@ -1563,6 +1561,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) { struct swap_info_struct *p = NULL; unsigned char *swap_map; + unsigned long *frontswap_map; struct file *swap_file, *victim; struct address_space *mapping; struct inode *inode; @@ -1662,12 +1661,14 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) swap_map = p->swap_map; p->swap_map = NULL; p->flags = 0; - frontswap_invalidate_area(type); + frontswap_map = frontswap_map_get(p); + frontswap_map_set(p, NULL); spin_unlock(&p->lock); spin_unlock(&swap_lock); + frontswap_invalidate_area(type); mutex_unlock(&swapon_mutex); vfree(swap_map); - vfree(frontswap_map_get(p)); + vfree(frontswap_map); /* Destroy swap account informatin */ swap_cgroup_swapoff(type); -- cgit v0.10.2 From 49a9ab815acb8379a2f5fd43abe40038821e8f87 Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Tue, 30 Apr 2013 15:26:56 -0700 Subject: mm: cleancache: lazy initialization to allow tmem backends to build/run as modules With the goal of allowing tmem backends (zcache, ramster, Xen tmem) to be built/loaded as modules rather than built-in and enabled by a boot parameter, this patch provides "lazy initialization", allowing backends to register to cleancache even after filesystems were mounted. Calls to init_fs and init_shared_fs are remembered as fake poolids but no real tmem_pools created. On backend registration the fake poolids are mapped to real poolids and respective tmem_pools. Signed-off-by: Stefan Hengelein Signed-off-by: Florian Schmaus Signed-off-by: Andor Daam Signed-off-by: Dan Magenheimer [v1: Minor fixes: used #define for some values and bools] [v2: Removed CLEANCACHE_HAS_LAZY_INIT] [v3: Added more comments, added a lock for [shared_|]fs_poolid_map] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/cleancache.c b/mm/cleancache.c index d76ba74..0cecdbb 100644 --- a/mm/cleancache.c +++ b/mm/cleancache.c @@ -45,15 +45,99 @@ static u64 cleancache_puts; static u64 cleancache_invalidates; /* - * register operations for cleancache, returning previous thus allowing - * detection of multiple backends and possible nesting + * When no backend is registered all calls to init_fs and init_shared_fs + * are registered and fake poolids (FAKE_FS_POOLID_OFFSET or + * FAKE_SHARED_FS_POOLID_OFFSET, plus offset in the respective array + * [shared_|]fs_poolid_map) are given to the respective super block + * (sb->cleancache_poolid) and no tmem_pools are created. When a backend + * registers with cleancache the previous calls to init_fs and init_shared_fs + * are executed to create tmem_pools and set the respective poolids. While no + * backend is registered all "puts", "gets" and "flushes" are ignored or failed. + */ +#define MAX_INITIALIZABLE_FS 32 +#define FAKE_FS_POOLID_OFFSET 1000 +#define FAKE_SHARED_FS_POOLID_OFFSET 2000 + +#define FS_NO_BACKEND (-1) +#define FS_UNKNOWN (-2) +static int fs_poolid_map[MAX_INITIALIZABLE_FS]; +static int shared_fs_poolid_map[MAX_INITIALIZABLE_FS]; +static char *uuids[MAX_INITIALIZABLE_FS]; +/* + * Mutex for the [shared_|]fs_poolid_map to guard against multiple threads + * invoking umount (and ending in __cleancache_invalidate_fs) and also multiple + * threads calling mount (and ending up in __cleancache_init_[shared|]fs). + */ +static DEFINE_MUTEX(poolid_mutex); +/* + * When set to false (default) all calls to the cleancache functions, except + * the __cleancache_invalidate_fs and __cleancache_init_[shared|]fs are guarded + * by the if (!backend_registered) return. This means multiple threads (from + * different filesystems) will be checking backend_registered. The usage of a + * bool instead of a atomic_t or a bool guarded by a spinlock is OK - we are + * OK if the time between the backend's have been initialized (and + * backend_registered has been set to true) and when the filesystems start + * actually calling the backends. The inverse (when unloading) is obviously + * not good - but this shim does not do that (yet). + */ +static bool backend_registered __read_mostly; + +/* + * The backends and filesystems work all asynchronously. This is b/c the + * backends can be built as modules. + * The usual sequence of events is: + * a) mount / -> __cleancache_init_fs is called. We set the + * [shared_|]fs_poolid_map and uuids for. + * + * b). user does I/Os -> we call the rest of __cleancache_* functions + * which return immediately as backend_registered is false. + * + * c). modprobe zcache -> cleancache_register_ops. We init the backend + * and set backend_registered to true, and for any fs_poolid_map + * (which is set by __cleancache_init_fs) we initialize the poolid. + * + * d). user does I/Os -> now that backend_registered is true all the + * __cleancache_* functions can call the backend. They all check + * that fs_poolid_map is valid and if so invoke the backend. + * + * e). umount / -> __cleancache_invalidate_fs, the fs_poolid_map is + * reset (which is the second check in the __cleancache_* ops + * to call the backend). + * + * The sequence of event could also be c), followed by a), and d). and e). The + * c) would not happen anymore. There is also the chance of c), and one thread + * doing a) + d), and another doing e). For that case we depend on the + * filesystem calling __cleancache_invalidate_fs in the proper sequence (so + * that it handles all I/Os before it invalidates the fs (which is last part + * of unmounting process). + * + * Note: The acute reader will notice that there is no "rmmod zcache" case. + * This is b/c the functionality for that is not yet implemented and when + * done, will require some extra locking not yet devised. + */ + +/* + * Register operations for cleancache, returning previous thus allowing + * detection of multiple backends and possible nesting. */ struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops) { struct cleancache_ops old = cleancache_ops; + int i; + mutex_lock(&poolid_mutex); cleancache_ops = *ops; - cleancache_enabled = 1; + + backend_registered = true; + for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { + if (fs_poolid_map[i] == FS_NO_BACKEND) + fs_poolid_map[i] = (*cleancache_ops.init_fs)(PAGE_SIZE); + if (shared_fs_poolid_map[i] == FS_NO_BACKEND) + shared_fs_poolid_map[i] = (*cleancache_ops.init_shared_fs) + (uuids[i], PAGE_SIZE); + } +out: + mutex_unlock(&poolid_mutex); return old; } EXPORT_SYMBOL(cleancache_register_ops); @@ -61,15 +145,42 @@ EXPORT_SYMBOL(cleancache_register_ops); /* Called by a cleancache-enabled filesystem at time of mount */ void __cleancache_init_fs(struct super_block *sb) { - sb->cleancache_poolid = (*cleancache_ops.init_fs)(PAGE_SIZE); + int i; + + mutex_lock(&poolid_mutex); + for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { + if (fs_poolid_map[i] == FS_UNKNOWN) { + sb->cleancache_poolid = i + FAKE_FS_POOLID_OFFSET; + if (backend_registered) + fs_poolid_map[i] = (*cleancache_ops.init_fs)(PAGE_SIZE); + else + fs_poolid_map[i] = FS_NO_BACKEND; + break; + } + } + mutex_unlock(&poolid_mutex); } EXPORT_SYMBOL(__cleancache_init_fs); /* Called by a cleancache-enabled clustered filesystem at time of mount */ void __cleancache_init_shared_fs(char *uuid, struct super_block *sb) { - sb->cleancache_poolid = - (*cleancache_ops.init_shared_fs)(uuid, PAGE_SIZE); + int i; + + mutex_lock(&poolid_mutex); + for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { + if (shared_fs_poolid_map[i] == FS_UNKNOWN) { + sb->cleancache_poolid = i + FAKE_SHARED_FS_POOLID_OFFSET; + uuids[i] = uuid; + if (backend_registered) + shared_fs_poolid_map[i] = (*cleancache_ops.init_shared_fs) + (uuid, PAGE_SIZE); + else + shared_fs_poolid_map[i] = FS_NO_BACKEND; + break; + } + } + mutex_unlock(&poolid_mutex); } EXPORT_SYMBOL(__cleancache_init_shared_fs); @@ -99,27 +210,53 @@ static int cleancache_get_key(struct inode *inode, } /* + * Returns a pool_id that is associated with a given fake poolid. + */ +static int get_poolid_from_fake(int fake_pool_id) +{ + if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET) + return shared_fs_poolid_map[fake_pool_id - + FAKE_SHARED_FS_POOLID_OFFSET]; + else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET) + return fs_poolid_map[fake_pool_id - FAKE_FS_POOLID_OFFSET]; + return FS_NO_BACKEND; +} + +/* * "Get" data from cleancache associated with the poolid/inode/index * that were specified when the data was put to cleanache and, if * successful, use it to fill the specified page with data and return 0. * The pageframe is unchanged and returns -1 if the get fails. * Page must be locked by caller. + * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. */ int __cleancache_get_page(struct page *page) { int ret = -1; int pool_id; + int fake_pool_id; struct cleancache_filekey key = { .u.key = { 0 } }; + if (!backend_registered) { + cleancache_failed_gets++; + goto out; + } + VM_BUG_ON(!PageLocked(page)); - pool_id = page->mapping->host->i_sb->cleancache_poolid; - if (pool_id < 0) + fake_pool_id = page->mapping->host->i_sb->cleancache_poolid; + if (fake_pool_id < 0) goto out; + pool_id = get_poolid_from_fake(fake_pool_id); if (cleancache_get_key(page->mapping->host, &key) < 0) goto out; - ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page); + if (pool_id >= 0) + ret = (*cleancache_ops.get_page)(pool_id, + key, page->index, page); if (ret == 0) cleancache_succ_gets++; else @@ -134,16 +271,31 @@ EXPORT_SYMBOL(__cleancache_get_page); * (previously-obtained per-filesystem) poolid and the page's, * inode and page index. Page must be locked. Note that a put_page * always "succeeds", though a subsequent get_page may succeed or fail. + * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. */ void __cleancache_put_page(struct page *page) { int pool_id; + int fake_pool_id; struct cleancache_filekey key = { .u.key = { 0 } }; + if (!backend_registered) { + cleancache_puts++; + return; + } + VM_BUG_ON(!PageLocked(page)); - pool_id = page->mapping->host->i_sb->cleancache_poolid; + fake_pool_id = page->mapping->host->i_sb->cleancache_poolid; + if (fake_pool_id < 0) + return; + + pool_id = get_poolid_from_fake(fake_pool_id); + if (pool_id >= 0 && - cleancache_get_key(page->mapping->host, &key) >= 0) { + cleancache_get_key(page->mapping->host, &key) >= 0) { (*cleancache_ops.put_page)(pool_id, key, page->index, page); cleancache_puts++; } @@ -153,19 +305,31 @@ EXPORT_SYMBOL(__cleancache_put_page); /* * Invalidate any data from cleancache associated with the poolid and the * page's inode and page index so that a subsequent "get" will fail. + * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. */ void __cleancache_invalidate_page(struct address_space *mapping, struct page *page) { /* careful... page->mapping is NULL sometimes when this is called */ - int pool_id = mapping->host->i_sb->cleancache_poolid; + int pool_id; + int fake_pool_id = mapping->host->i_sb->cleancache_poolid; struct cleancache_filekey key = { .u.key = { 0 } }; - if (pool_id >= 0) { + if (!backend_registered) + return; + + if (fake_pool_id >= 0) { + pool_id = get_poolid_from_fake(fake_pool_id); + if (pool_id < 0) + return; + VM_BUG_ON(!PageLocked(page)); if (cleancache_get_key(mapping->host, &key) >= 0) { (*cleancache_ops.invalidate_page)(pool_id, - key, page->index); + key, page->index); cleancache_invalidates++; } } @@ -176,12 +340,25 @@ EXPORT_SYMBOL(__cleancache_invalidate_page); * Invalidate all data from cleancache associated with the poolid and the * mappings's inode so that all subsequent gets to this poolid/inode * will fail. + * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. */ void __cleancache_invalidate_inode(struct address_space *mapping) { - int pool_id = mapping->host->i_sb->cleancache_poolid; + int pool_id; + int fake_pool_id = mapping->host->i_sb->cleancache_poolid; struct cleancache_filekey key = { .u.key = { 0 } }; + if (!backend_registered) + return; + + if (fake_pool_id < 0) + return; + + pool_id = get_poolid_from_fake(fake_pool_id); + if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) (*cleancache_ops.invalidate_inode)(pool_id, key); } @@ -189,21 +366,37 @@ EXPORT_SYMBOL(__cleancache_invalidate_inode); /* * Called by any cleancache-enabled filesystem at time of unmount; - * note that pool_id is surrendered and may be reutrned by a subsequent - * cleancache_init_fs or cleancache_init_shared_fs + * note that pool_id is surrendered and may be returned by a subsequent + * cleancache_init_fs or cleancache_init_shared_fs. */ void __cleancache_invalidate_fs(struct super_block *sb) { - if (sb->cleancache_poolid >= 0) { - int old_poolid = sb->cleancache_poolid; - sb->cleancache_poolid = -1; - (*cleancache_ops.invalidate_fs)(old_poolid); + int index; + int fake_pool_id = sb->cleancache_poolid; + int old_poolid = fake_pool_id; + + mutex_lock(&poolid_mutex); + if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET) { + index = fake_pool_id - FAKE_SHARED_FS_POOLID_OFFSET; + old_poolid = shared_fs_poolid_map[index]; + shared_fs_poolid_map[index] = FS_UNKNOWN; + uuids[index] = NULL; + } else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET) { + index = fake_pool_id - FAKE_FS_POOLID_OFFSET; + old_poolid = fs_poolid_map[index]; + fs_poolid_map[index] = FS_UNKNOWN; } + sb->cleancache_poolid = -1; + if (backend_registered) + (*cleancache_ops.invalidate_fs)(old_poolid); + mutex_unlock(&poolid_mutex); } EXPORT_SYMBOL(__cleancache_invalidate_fs); static int __init init_cleancache(void) { + int i; + #ifdef CONFIG_DEBUG_FS struct dentry *root = debugfs_create_dir("cleancache", NULL); if (root == NULL) @@ -215,6 +408,11 @@ static int __init init_cleancache(void) debugfs_create_u64("invalidates", S_IRUGO, root, &cleancache_invalidates); #endif + for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { + fs_poolid_map[i] = FS_UNKNOWN; + shared_fs_poolid_map[i] = FS_UNKNOWN; + } + cleancache_enabled = 1; return 0; } module_init(init_cleancache) -- cgit v0.10.2 From 833f8662af9659508afc3cb80f09138eade378e2 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 30 Apr 2013 15:26:57 -0700 Subject: cleancache: Make cleancache_init use a pointer for the ops Instead of using a backend_registered to determine whether a backend is enabled. This allows us to remove the backend_register check and just do 'if (cleancache_ops)' [v1: Rebase on top of b97c4b430b0a (ramster->zcache move] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Andor Daam Cc: Dan Magenheimer Cc: Florian Schmaus Cc: Minchan Kim Cc: Stefan Hengelein Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c index 09c69c8..6bd4ebb 100644 --- a/drivers/staging/zcache/zcache-main.c +++ b/drivers/staging/zcache/zcache-main.c @@ -1576,9 +1576,9 @@ static struct cleancache_ops zcache_cleancache_ops = { .init_fs = zcache_cleancache_init_fs }; -struct cleancache_ops zcache_cleancache_register_ops(void) +struct cleancache_ops *zcache_cleancache_register_ops(void) { - struct cleancache_ops old_ops = + struct cleancache_ops *old_ops = cleancache_register_ops(&zcache_cleancache_ops); return old_ops; @@ -1860,7 +1860,7 @@ static int __init zcache_init(void) } zbud_init(); if (zcache_enabled && !disable_cleancache) { - struct cleancache_ops old_ops; + struct cleancache_ops *old_ops; register_shrinker(&zcache_shrinker); old_ops = zcache_cleancache_register_ops(); @@ -1870,7 +1870,7 @@ static int __init zcache_init(void) pr_info("%s: cleancache: ignorenonactive = %d\n", namestr, !disable_cleancache_ignore_nonactive); #endif - if (old_ops.init_fs != NULL) + if (old_ops != NULL) pr_warn("%s: cleancache_ops overridden\n", namestr); } if (zcache_enabled && !disable_frontswap) { diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 7a01a5f..fd79eab 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -236,7 +236,7 @@ static int __init no_cleancache(char *s) } __setup("nocleancache", no_cleancache); -static struct cleancache_ops __initdata tmem_cleancache_ops = { +static struct cleancache_ops tmem_cleancache_ops = { .put_page = tmem_cleancache_put_page, .get_page = tmem_cleancache_get_page, .invalidate_page = tmem_cleancache_flush_page, @@ -392,9 +392,9 @@ static int __init xen_tmem_init(void) BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); if (tmem_enabled && use_cleancache) { char *s = ""; - struct cleancache_ops old_ops = + struct cleancache_ops *old_ops = cleancache_register_ops(&tmem_cleancache_ops); - if (old_ops.init_fs != NULL) + if (old_ops) s = " (WARNING: cleancache_ops overridden)"; printk(KERN_INFO "cleancache enabled, RAM provided by " "Xen Transcendent Memory%s\n", s); diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h index 42e55de..3af5ea8 100644 --- a/include/linux/cleancache.h +++ b/include/linux/cleancache.h @@ -33,7 +33,7 @@ struct cleancache_ops { void (*invalidate_fs)(int); }; -extern struct cleancache_ops +extern struct cleancache_ops * cleancache_register_ops(struct cleancache_ops *ops); extern void __cleancache_init_fs(struct super_block *); extern void __cleancache_init_shared_fs(char *, struct super_block *); diff --git a/mm/cleancache.c b/mm/cleancache.c index 0cecdbb..b3ae19b 100644 --- a/mm/cleancache.c +++ b/mm/cleancache.c @@ -32,7 +32,7 @@ EXPORT_SYMBOL(cleancache_enabled); * cleancache_ops is set by cleancache_ops_register to contain the pointers * to the cleancache "backend" implementation functions. */ -static struct cleancache_ops cleancache_ops __read_mostly; +static struct cleancache_ops *cleancache_ops __read_mostly; /* * Counters available via /sys/kernel/debug/frontswap (if debugfs is @@ -72,15 +72,14 @@ static DEFINE_MUTEX(poolid_mutex); /* * When set to false (default) all calls to the cleancache functions, except * the __cleancache_invalidate_fs and __cleancache_init_[shared|]fs are guarded - * by the if (!backend_registered) return. This means multiple threads (from - * different filesystems) will be checking backend_registered. The usage of a + * by the if (!cleancache_ops) return. This means multiple threads (from + * different filesystems) will be checking cleancache_ops. The usage of a * bool instead of a atomic_t or a bool guarded by a spinlock is OK - we are * OK if the time between the backend's have been initialized (and - * backend_registered has been set to true) and when the filesystems start + * cleancache_ops has been set to not NULL) and when the filesystems start * actually calling the backends. The inverse (when unloading) is obviously * not good - but this shim does not do that (yet). */ -static bool backend_registered __read_mostly; /* * The backends and filesystems work all asynchronously. This is b/c the @@ -90,13 +89,13 @@ static bool backend_registered __read_mostly; * [shared_|]fs_poolid_map and uuids for. * * b). user does I/Os -> we call the rest of __cleancache_* functions - * which return immediately as backend_registered is false. + * which return immediately as cleancache_ops is false. * * c). modprobe zcache -> cleancache_register_ops. We init the backend - * and set backend_registered to true, and for any fs_poolid_map + * and set cleancache_ops to true, and for any fs_poolid_map * (which is set by __cleancache_init_fs) we initialize the poolid. * - * d). user does I/Os -> now that backend_registered is true all the + * d). user does I/Os -> now that cleancache_ops is true all the * __cleancache_* functions can call the backend. They all check * that fs_poolid_map is valid and if so invoke the backend. * @@ -120,23 +119,26 @@ static bool backend_registered __read_mostly; * Register operations for cleancache, returning previous thus allowing * detection of multiple backends and possible nesting. */ -struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops) +struct cleancache_ops *cleancache_register_ops(struct cleancache_ops *ops) { - struct cleancache_ops old = cleancache_ops; + struct cleancache_ops *old = cleancache_ops; int i; mutex_lock(&poolid_mutex); - cleancache_ops = *ops; - - backend_registered = true; for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { if (fs_poolid_map[i] == FS_NO_BACKEND) - fs_poolid_map[i] = (*cleancache_ops.init_fs)(PAGE_SIZE); + fs_poolid_map[i] = ops->init_fs(PAGE_SIZE); if (shared_fs_poolid_map[i] == FS_NO_BACKEND) - shared_fs_poolid_map[i] = (*cleancache_ops.init_shared_fs) + shared_fs_poolid_map[i] = ops->init_shared_fs (uuids[i], PAGE_SIZE); } -out: + /* + * We MUST set cleancache_ops _after_ we have called the backends + * init_fs or init_shared_fs functions. Otherwise the compiler might + * re-order where cleancache_ops is set in this function. + */ + barrier(); + cleancache_ops = ops; mutex_unlock(&poolid_mutex); return old; } @@ -151,8 +153,8 @@ void __cleancache_init_fs(struct super_block *sb) for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { if (fs_poolid_map[i] == FS_UNKNOWN) { sb->cleancache_poolid = i + FAKE_FS_POOLID_OFFSET; - if (backend_registered) - fs_poolid_map[i] = (*cleancache_ops.init_fs)(PAGE_SIZE); + if (cleancache_ops) + fs_poolid_map[i] = cleancache_ops->init_fs(PAGE_SIZE); else fs_poolid_map[i] = FS_NO_BACKEND; break; @@ -172,8 +174,8 @@ void __cleancache_init_shared_fs(char *uuid, struct super_block *sb) if (shared_fs_poolid_map[i] == FS_UNKNOWN) { sb->cleancache_poolid = i + FAKE_SHARED_FS_POOLID_OFFSET; uuids[i] = uuid; - if (backend_registered) - shared_fs_poolid_map[i] = (*cleancache_ops.init_shared_fs) + if (cleancache_ops) + shared_fs_poolid_map[i] = cleancache_ops->init_shared_fs (uuid, PAGE_SIZE); else shared_fs_poolid_map[i] = FS_NO_BACKEND; @@ -240,7 +242,7 @@ int __cleancache_get_page(struct page *page) int fake_pool_id; struct cleancache_filekey key = { .u.key = { 0 } }; - if (!backend_registered) { + if (!cleancache_ops) { cleancache_failed_gets++; goto out; } @@ -255,7 +257,7 @@ int __cleancache_get_page(struct page *page) goto out; if (pool_id >= 0) - ret = (*cleancache_ops.get_page)(pool_id, + ret = cleancache_ops->get_page(pool_id, key, page->index, page); if (ret == 0) cleancache_succ_gets++; @@ -282,7 +284,7 @@ void __cleancache_put_page(struct page *page) int fake_pool_id; struct cleancache_filekey key = { .u.key = { 0 } }; - if (!backend_registered) { + if (!cleancache_ops) { cleancache_puts++; return; } @@ -296,7 +298,7 @@ void __cleancache_put_page(struct page *page) if (pool_id >= 0 && cleancache_get_key(page->mapping->host, &key) >= 0) { - (*cleancache_ops.put_page)(pool_id, key, page->index, page); + cleancache_ops->put_page(pool_id, key, page->index, page); cleancache_puts++; } } @@ -318,7 +320,7 @@ void __cleancache_invalidate_page(struct address_space *mapping, int fake_pool_id = mapping->host->i_sb->cleancache_poolid; struct cleancache_filekey key = { .u.key = { 0 } }; - if (!backend_registered) + if (!cleancache_ops) return; if (fake_pool_id >= 0) { @@ -328,7 +330,7 @@ void __cleancache_invalidate_page(struct address_space *mapping, VM_BUG_ON(!PageLocked(page)); if (cleancache_get_key(mapping->host, &key) >= 0) { - (*cleancache_ops.invalidate_page)(pool_id, + cleancache_ops->invalidate_page(pool_id, key, page->index); cleancache_invalidates++; } @@ -351,7 +353,7 @@ void __cleancache_invalidate_inode(struct address_space *mapping) int fake_pool_id = mapping->host->i_sb->cleancache_poolid; struct cleancache_filekey key = { .u.key = { 0 } }; - if (!backend_registered) + if (!cleancache_ops) return; if (fake_pool_id < 0) @@ -360,7 +362,7 @@ void __cleancache_invalidate_inode(struct address_space *mapping) pool_id = get_poolid_from_fake(fake_pool_id); if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) - (*cleancache_ops.invalidate_inode)(pool_id, key); + cleancache_ops->invalidate_inode(pool_id, key); } EXPORT_SYMBOL(__cleancache_invalidate_inode); @@ -387,8 +389,8 @@ void __cleancache_invalidate_fs(struct super_block *sb) fs_poolid_map[index] = FS_UNKNOWN; } sb->cleancache_poolid = -1; - if (backend_registered) - (*cleancache_ops.invalidate_fs)(old_poolid); + if (cleancache_ops) + cleancache_ops->invalidate_fs(old_poolid); mutex_unlock(&poolid_mutex); } EXPORT_SYMBOL(__cleancache_invalidate_fs); -- cgit v0.10.2 From ff610a1d55da22bf95bbc6a8b193e052169b34b7 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Tue, 30 Apr 2013 15:26:58 -0700 Subject: mm: cleancache: clean up cleancache_enabled cleancache_ops is used to decide whether backend is registered. So now cleancache_enabled is always true if defined CONFIG_CLEANCACHE. Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Andor Daam Cc: Dan Magenheimer Cc: Florian Schmaus Cc: Konrad Rzeszutek Wilk Cc: Minchan Kim Cc: Stefan Hengelein Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h index 3af5ea8..4ce9056 100644 --- a/include/linux/cleancache.h +++ b/include/linux/cleancache.h @@ -42,9 +42,9 @@ extern void __cleancache_put_page(struct page *); extern void __cleancache_invalidate_page(struct address_space *, struct page *); extern void __cleancache_invalidate_inode(struct address_space *); extern void __cleancache_invalidate_fs(struct super_block *); -extern int cleancache_enabled; #ifdef CONFIG_CLEANCACHE +#define cleancache_enabled (1) static inline bool cleancache_fs_enabled(struct page *page) { return page->mapping->host->i_sb->cleancache_poolid >= 0; diff --git a/mm/cleancache.c b/mm/cleancache.c index b3ae19b..5875f48 100644 --- a/mm/cleancache.c +++ b/mm/cleancache.c @@ -19,16 +19,6 @@ #include /* - * This global enablement flag may be read thousands of times per second - * by cleancache_get/put/invalidate even on systems where cleancache_ops - * is not claimed (e.g. cleancache is config'ed on but remains - * disabled), so is preferred to the slower alternative: a function - * call that checks a non-global. - */ -int cleancache_enabled __read_mostly; -EXPORT_SYMBOL(cleancache_enabled); - -/* * cleancache_ops is set by cleancache_ops_register to contain the pointers * to the cleancache "backend" implementation functions. */ @@ -414,7 +404,6 @@ static int __init init_cleancache(void) fs_poolid_map[i] = FS_UNKNOWN; shared_fs_poolid_map[i] = FS_UNKNOWN; } - cleancache_enabled = 1; return 0; } module_init(init_cleancache) -- cgit v0.10.2 From 10a7a0771399a57a297fca9615450dbb3f88081a Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Tue, 30 Apr 2013 15:27:00 -0700 Subject: xen: tmem: enable Xen tmem shim to be built/loaded as a module Allow Xen tmem shim to be built/loaded as a module. Xen self-ballooning and frontswap-selfshrinking are now also "lazily" initialized when the Xen tmem shim is loaded as a module, unless explicitly disabled by module parameters. Note runtime dependency disallows loading if cleancache/frontswap lazy initialization patches are not present. If built-in (not built as a module), the original mechanism of enabling via a kernel boot parameter is retained, but this should be considered deprecated. Note that module unload is explicitly not yet supported. [v1: Removed the [CLEANCACHE|FRONTSWAP]_HAS_LAZY_INIT ifdef] [v2: Squashed the xen/tmem: Remove the subsys call patch in] [akpm@linux-foundation.org: fix build (disable_frontswap_selfshrinking undeclared)] Signed-off-by: Dan Magenheimer Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Andor Daam Cc: Florian Schmaus Cc: Minchan Kim Cc: Stefan Hengelein Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 67af155..dd4d9cb 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -145,9 +145,9 @@ config SWIOTLB_XEN select SWIOTLB config XEN_TMEM - bool + tristate depends on !ARM - default y if (CLEANCACHE || FRONTSWAP) + default m if (CLEANCACHE || FRONTSWAP) help Shim to interface in-kernel Transcendent Memory hooks (e.g. cleancache and frontswap) to Xen tmem hypercalls. diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index fd79eab..8adde8e 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -5,6 +5,7 @@ * Author: Dan Magenheimer */ +#include #include #include #include @@ -128,6 +129,7 @@ static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid) return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); } +#ifndef CONFIG_XEN_TMEM_MODULE bool __read_mostly tmem_enabled = false; static int __init enable_tmem(char *s) @@ -136,6 +138,7 @@ static int __init enable_tmem(char *s) return 1; } __setup("tmem", enable_tmem); +#endif #ifdef CONFIG_CLEANCACHE static int xen_tmem_destroy_pool(u32 pool_id) @@ -227,14 +230,19 @@ static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize) return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize); } -static bool __initdata use_cleancache = true; - +static bool disable_cleancache __read_mostly; +static bool disable_selfballooning __read_mostly; +#ifdef CONFIG_XEN_TMEM_MODULE +module_param(disable_cleancache, bool, S_IRUGO); +module_param(disable_selfballooning, bool, S_IRUGO); +#else static int __init no_cleancache(char *s) { - use_cleancache = false; + disable_cleancache = true; return 1; } __setup("nocleancache", no_cleancache); +#endif static struct cleancache_ops tmem_cleancache_ops = { .put_page = tmem_cleancache_put_page, @@ -353,14 +361,19 @@ static void tmem_frontswap_init(unsigned ignored) xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE); } -static bool __initdata use_frontswap = true; - +static bool disable_frontswap __read_mostly; +static bool disable_frontswap_selfshrinking __read_mostly; +#ifdef CONFIG_XEN_TMEM_MODULE +module_param(disable_frontswap, bool, S_IRUGO); +module_param(disable_frontswap_selfshrinking, bool, S_IRUGO); +#else static int __init no_frontswap(char *s) { - use_frontswap = false; + disable_frontswap = true; return 1; } __setup("nofrontswap", no_frontswap); +#endif static struct frontswap_ops tmem_frontswap_ops = { .store = tmem_frontswap_store, @@ -369,14 +382,16 @@ static struct frontswap_ops tmem_frontswap_ops = { .invalidate_area = tmem_frontswap_flush_area, .init = tmem_frontswap_init }; +#else /* CONFIG_FRONTSWAP */ +#define disable_frontswap_selfshrinking 1 #endif -static int __init xen_tmem_init(void) +static int xen_tmem_init(void) { if (!xen_domain()) return 0; #ifdef CONFIG_FRONTSWAP - if (tmem_enabled && use_frontswap) { + if (tmem_enabled && !disable_frontswap) { char *s = ""; struct frontswap_ops *old_ops = frontswap_register_ops(&tmem_frontswap_ops); @@ -390,7 +405,7 @@ static int __init xen_tmem_init(void) #endif #ifdef CONFIG_CLEANCACHE BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); - if (tmem_enabled && use_cleancache) { + if (tmem_enabled && !disable_cleancache) { char *s = ""; struct cleancache_ops *old_ops = cleancache_register_ops(&tmem_cleancache_ops); @@ -400,7 +415,14 @@ static int __init xen_tmem_init(void) "Xen Transcendent Memory%s\n", s); } #endif +#ifdef CONFIG_XEN_SELFBALLOONING + xen_selfballoon_init(!disable_selfballooning, + !disable_frontswap_selfshrinking); +#endif return 0; } module_init(xen_tmem_init) +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dan Magenheimer "); +MODULE_DESCRIPTION("Shim to Xen transcendent memory"); diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 2552d3e..f2ef569 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -121,7 +121,7 @@ static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process); static bool frontswap_selfshrinking __read_mostly; /* Enable/disable with kernel boot option. */ -static bool use_frontswap_selfshrink __initdata = true; +static bool use_frontswap_selfshrink = true; /* * The default values for the following parameters were deemed reasonable @@ -185,7 +185,7 @@ static int __init xen_nofrontswap_selfshrink_setup(char *s) __setup("noselfshrink", xen_nofrontswap_selfshrink_setup); /* Disable with kernel boot option. */ -static bool use_selfballooning __initdata = true; +static bool use_selfballooning = true; static int __init xen_noselfballooning_setup(char *s) { @@ -196,7 +196,7 @@ static int __init xen_noselfballooning_setup(char *s) __setup("noselfballooning", xen_noselfballooning_setup); #else /* !CONFIG_FRONTSWAP */ /* Enable with kernel boot option. */ -static bool use_selfballooning __initdata = false; +static bool use_selfballooning; static int __init xen_selfballooning_setup(char *s) { @@ -537,7 +537,7 @@ int register_xen_selfballooning(struct device *dev) } EXPORT_SYMBOL(register_xen_selfballooning); -static int __init xen_selfballoon_init(void) +int xen_selfballoon_init(bool use_selfballooning, bool use_frontswap_selfshrink) { bool enable = false; @@ -571,7 +571,4 @@ static int __init xen_selfballoon_init(void) return 0; } - -subsys_initcall(xen_selfballoon_init); - -MODULE_LICENSE("GPL"); +EXPORT_SYMBOL(xen_selfballoon_init); diff --git a/include/xen/tmem.h b/include/xen/tmem.h index 591550a..3930a90 100644 --- a/include/xen/tmem.h +++ b/include/xen/tmem.h @@ -3,7 +3,15 @@ #include +#ifdef CONFIG_XEN_TMEM_MODULE +#define tmem_enabled true +#else /* defined in drivers/xen/tmem.c */ extern bool tmem_enabled; +#endif + +#ifdef CONFIG_XEN_SELFBALLOONING +extern int xen_selfballoon_init(bool, bool); +#endif #endif /* _XEN_TMEM_H */ -- cgit v0.10.2 From f42158fe6785dd366073c7fd00f159a65c631597 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 30 Apr 2013 15:27:01 -0700 Subject: zcache/tmem: Better error checking on frontswap_register_ops return value. In the past it either used to be NULL or the "older" backend. Now we also return -Exx error codes. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Andor Daam Cc: Dan Magenheimer Cc: Florian Schmaus Cc: Minchan Kim Cc: Stefan Hengelein Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c index 6bd4ebb..118602f 100644 --- a/drivers/staging/zcache/zcache-main.c +++ b/drivers/staging/zcache/zcache-main.c @@ -1886,8 +1886,11 @@ static int __init zcache_init(void) namestr, frontswap_has_exclusive_gets, !disable_frontswap_ignore_nonactive); #endif - if (old_ops != NULL) + if (IS_ERR(old_ops) || old_ops) { + if (IS_ERR(old_ops)) + return PTR_RET(old_ops); pr_warn("%s: frontswap_ops overridden\n", namestr); + } } if (ramster_enabled) ramster_init(!disable_cleancache, !disable_frontswap, diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 8adde8e..e3600be 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -397,8 +397,11 @@ static int xen_tmem_init(void) frontswap_register_ops(&tmem_frontswap_ops); tmem_frontswap_poolid = -1; - if (old_ops) + if (IS_ERR(old_ops) || old_ops) { + if (IS_ERR(old_ops)) + return PTR_ERR(old_ops); s = " (WARNING: frontswap_ops overridden)"; + } printk(KERN_INFO "frontswap enabled, RAM provided by " "Xen Transcendent Memory%s\n", s); } -- cgit v0.10.2 From 1ac37bee81531cb62a9a64e78ffdad7da9b20ea2 Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Tue, 30 Apr 2013 15:27:03 -0700 Subject: staging: zcache: enable ramster to be built/loaded as a module Enable module support for ramster. Note runtime dependency disallows loading if cleancache/frontswap lazy initialization patches are not present. If built-in (not built as a module), the original mechanism of enabling via a kernel boot parameter is retained, but this should be considered deprecated. Note that module unload is explicitly not yet supported. [v1: Fixed compile issues since ramster_init now has four arguments] [v2: Fixed rebase on ramster->zcache move] [akpm@linux-foundation.org: use_frontswap_selfshrink cannot be __initdata] Signed-off-by: Dan Magenheimer Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Andor Daam Cc: Florian Schmaus Cc: Minchan Kim Cc: Stefan Hengelein Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/staging/zcache/ramster.h b/drivers/staging/zcache/ramster.h index 1b71aea..e1f91d5 100644 --- a/drivers/staging/zcache/ramster.h +++ b/drivers/staging/zcache/ramster.h @@ -11,10 +11,14 @@ #ifndef _ZCACHE_RAMSTER_H_ #define _ZCACHE_RAMSTER_H_ +#ifdef CONFIG_RAMSTER_MODULE +#define CONFIG_RAMSTER +#endif + #ifdef CONFIG_RAMSTER #include "ramster/ramster.h" #else -static inline void ramster_init(bool x, bool y, bool z) +static inline void ramster_init(bool x, bool y, bool z, bool w) { } diff --git a/drivers/staging/zcache/ramster/debug.c b/drivers/staging/zcache/ramster/debug.c index bf34133..327e4f0 100644 --- a/drivers/staging/zcache/ramster/debug.c +++ b/drivers/staging/zcache/ramster/debug.c @@ -43,7 +43,7 @@ static struct debug_entry { }; #undef ATTR -int __init ramster_debugfs_init(void) +int ramster_debugfs_init(void) { int i; struct dentry *root = debugfs_create_dir("ramster", NULL); diff --git a/drivers/staging/zcache/ramster/nodemanager.c b/drivers/staging/zcache/ramster/nodemanager.c index c0f4815..2cfe933 100644 --- a/drivers/staging/zcache/ramster/nodemanager.c +++ b/drivers/staging/zcache/ramster/nodemanager.c @@ -949,7 +949,7 @@ static void __exit exit_r2nm(void) r2hb_exit(); } -static int __init init_r2nm(void) +int r2nm_init(void) { int ret = -1; @@ -986,10 +986,11 @@ out_r2hb: out: return ret; } +EXPORT_SYMBOL_GPL(r2nm_init); MODULE_AUTHOR("Oracle"); MODULE_LICENSE("GPL"); -/* module_init(init_r2nm) */ -late_initcall(init_r2nm); -/* module_exit(exit_r2nm) */ +#ifndef CONFIG_RAMSTER_MODULE +late_initcall(r2nm_init); +#endif diff --git a/drivers/staging/zcache/ramster/ramster.c b/drivers/staging/zcache/ramster/ramster.c index 8781627..b18b887 100644 --- a/drivers/staging/zcache/ramster/ramster.c +++ b/drivers/staging/zcache/ramster/ramster.c @@ -121,6 +121,7 @@ int ramster_do_preload_flnode(struct tmem_pool *pool) kmem_cache_free(ramster_flnode_cache, flnode); return ret; } +EXPORT_SYMBOL_GPL(ramster_do_preload_flnode); /* * Called by the message handler after a (still compressed) page has been @@ -388,6 +389,7 @@ void *ramster_pampd_free(void *pampd, struct tmem_pool *pool, } return local_pampd; } +EXPORT_SYMBOL_GPL(ramster_pampd_free); void ramster_count_foreign_pages(bool eph, int count) { @@ -408,6 +410,7 @@ void ramster_count_foreign_pages(bool eph, int count) } } } +EXPORT_SYMBOL_GPL(ramster_count_foreign_pages); /* * For now, just push over a few pages every few seconds to @@ -593,7 +596,7 @@ requeue: ramster_remotify_queue_delayed_work(HZ); } -void __init ramster_remotify_init(void) +void ramster_remotify_init(void) { unsigned long n = 60UL; ramster_remotify_workqueue = @@ -768,8 +771,10 @@ static bool frontswap_selfshrinking __read_mostly; static void selfshrink_process(struct work_struct *work); static DECLARE_DELAYED_WORK(selfshrink_worker, selfshrink_process); +#ifndef CONFIG_RAMSTER_MODULE /* Enable/disable with kernel boot option. */ -static bool use_frontswap_selfshrink __initdata = true; +static bool use_frontswap_selfshrink = true; +#endif /* * The default values for the following parameters were deemed reasonable @@ -824,6 +829,7 @@ static void frontswap_selfshrink(void) frontswap_shrink(tgt_frontswap_pages); } +#ifndef CONFIG_RAMSTER_MODULE static int __init ramster_nofrontswap_selfshrink_setup(char *s) { use_frontswap_selfshrink = false; @@ -831,6 +837,7 @@ static int __init ramster_nofrontswap_selfshrink_setup(char *s) } __setup("noselfshrink", ramster_nofrontswap_selfshrink_setup); +#endif static void selfshrink_process(struct work_struct *work) { @@ -849,6 +856,7 @@ void ramster_cpu_up(int cpu) per_cpu(ramster_remoteputmem1, cpu) = p1; per_cpu(ramster_remoteputmem2, cpu) = p2; } +EXPORT_SYMBOL_GPL(ramster_cpu_up); void ramster_cpu_down(int cpu) { @@ -864,6 +872,7 @@ void ramster_cpu_down(int cpu) kp->flnode = NULL; } } +EXPORT_SYMBOL_GPL(ramster_cpu_down); void ramster_register_pamops(struct tmem_pamops *pamops) { @@ -874,9 +883,11 @@ void ramster_register_pamops(struct tmem_pamops *pamops) pamops->repatriate = ramster_pampd_repatriate; pamops->repatriate_preload = ramster_pampd_repatriate_preload; } +EXPORT_SYMBOL_GPL(ramster_register_pamops); -void __init ramster_init(bool cleancache, bool frontswap, - bool frontswap_exclusive_gets) +void ramster_init(bool cleancache, bool frontswap, + bool frontswap_exclusive_gets, + bool frontswap_selfshrink) { int ret = 0; @@ -891,10 +902,17 @@ void __init ramster_init(bool cleancache, bool frontswap, if (ret) pr_err("ramster: can't create sysfs for ramster\n"); (void)r2net_register_handlers(); +#ifdef CONFIG_RAMSTER_MODULE + ret = r2nm_init(); + if (ret) + pr_err("ramster: can't init r2net\n"); + frontswap_selfshrinking = frontswap_selfshrink; +#else + frontswap_selfshrinking = use_frontswap_selfshrink; +#endif INIT_LIST_HEAD(&ramster_rem_op_list); ramster_flnode_cache = kmem_cache_create("ramster_flnode", sizeof(struct flushlist_node), 0, 0, NULL); - frontswap_selfshrinking = use_frontswap_selfshrink; if (frontswap_selfshrinking) { pr_info("ramster: Initializing frontswap selfshrink driver.\n"); schedule_delayed_work(&selfshrink_worker, @@ -902,3 +920,4 @@ void __init ramster_init(bool cleancache, bool frontswap, } ramster_remotify_init(); } +EXPORT_SYMBOL_GPL(ramster_init); diff --git a/drivers/staging/zcache/ramster/ramster.h b/drivers/staging/zcache/ramster/ramster.h index 12ae56f..6d41a7a 100644 --- a/drivers/staging/zcache/ramster/ramster.h +++ b/drivers/staging/zcache/ramster/ramster.h @@ -147,7 +147,7 @@ extern int r2net_register_handlers(void); extern int r2net_remote_target_node_set(int); extern int ramster_remotify_pageframe(bool); -extern void ramster_init(bool, bool, bool); +extern void ramster_init(bool, bool, bool, bool); extern void ramster_register_pamops(struct tmem_pamops *); extern int ramster_localify(int, struct tmem_oid *oidp, uint32_t, char *, unsigned int, void *); diff --git a/drivers/staging/zcache/ramster/ramster_nodemanager.h b/drivers/staging/zcache/ramster/ramster_nodemanager.h index 49f879d..dbaae34 100644 --- a/drivers/staging/zcache/ramster/ramster_nodemanager.h +++ b/drivers/staging/zcache/ramster/ramster_nodemanager.h @@ -36,4 +36,6 @@ /* host name, group name, cluster name all 64 bytes */ #define R2NM_MAX_NAME_LEN 64 /* __NEW_UTS_LEN */ +extern int r2nm_init(void); + #endif /* _RAMSTER_NODEMANAGER_H */ diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c index 118602f..326c2ea 100644 --- a/drivers/staging/zcache/zcache-main.c +++ b/drivers/staging/zcache/zcache-main.c @@ -1894,7 +1894,7 @@ static int __init zcache_init(void) } if (ramster_enabled) ramster_init(!disable_cleancache, !disable_frontswap, - frontswap_has_exclusive_gets); + frontswap_has_exclusive_gets, false); out: return ret; } -- cgit v0.10.2 From 835f2f51608fd80e1aef5a8955dabcc36ea528a4 Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Tue, 30 Apr 2013 15:27:05 -0700 Subject: staging: zcache: enable zcache to be built/loaded as a module Allow zcache to be built/loaded as a module. Note runtime dependency disallows loading if cleancache/frontswap lazy initialization patches are not present. Zsmalloc support has not yet been merged into zcache but, once merged, could now easily be selected via a module_param. If built-in (not built as a module), the original mechanism of enabling via a kernel boot parameter is retained, but this should be considered deprecated. Note that module unload is explicitly not yet supported. Signed-off-by: Dan Magenheimer [v1: Rebased with different order of patches] [v2: Removed [CLEANCACHE|FRONTSWAP]_HAS_LAZY_INIT ifdef] [v3: Rebased on top of ramster->zcache move] [v4: Redid the Makefile] [v5: s/ZCACHE2/ZCACHE/] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Andor Daam Cc: Florian Schmaus Cc: Minchan Kim Cc: Stefan Hengelein Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/staging/zcache/Kconfig b/drivers/staging/zcache/Kconfig index 05e87a1..2d7b2da 100644 --- a/drivers/staging/zcache/Kconfig +++ b/drivers/staging/zcache/Kconfig @@ -1,5 +1,5 @@ config ZCACHE - bool "Dynamic compression of swap pages and clean pagecache pages" + tristate "Dynamic compression of swap pages and clean pagecache pages" depends on CRYPTO=y && SWAP=y && CLEANCACHE && FRONTSWAP select CRYPTO_LZO default n @@ -19,8 +19,8 @@ config ZCACHE_DEBUG how zcache is doing. You probably want to set this to 'N'. config RAMSTER - bool "Cross-machine RAM capacity sharing, aka peer-to-peer tmem" - depends on CONFIGFS_FS=y && SYSFS=y && !HIGHMEM && ZCACHE=y + tristate "Cross-machine RAM capacity sharing, aka peer-to-peer tmem" + depends on CONFIGFS_FS=y && SYSFS=y && !HIGHMEM && ZCACHE depends on NET # must ensure struct page is 8-byte aligned select HAVE_ALIGNED_STRUCT_PAGE if !64BIT diff --git a/drivers/staging/zcache/tmem.c b/drivers/staging/zcache/tmem.c index a2b7e03..d7e51e4 100644 --- a/drivers/staging/zcache/tmem.c +++ b/drivers/staging/zcache/tmem.c @@ -35,7 +35,8 @@ #include #include #include -#ifdef CONFIG_RAMSTER +#include +#if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE) #include #endif @@ -641,6 +642,7 @@ void *tmem_localify_get_pampd(struct tmem_pool *pool, struct tmem_oid *oidp, /* note, hashbucket remains locked */ return pampd; } +EXPORT_SYMBOL_GPL(tmem_localify_get_pampd); void tmem_localify_finish(struct tmem_obj *obj, uint32_t index, void *pampd, void *saved_hb, bool delete) @@ -658,6 +660,7 @@ void tmem_localify_finish(struct tmem_obj *obj, uint32_t index, } spin_unlock(&hb->lock); } +EXPORT_SYMBOL_GPL(tmem_localify_finish); /* * For ramster only. Helper function to support asynchronous tmem_get. @@ -719,6 +722,7 @@ out: spin_unlock(&hb->lock); return ret; } +EXPORT_SYMBOL_GPL(tmem_replace); #endif /* diff --git a/drivers/staging/zcache/tmem.h b/drivers/staging/zcache/tmem.h index adbe5a8..d128ce2 100644 --- a/drivers/staging/zcache/tmem.h +++ b/drivers/staging/zcache/tmem.h @@ -126,7 +126,7 @@ static inline unsigned tmem_oid_hash(struct tmem_oid *oidp) TMEM_HASH_BUCKET_BITS); } -#ifdef CONFIG_RAMSTER +#if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE) struct tmem_xhandle { uint8_t client_id; uint8_t xh_data_cksum; @@ -171,7 +171,7 @@ struct tmem_obj { unsigned int objnode_tree_height; unsigned long objnode_count; long pampd_count; -#ifdef CONFIG_RAMSTER +#if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE) /* * for current design of ramster, all pages belonging to * an object reside on the same remotenode and extra is @@ -215,7 +215,7 @@ struct tmem_pamops { uint32_t); void (*free)(void *, struct tmem_pool *, struct tmem_oid *, uint32_t, bool); -#ifdef CONFIG_RAMSTER +#if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE) void (*new_obj)(struct tmem_obj *); void (*free_obj)(struct tmem_pool *, struct tmem_obj *, bool); void *(*repatriate_preload)(void *, struct tmem_pool *, @@ -247,7 +247,7 @@ extern int tmem_flush_page(struct tmem_pool *, struct tmem_oid *, extern int tmem_flush_object(struct tmem_pool *, struct tmem_oid *); extern int tmem_destroy_pool(struct tmem_pool *); extern void tmem_new_pool(struct tmem_pool *, uint32_t); -#ifdef CONFIG_RAMSTER +#if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE) extern int tmem_replace(struct tmem_pool *, struct tmem_oid *, uint32_t index, void *); extern void *tmem_localify_get_pampd(struct tmem_pool *, struct tmem_oid *, diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c index 326c2ea..522cb8e 100644 --- a/drivers/staging/zcache/zcache-main.c +++ b/drivers/staging/zcache/zcache-main.c @@ -37,8 +37,10 @@ #include "debug.h" #ifdef CONFIG_RAMSTER static bool ramster_enabled __read_mostly; +static int disable_frontswap_selfshrink; #else #define ramster_enabled false +#define disable_frontswap_selfshrink 0 #endif #ifndef __PG_WAS_ACTIVE @@ -81,8 +83,12 @@ static char *namestr __read_mostly = "zcache"; (__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC) /* crypto API for zcache */ +#ifdef CONFIG_ZCACHE_MODULE +static char *zcache_comp_name = "lzo"; +#else #define ZCACHE_COMP_NAME_SZ CRYPTO_MAX_ALG_NAME static char zcache_comp_name[ZCACHE_COMP_NAME_SZ] __read_mostly; +#endif static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms __read_mostly; enum comp_op { @@ -1721,6 +1727,7 @@ struct frontswap_ops *zcache_frontswap_register_ops(void) * OR NOTHING HAPPENS! */ +#ifndef CONFIG_ZCACHE_MODULE static int __init enable_zcache(char *s) { zcache_enabled = true; @@ -1787,18 +1794,27 @@ static int __init enable_zcache_compressor(char *s) return 1; } __setup("zcache=", enable_zcache_compressor); +#endif -static int __init zcache_comp_init(void) +static int zcache_comp_init(void) { int ret = 0; /* check crypto algorithm */ +#ifdef CONFIG_ZCACHE_MODULE + ret = crypto_has_comp(zcache_comp_name, 0, 0); + if (!ret) { + ret = -1; + goto out; + } +#else if (*zcache_comp_name != '\0') { ret = crypto_has_comp(zcache_comp_name, 0, 0); if (!ret) pr_info("zcache: %s not supported\n", zcache_comp_name); + goto out; } if (!ret) strcpy(zcache_comp_name, "lzo"); @@ -1807,6 +1823,7 @@ static int __init zcache_comp_init(void) ret = 1; goto out; } +#endif pr_info("zcache: using %s compressor\n", zcache_comp_name); /* alloc percpu transforms */ @@ -1818,10 +1835,13 @@ out: return ret; } -static int __init zcache_init(void) +static int zcache_init(void) { int ret = 0; +#ifdef CONFIG_ZCACHE_MODULE + zcache_enabled = 1; +#endif if (ramster_enabled) { namestr = "ramster"; ramster_register_pamops(&zcache_pamops); @@ -1894,9 +1914,28 @@ static int __init zcache_init(void) } if (ramster_enabled) ramster_init(!disable_cleancache, !disable_frontswap, - frontswap_has_exclusive_gets, false); + frontswap_has_exclusive_gets, + !disable_frontswap_selfshrink); out: return ret; } +#ifdef CONFIG_ZCACHE_MODULE +#ifdef CONFIG_RAMSTER +module_param(ramster_enabled, int, S_IRUGO); +module_param(disable_frontswap_selfshrink, int, S_IRUGO); +#endif +module_param(disable_cleancache, int, S_IRUGO); +module_param(disable_frontswap, int, S_IRUGO); +#ifdef FRONTSWAP_HAS_EXCLUSIVE_GETS +module_param(frontswap_has_exclusive_gets, bool, S_IRUGO); +#endif +module_param(disable_frontswap_ignore_nonactive, int, S_IRUGO); +module_param(zcache_comp_name, charp, S_IRUGO); +module_init(zcache_init); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dan Magenheimer "); +MODULE_DESCRIPTION("In-kernel compression of cleancache/frontswap pages"); +#else late_initcall(zcache_init); +#endif diff --git a/drivers/staging/zcache/zcache.h b/drivers/staging/zcache/zcache.h index 81722b3..8491200 100644 --- a/drivers/staging/zcache/zcache.h +++ b/drivers/staging/zcache/zcache.h @@ -39,7 +39,7 @@ extern int zcache_flush_page(int, int, struct tmem_oid *, uint32_t); extern int zcache_flush_object(int, int, struct tmem_oid *); extern void zcache_decompress_to_page(char *, unsigned int, struct page *); -#ifdef CONFIG_RAMSTER +#if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE) extern void *zcache_pampd_create(char *, unsigned int, bool, int, struct tmem_handle *); int zcache_autocreate_pool(unsigned int cli_id, unsigned int pool_id, bool eph); -- cgit v0.10.2 From 58c7be84fec87b3a96964d65129b36c0e8c59a19 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 30 Apr 2013 15:27:06 -0700 Subject: selftest: add simple test for soft-dirty bit It creates a mapping of 3 pages and checks that reads, writes and clear-refs result in present and soft-dirt bits reported from pagemap2 set as expected. [akpm@linux-foundation.org: alphasort the Makefile TARGETS to reduce rejects] Signed-off-by: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 3cc0ad7..5eff5f7 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,10 +1,11 @@ TARGETS = breakpoints +TARGETS += cpu-hotplug +TARGETS += efivarfs TARGETS += kcmp +TARGETS += memory-hotplug TARGETS += mqueue +TARGETS += soft-dirty TARGETS += vm -TARGETS += cpu-hotplug -TARGETS += memory-hotplug -TARGETS += efivarfs all: for TARGET in $(TARGETS); do \ diff --git a/tools/testing/selftests/soft-dirty/Makefile b/tools/testing/selftests/soft-dirty/Makefile new file mode 100644 index 0000000..a9cdc82 --- /dev/null +++ b/tools/testing/selftests/soft-dirty/Makefile @@ -0,0 +1,10 @@ +CFLAGS += -iquote../../../../include/uapi -Wall +soft-dirty: soft-dirty.c + +all: soft-dirty + +clean: + rm -f soft-dirty + +run_tests: all + @./soft-dirty || echo "soft-dirty selftests: [FAIL]" diff --git a/tools/testing/selftests/soft-dirty/soft-dirty.c b/tools/testing/selftests/soft-dirty/soft-dirty.c new file mode 100644 index 0000000..aba4f87 --- /dev/null +++ b/tools/testing/selftests/soft-dirty/soft-dirty.c @@ -0,0 +1,114 @@ +#include +#include +#include +#include +#include +#include + +typedef unsigned long long u64; + +#define PME_PRESENT (1ULL << 63) +#define PME_SOFT_DIRTY (1Ull << 55) + +#define PAGES_TO_TEST 3 +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + +static void get_pagemap2(char *mem, u64 *map) +{ + int fd; + + fd = open("/proc/self/pagemap2", O_RDONLY); + if (fd < 0) { + perror("Can't open pagemap2"); + exit(1); + } + + lseek(fd, (unsigned long)mem / PAGE_SIZE * sizeof(u64), SEEK_SET); + read(fd, map, sizeof(u64) * PAGES_TO_TEST); + close(fd); +} + +static inline char map_p(u64 map) +{ + return map & PME_PRESENT ? 'p' : '-'; +} + +static inline char map_sd(u64 map) +{ + return map & PME_SOFT_DIRTY ? 'd' : '-'; +} + +static int check_pte(int step, int page, u64 *map, u64 want) +{ + if ((map[page] & want) != want) { + printf("Step %d Page %d has %c%c, want %c%c\n", + step, page, + map_p(map[page]), map_sd(map[page]), + map_p(want), map_sd(want)); + return 1; + } + + return 0; +} + +static void clear_refs(void) +{ + int fd; + char *v = "4"; + + fd = open("/proc/self/clear_refs", O_WRONLY); + if (write(fd, v, 3) < 3) { + perror("Can't clear soft-dirty bit"); + exit(1); + } + close(fd); +} + +int main(void) +{ + char *mem, x; + u64 map[PAGES_TO_TEST]; + + mem = mmap(NULL, PAGES_TO_TEST * PAGE_SIZE, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, 0, 0); + + x = mem[0]; + mem[2 * PAGE_SIZE] = 'c'; + get_pagemap2(mem, map); + + if (check_pte(1, 0, map, PME_PRESENT)) + return 1; + if (check_pte(1, 1, map, 0)) + return 1; + if (check_pte(1, 2, map, PME_PRESENT | PME_SOFT_DIRTY)) + return 1; + + clear_refs(); + get_pagemap2(mem, map); + + if (check_pte(2, 0, map, PME_PRESENT)) + return 1; + if (check_pte(2, 1, map, 0)) + return 1; + if (check_pte(2, 2, map, PME_PRESENT)) + return 1; + + mem[0] = 'a'; + mem[PAGE_SIZE] = 'b'; + x = mem[2 * PAGE_SIZE]; + get_pagemap2(mem, map); + + if (check_pte(3, 0, map, PME_PRESENT | PME_SOFT_DIRTY)) + return 1; + if (check_pte(3, 1, map, PME_PRESENT | PME_SOFT_DIRTY)) + return 1; + if (check_pte(3, 2, map, PME_PRESENT)) + return 1; + + (void)x; /* gcc warn */ + + printf("PASS\n"); + return 0; +} -- cgit v0.10.2 From 7fba2c27a3e0cf7fa411fbab4839e1807a3e0734 Mon Sep 17 00:00:00 2001 From: Lin Feng Date: Tue, 30 Apr 2013 15:27:07 -0700 Subject: kernel/range.c: subtract_range: fix the broken phrase issued by printk Also replace deprecated printk(KERN_ERR...) with pr_err() as suggested by Yinghai, attaching the function name to provide plenty info. Signed-off-by: Lin Feng Cc: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/range.c b/kernel/range.c index 9b8ae2d..071b0ab 100644 --- a/kernel/range.c +++ b/kernel/range.c @@ -97,7 +97,8 @@ void subtract_range(struct range *range, int az, u64 start, u64 end) range[i].end = range[j].end; range[i].start = end; } else { - printk(KERN_ERR "run of slot in ranges\n"); + pr_err("%s: run out of slot in ranges\n", + __func__); } range[j].end = start; continue; -- cgit v0.10.2 From a77f2a4e6f03f0fe0aedb5b13b31be8f9d66d10d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 30 Apr 2013 15:27:09 -0700 Subject: x86: don't show trace beyond show_stack(NULL, NULL) There are multiple ways a task can be dumped - explicit call to dump_stack(), triggering WARN() or BUG(), through sysrq-t and so on. Most of what gets printed is upto each architecture and the current state is not particularly pretty. Different pieces of information are presented differently depending on which path the dump takes and which architecture it's running on. This is messy for no good reason and makes it exceedingly difficult to add or modify debug information to task dumps. In all archs except for s390, there's nothing arch-specific about the printed debug information. This patchset updates all those archs to use the same helpers to consistently print out the same debug information. An example WARN dump after this patchset. WARNING: at kernel/workqueue.c:4841 init_workqueues+0x35/0x505() Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.9.0-rc1-work+ #3 Hardware name: empty empty/S3992, BIOS 080011 10/26/2007 0000000000000009 ffff88007c861e08 ffffffff81c614dc ffff88007c861e48 ffffffff8108f500 ffffffff82228240 0000000000000040 ffffffff8234a08e 0000000000000000 0000000000000000 0000000000000000 ffff88007c861e58 Call Trace: [] dump_stack+0x19/0x1b [] warn_slowpath_common+0x70/0xa0 [] warn_slowpath_null+0x1a/0x20 [] init_workqueues+0x35/0x505 ... And BUG dump. kernel BUG at kernel/workqueue.c:4841! invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.9.0-rc1-work+ #7 Hardware name: empty empty/S3992, BIOS 080011 10/26/2007 task: ffff88007c85e040 ti: ffff88007c860000 task.ti: ffff88007c860000 RIP: 0010:[] [] init_workqueues+0x4/0x6 RSP: 0000:ffff88007c861ec8 EFLAGS: 00010246 RAX: ffff88007c861fd8 RBX: ffffffff824466a8 RCX: 0000000000000001 RDX: 0000000000000046 RSI: 0000000000000001 RDI: ffffffff8234a07a RBP: ffff88007c861ec8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff8234a07a R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88007dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffff88015f7ff000 CR3: 00000000021f1000 CR4: 00000000000007f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffff88007c861ef8 ffffffff81000312 ffffffff824466a8 ffff88007c85e650 0000000000000003 0000000000000000 ffff88007c861f38 ffffffff82335e5d ffff88007c862080 ffffffff8223d8c0 ffff88007c862080 ffffffff81c47760 Call Trace: [] do_one_initcall+0x122/0x170 [] kernel_init_freeable+0x9b/0x1c8 [] ? rest_init+0x140/0x140 [] kernel_init+0xe/0xf0 [] ret_from_fork+0x7c/0xb0 [] ? rest_init+0x140/0x140 ... This patchset contains the following seven patches. 0001-x86-don-t-show-trace-beyond-show_stack-NULL-NULL.patch 0002-sparc32-make-show_stack-acquire-fp-if-_ksp-is-not-sp.patch 0003-dump_stack-consolidate-dump_stack-implementations-an.patch 0004-dmi-morph-dmi_dump_ids-into-dmi_format_ids-which-for.patch 0005-dump_stack-implement-arch-specific-hardware-descript.patch 0006-dump_stack-unify-debug-information-printed-by-show_r.patch 0007-arc-print-fatal-signals-reduce-duplicated-informatio.patch 0001-0002 update stack dumping functions in x86 and sparc32 in preparation. 0003 makes all arches except blackfin use generic dump_stack(). blackfin still uses the generic helper to print the same info. 0004-0005 properly abstract DMI identifier printing in WARN() and show_regs() so that all dumps print out the information. This enables show_regs() to use the same debug info message. 0006 updates show_regs() of all arches to use a common generic helper to print debug info. 0007 removes somem duplicate information from arc dumps. While this patchset changes how debug info is printed on some archs, the printed information is always superset of what used to be there. This patchset makes task dump debug messages consistent and enables adding more information. Workqueue is scheduled to add worker information including the workqueue in use and work item specific description. While this patch touches a lot of archs, it isn't too likely to cause non-trivial conflicts with arch-specfic changes and would probably be best to route together either through -mm. x86 is tested but other archs are either only compile tested or not tested at all. Changes to most archs are generally trivial. This patch: show_stack(current or NULL, NULL) is used to print the backtrace of the current task. As trace beyond the function itself isn't of much interest to anyone, don't show it by determining sp and bp in show_stack()'s frame and passing them to show_stack_log_lvl(). This brings show_stack(NULL, NULL)'s behavior in line with dump_stack(). Signed-off-by: Tejun Heo Cc: Bjorn Helgaas Cc: David S. Miller Cc: Fengguang Wu Cc: Heiko Carstens Cc: Jesper Nilsson Cc: Martin Schwidefsky Cc: Mike Frysinger Cc: Vineet Gupta Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index c8797d5..dd1a7c3 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -176,7 +176,19 @@ void show_trace(struct task_struct *task, struct pt_regs *regs, void show_stack(struct task_struct *task, unsigned long *sp) { - show_stack_log_lvl(task, NULL, sp, 0, ""); + unsigned long bp = 0; + unsigned long stack; + + /* + * Stack frames below this one aren't interesting. Don't show them + * if we're printing for %current. + */ + if (!sp && (!task || task == current)) { + sp = &stack; + bp = stack_frame(current, NULL); + } + + show_stack_log_lvl(task, NULL, sp, bp, ""); } /* -- cgit v0.10.2 From 89e3f23da9c827bfa9806c3d4da83a12c1c8543a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 30 Apr 2013 15:27:10 -0700 Subject: sparc32: make show_stack() acquire %fp if @_ksp is not specified show_stack(current or NULL, NULL) is used by arch-independent code to dump backtrace of the current task; however, sparc32 show_stack() doesn't implement it and wouldn't print any backtrace when NULL @_ksp is specfied. Make show_stack() acquire and use %fp if @tsk is NULL or current and @_ksp is NULL. This makes %fp fetching in dump_stack() unnecessary. Make it use NULL for @_ksp instead. Only compile tested. Signed-off-by: Tejun Heo Acked-by: David S. Miller Cc: Bjorn Helgaas Cc: Fengguang Wu Cc: Heiko Carstens Cc: Jesper Nilsson Cc: Martin Schwidefsky Cc: Mike Frysinger Cc: Vineet Gupta Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index c852410..2be4214 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -142,11 +142,13 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) struct reg_window32 *rw; int count = 0; - if (tsk != NULL) - task_base = (unsigned long) task_stack_page(tsk); - else - task_base = (unsigned long) current_thread_info(); + if (!tsk) + tsk = current; + if (tsk == current && !_ksp) + __asm__ __volatile__("mov %%fp, %0" : "=r" (_ksp)); + + task_base = (unsigned long) task_stack_page(tsk); fp = (unsigned long) _ksp; do { /* Bogus frame pointer? */ @@ -164,11 +166,7 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) void dump_stack(void) { - unsigned long *ksp; - - __asm__ __volatile__("mov %%fp, %0" - : "=r" (ksp)); - show_stack(current, ksp); + show_stack(current, NULL); } EXPORT_SYMBOL(dump_stack); -- cgit v0.10.2 From 196779b9b4ce1922afabdc20d0270720603bd46c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 30 Apr 2013 15:27:12 -0700 Subject: dump_stack: consolidate dump_stack() implementations and unify their behaviors Both dump_stack() and show_stack() are currently implemented by each architecture. show_stack(NULL, NULL) dumps the backtrace for the current task as does dump_stack(). On some archs, dump_stack() prints extra information - pid, utsname and so on - in addition to the backtrace while the two are identical on other archs. The usages in arch-independent code of the two functions indicate show_stack(NULL, NULL) should print out bare backtrace while dump_stack() is used for debugging purposes when something went wrong, so it does make sense to print additional information on the task which triggered dump_stack(). There's no reason to require archs to implement two separate but mostly identical functions. It leads to unnecessary subtle information. This patch expands the dummy fallback dump_stack() implementation in lib/dump_stack.c such that it prints out debug information (taken from x86) and invokes show_stack(NULL, NULL) and drops arch-specific dump_stack() implementations in all archs except blackfin. Blackfin's dump_stack() does something wonky that I don't understand. Debug information can be printed separately by calling dump_stack_print_info() so that arch-specific dump_stack() implementation can still emit the same debug information. This is used in blackfin. This patch brings the following behavior changes. * On some archs, an extra level in backtrace for show_stack() could be printed. This is because the top frame was determined in dump_stack() on those archs while generic dump_stack() can't do that reliably. It can be compensated by inlining dump_stack() but not sure whether that'd be necessary. * Most archs didn't use to print debug info on dump_stack(). They do now. An example WARN dump follows. WARNING: at kernel/workqueue.c:4841 init_workqueues+0x35/0x505() Hardware name: empty Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.9.0-rc1-work+ #9 0000000000000009 ffff88007c861e08 ffffffff81c614dc ffff88007c861e48 ffffffff8108f50f ffffffff82228240 0000000000000040 ffffffff8234a03c 0000000000000000 0000000000000000 0000000000000000 ffff88007c861e58 Call Trace: [] dump_stack+0x19/0x1b [] warn_slowpath_common+0x7f/0xc0 [] warn_slowpath_null+0x1a/0x20 [] init_workqueues+0x35/0x505 ... v2: CPU number added to the generic debug info as requested by s390 folks and dropped the s390 specific dump_stack(). This loses %ksp from the debug message which the maintainers think isn't important enough to keep the s390-specific dump_stack() implementation. dump_stack_print_info() is moved to kernel/printk.c from lib/dump_stack.c. Because linkage is per objecct file, dump_stack_print_info() living in the same lib file as generic dump_stack() means that archs which implement custom dump_stack() - at this point, only blackfin - can't use dump_stack_print_info() as that will bring in the generic version of dump_stack() too. v1 The v1 patch broke build on blackfin due to this issue. The build breakage was reported by Fengguang Wu. Signed-off-by: Tejun Heo Acked-by: David S. Miller Acked-by: Vineet Gupta Acked-by: Jesper Nilsson Acked-by: Vineet Gupta Acked-by: Martin Schwidefsky [s390 bits] Cc: Heiko Carstens Cc: Mike Frysinger Cc: Fengguang Wu Cc: Bjorn Helgaas Cc: Sam Ravnborg Acked-by: Richard Kuo [hexagon bits] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 4037461..affccb9 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -169,13 +169,6 @@ void show_stack(struct task_struct *task, unsigned long *sp) dik_show_trace(sp); } -void dump_stack(void) -{ - show_stack(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - void die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15) { diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index a63ff84..ca0207b 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -220,13 +220,6 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) show_stacktrace(tsk, NULL); } -/* Expected by Rest of kernel code */ -void dump_stack(void) -{ - show_stacktrace(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); - /* Another API expected by schedular, shows up in "ps" as Wait Channel * Ofcourse just returning schedule( ) would be pointless so unwind until * the function is not in schedular code diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 1c08911..18b32e8 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -204,13 +204,6 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) } #endif -void dump_stack(void) -{ - dump_backtrace(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - void show_stack(struct task_struct *tsk, unsigned long *sp) { dump_backtrace(NULL, tsk); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index b3c5f62..61d7dd2 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -167,13 +167,6 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) } } -void dump_stack(void) -{ - dump_backtrace(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - void show_stack(struct task_struct *tsk, unsigned long *sp) { dump_backtrace(NULL, tsk); diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index 073c3c2..89a8017 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -204,14 +204,6 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) show_stack_log_lvl(tsk, (unsigned long)stack, NULL, ""); } -void dump_stack(void) -{ - unsigned long stack; - - show_trace_log_lvl(current, &stack, NULL, ""); -} -EXPORT_SYMBOL(dump_stack); - static const char *cpu_modes[] = { "Application", "Supervisor", "Interrupt level 0", "Interrupt level 1", "Interrupt level 2", "Interrupt level 3", "Exception", "NMI" diff --git a/arch/blackfin/kernel/dumpstack.c b/arch/blackfin/kernel/dumpstack.c index 5cfbaa2..95ba6d9 100644 --- a/arch/blackfin/kernel/dumpstack.c +++ b/arch/blackfin/kernel/dumpstack.c @@ -168,6 +168,7 @@ void dump_stack(void) #endif trace_buffer_save(tflags); dump_bfin_trace_buffer(); + dump_stack_print_info(KERN_DEFAULT); show_stack(current, &stack); trace_buffer_restore(tflags); } diff --git a/arch/c6x/kernel/traps.c b/arch/c6x/kernel/traps.c index 1be74e5..d0b96ef 100644 --- a/arch/c6x/kernel/traps.c +++ b/arch/c6x/kernel/traps.c @@ -67,15 +67,6 @@ void show_regs(struct pt_regs *regs) pr_err("A31: %08lx B31: %08lx\n", regs->a31, regs->b31); } -void dump_stack(void) -{ - unsigned long stack; - - show_stack(current, &stack); -} -EXPORT_SYMBOL(dump_stack); - - void die(char *str, struct pt_regs *fp, int nr) { console_verbose(); diff --git a/arch/cris/kernel/traps.c b/arch/cris/kernel/traps.c index a11ad32..0ffda73 100644 --- a/arch/cris/kernel/traps.c +++ b/arch/cris/kernel/traps.c @@ -147,13 +147,6 @@ show_stack(void) #endif void -dump_stack(void) -{ - show_stack(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); - -void set_nmi_handler(void (*handler)(struct pt_regs *)) { nmi_handler = handler; diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c index 5cfd142..cfcd802 100644 --- a/arch/frv/kernel/traps.c +++ b/arch/frv/kernel/traps.c @@ -466,17 +466,6 @@ asmlinkage void compound_exception(unsigned long esfr1, BUG(); } /* end compound_exception() */ -/*****************************************************************************/ -/* - * The architecture-independent backtrace generator - */ -void dump_stack(void) -{ - show_stack(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - void show_stack(struct task_struct *task, unsigned long *sp) { } diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c index 7833aa3..cfe494d 100644 --- a/arch/h8300/kernel/traps.c +++ b/arch/h8300/kernel/traps.c @@ -164,10 +164,3 @@ void show_trace_task(struct task_struct *tsk) { show_stack(tsk,(unsigned long *)tsk->thread.esp0); } - -void dump_stack(void) -{ - show_stack(NULL,NULL); -} - -EXPORT_SYMBOL(dump_stack); diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index be5e2dd..cc2171b 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -191,14 +191,6 @@ void show_stack(struct task_struct *task, unsigned long *fp) do_show_stack(task, fp, 0); } -void dump_stack(void) -{ - unsigned long *fp; - asm("%0 = r30" : "=r" (fp)); - show_stack(current, fp); -} -EXPORT_SYMBOL(dump_stack); - int die(const char *str, struct pt_regs *regs, long err) { static struct { diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index a26fc64..182bd64 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -96,14 +96,6 @@ show_stack (struct task_struct *task, unsigned long *sp) } void -dump_stack (void) -{ - show_stack(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - -void show_regs (struct pt_regs *regs) { unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index 9fe3467..a7a424f 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -167,15 +167,6 @@ void show_stack(struct task_struct *task, unsigned long *sp) show_trace(task, sp); } -void dump_stack(void) -{ - unsigned long stack; - - show_trace(current, &stack); -} - -EXPORT_SYMBOL(dump_stack); - static void show_registers(struct pt_regs *regs) { int i = 0; diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index f32ab22..88fcd8c 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -992,18 +992,6 @@ void show_stack(struct task_struct *task, unsigned long *stack) } /* - * The architecture-independent backtrace generator - */ -void dump_stack(void) -{ - unsigned long stack; - - show_trace(&stack); -} - -EXPORT_SYMBOL(dump_stack); - -/* * The vector number returned in the frame pointer may also contain * the "fs" (Fault Status) bits on ColdFire. These are in the bottom * 2 bits, and upper 2 bits. So we need to mask out the real vector diff --git a/arch/metag/kernel/traps.c b/arch/metag/kernel/traps.c index 8961f24..2ceeaae 100644 --- a/arch/metag/kernel/traps.c +++ b/arch/metag/kernel/traps.c @@ -987,9 +987,3 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) show_trace(tsk, sp, NULL); } - -void dump_stack(void) -{ - show_stack(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); diff --git a/arch/microblaze/kernel/traps.c b/arch/microblaze/kernel/traps.c index 30e6b50..cb61953 100644 --- a/arch/microblaze/kernel/traps.c +++ b/arch/microblaze/kernel/traps.c @@ -75,9 +75,3 @@ void show_stack(struct task_struct *task, unsigned long *sp) debug_show_held_locks(task); } - -void dump_stack(void) -{ - show_stack(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index c3abb88..b512b28 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -206,19 +206,6 @@ void show_stack(struct task_struct *task, unsigned long *sp) show_stacktrace(task, ®s); } -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - struct pt_regs regs; - - prepare_frametrace(®s); - show_backtrace(current, ®s); -} - -EXPORT_SYMBOL(dump_stack); - static void show_code(unsigned int __user *pc) { long i; diff --git a/arch/mn10300/kernel/traps.c b/arch/mn10300/kernel/traps.c index b900e5a..a7a987c 100644 --- a/arch/mn10300/kernel/traps.c +++ b/arch/mn10300/kernel/traps.c @@ -294,17 +294,6 @@ void show_stack(struct task_struct *task, unsigned long *sp) } /* - * the architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - unsigned long stack; - - show_stack(current, &stack); -} -EXPORT_SYMBOL(dump_stack); - -/* * dump the register file in the specified exception frame */ void show_registers_only(struct pt_regs *regs) diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 5cce396..3d3f606 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -105,17 +105,6 @@ void show_trace_task(struct task_struct *tsk) */ } -/* - * The architecture-independent backtrace generator - */ -void dump_stack(void) -{ - unsigned long stack; - - show_stack(current, &stack); -} -EXPORT_SYMBOL(dump_stack); - void show_registers(struct pt_regs *regs) { int i; diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index aeb8f8f..e64cf5f 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -158,14 +158,6 @@ void show_regs(struct pt_regs *regs) } } - -void dump_stack(void) -{ - show_stack(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - static void do_show_stack(struct unwind_frame_info *info) { int i = 1; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 16e77a8..624d44b 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1362,12 +1362,6 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) } while (count++ < kstack_depth_to_print); } -void dump_stack(void) -{ - show_stack(current, NULL); -} -EXPORT_SYMBOL(dump_stack); - #ifdef CONFIG_PPC64 /* Called with hard IRQs off */ void __ppc64_runlatch_on(void) diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 03dce39..2f1f639 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -129,23 +129,6 @@ static void show_last_breaking_event(struct pt_regs *regs) #endif } -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - printk("CPU: %d %s %s %.*s\n", - task_thread_info(current)->cpu, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk("Process %s (pid: %d, task: %p, ksp: %p)\n", - current->comm, current->pid, current, - (void *) current->thread.ksp); - show_stack(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); - static inline int mask_bits(struct pt_regs *regs, unsigned long bits) { return (regs->psw.mask & bits) / ((~bits + 1) & bits); diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index 0e46fb1..a38f435 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -149,16 +149,6 @@ static void show_registers(struct pt_regs *regs) printk(KERN_NOTICE "\n"); } -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - show_stack(current_thread_info()->task, - (long *) get_irq_regs()->regs[0]); -} -EXPORT_SYMBOL(dump_stack); - void __die(const char *str, struct pt_regs *regs, const char *file, const char *func, unsigned long line) { diff --git a/arch/sh/kernel/dumpstack.c b/arch/sh/kernel/dumpstack.c index 7617dc4..b959f55 100644 --- a/arch/sh/kernel/dumpstack.c +++ b/arch/sh/kernel/dumpstack.c @@ -158,9 +158,3 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) (unsigned long)task_stack_page(tsk)); show_trace(tsk, sp, NULL); } - -void dump_stack(void) -{ - show_stack(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 2be4214..dccf5f5 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -164,13 +164,6 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) printk("\n"); } -void dump_stack(void) -{ - show_stack(current, NULL); -} - -EXPORT_SYMBOL(dump_stack); - /* * Note: sparc64 has a pretty intricated thread_saved_pc, check it out. */ diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 8d38ca9..b3f833a 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2350,13 +2350,6 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) } while (++count < 16); } -void dump_stack(void) -{ - show_stack(current, NULL); -} - -EXPORT_SYMBOL(dump_stack); - static inline struct reg_window *kernel_stack_up(struct reg_window *rw) { unsigned long fp = rw->ins[6]; diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index e562ff8..7d101a2 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -35,18 +35,6 @@ void show_trace(struct task_struct *task, unsigned long * stack) } #endif -/* - * stack dumps generator - this is used by arch-independent code. - * And this is identical to i386 currently. - */ -void dump_stack(void) -{ - unsigned long stack; - - show_trace(current, &stack); -} -EXPORT_SYMBOL(dump_stack); - /*Stolen from arch/i386/kernel/traps.c */ static const int kstack_depth_to_print = 24; diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 0870b68..c54e324 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -170,12 +170,6 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) c_backtrace(fp, mode); } -void dump_stack(void) -{ - dump_backtrace(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); - void show_stack(struct task_struct *tsk, unsigned long *sp) { dump_backtrace(NULL, tsk); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index dd1a7c3..deb6421 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -191,24 +191,6 @@ void show_stack(struct task_struct *task, unsigned long *sp) show_stack_log_lvl(task, NULL, sp, bp, ""); } -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - unsigned long bp; - unsigned long stack; - - bp = stack_frame(current, NULL); - printk("Pid: %d, comm: %.20s %s %s %.*s\n", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - show_trace(NULL, NULL, &stack, bp); -} -EXPORT_SYMBOL(dump_stack); - static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; static int die_owner = -1; static unsigned int die_nest_count; diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 923db5c..384b7c7 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -481,14 +481,6 @@ void show_stack(struct task_struct *task, unsigned long *sp) show_trace(task, stack); } -void dump_stack(void) -{ - show_stack(current, NULL); -} - -EXPORT_SYMBOL(dump_stack); - - void show_code(unsigned int *pc) { long i; diff --git a/include/linux/printk.h b/include/linux/printk.h index 4890fe6..7ce1f87 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -145,6 +145,7 @@ extern void wake_up_klogd(void); void log_buf_kexec_setup(void); void __init setup_log_buf(int early); +void dump_stack_print_info(const char *log_lvl); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -182,6 +183,10 @@ static inline void log_buf_kexec_setup(void) static inline void setup_log_buf(int early) { } + +static inline void dump_stack_print_info(const char *log_lvl) +{ +} #endif extern void dump_stack(void) __cold; diff --git a/kernel/printk.c b/kernel/printk.c index 376914e..70b4b94 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -43,6 +43,7 @@ #include #include #include +#include #include @@ -2849,4 +2850,21 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper) raw_spin_unlock_irqrestore(&logbuf_lock, flags); } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); + +/** + * dump_stack_print_info - print generic debug info for dump_stack() + * @log_lvl: log level + * + * Arch-specific dump_stack() implementations can use this function to + * print out the same debug information as the generic dump_stack(). + */ +void dump_stack_print_info(const char *log_lvl) +{ + printk("%sCPU: %d PID: %d Comm: %.20s %s %s %.*s\n", + log_lvl, raw_smp_processor_id(), current->pid, current->comm, + print_tainted(), init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); +} + #endif diff --git a/lib/dump_stack.c b/lib/dump_stack.c index 42f4f55..53bad09 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -5,11 +5,16 @@ #include #include +#include +/** + * dump_stack - dump the current task information and its stack trace + * + * Architectures can override this implementation by implementing its own. + */ void dump_stack(void) { - printk(KERN_NOTICE - "This architecture does not implement dump_stack()\n"); + dump_stack_print_info(KERN_DEFAULT); + show_stack(NULL, NULL); } - EXPORT_SYMBOL(dump_stack); -- cgit v0.10.2 From c90fe6bc0343f7c26b30c9f503b1d061636ac8ee Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 30 Apr 2013 15:27:14 -0700 Subject: dmi: morph dmi_dump_ids() into dmi_format_ids() which formats into a buffer We're goning to use DMI identification for other purposes too. Morph dmi_dump_ids() which is used to print DMI identification as a debug message during boot into dmi_format_ids() which formats the same information sans the leading "DMI:" tag into a string buffer. dmi_present() is updated to format the information into dmi_ids_string[] using the new function and print it with "DMI:" prefix. dmi_ids_string[] will be used for another purpose by a future patch. Signed-off-by: Tejun Heo Cc: Bjorn Helgaas Cc: David S. Miller Cc: Fengguang Wu Cc: Heiko Carstens Cc: Jesper Nilsson Cc: Martin Schwidefsky Cc: Mike Frysinger Cc: Vineet Gupta Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 4cd392d..862b1d2 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -22,6 +22,9 @@ static u16 __initdata dmi_ver; */ static int dmi_initialized; +/* DMI system identification string used during boot */ +static char dmi_ids_string[128] __initdata; + static const char * __init dmi_string_nosave(const struct dmi_header *dm, u8 s) { const u8 *bp = ((u8 *) dm) + dm->length; @@ -376,38 +379,44 @@ static void __init dmi_decode(const struct dmi_header *dm, void *dummy) } } -static void __init print_filtered(const char *info) +static int __init print_filtered(char *buf, size_t len, const char *info) { + int c = 0; const char *p; if (!info) - return; + return c; for (p = info; *p; p++) if (isprint(*p)) - printk(KERN_CONT "%c", *p); + c += scnprintf(buf + c, len - c, "%c", *p); else - printk(KERN_CONT "\\x%02x", *p & 0xff); + c += scnprintf(buf + c, len - c, "\\x%02x", *p & 0xff); + return c; } -static void __init dmi_dump_ids(void) +static void __init dmi_format_ids(char *buf, size_t len) { + int c = 0; const char *board; /* Board Name is optional */ - printk(KERN_DEBUG "DMI: "); - print_filtered(dmi_get_system_info(DMI_SYS_VENDOR)); - printk(KERN_CONT " "); - print_filtered(dmi_get_system_info(DMI_PRODUCT_NAME)); + c += print_filtered(buf + c, len - c, + dmi_get_system_info(DMI_SYS_VENDOR)); + c += scnprintf(buf + c, len - c, " "); + c += print_filtered(buf + c, len - c, + dmi_get_system_info(DMI_PRODUCT_NAME)); + board = dmi_get_system_info(DMI_BOARD_NAME); if (board) { - printk(KERN_CONT "/"); - print_filtered(board); + c += scnprintf(buf + c, len - c, "/"); + c += print_filtered(buf + c, len - c, board); } - printk(KERN_CONT ", BIOS "); - print_filtered(dmi_get_system_info(DMI_BIOS_VERSION)); - printk(KERN_CONT " "); - print_filtered(dmi_get_system_info(DMI_BIOS_DATE)); - printk(KERN_CONT "\n"); + c += scnprintf(buf + c, len - c, ", BIOS "); + c += print_filtered(buf + c, len - c, + dmi_get_system_info(DMI_BIOS_VERSION)); + c += scnprintf(buf + c, len - c, " "); + c += print_filtered(buf + c, len - c, + dmi_get_system_info(DMI_BIOS_DATE)); } static int __init dmi_present(const char __iomem *p) @@ -431,7 +440,8 @@ static int __init dmi_present(const char __iomem *p) pr_info("Legacy DMI %d.%d present.\n", dmi_ver >> 8, dmi_ver & 0xFF); } - dmi_dump_ids(); + dmi_format_ids(dmi_ids_string, sizeof(dmi_ids_string)); + printk(KERN_DEBUG "DMI: %s\n", dmi_ids_string); return 0; } } -- cgit v0.10.2 From 98e5e1bf722c4f976a860aed06dd365a56a34ee0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 30 Apr 2013 15:27:15 -0700 Subject: dump_stack: implement arch-specific hardware description in task dumps x86 and ia64 can acquire extra hardware identification information from DMI and print it along with task dumps; however, the usage isn't consistent. * x86 show_regs() collects vendor, product and board strings and print them out with PID, comm and utsname. Some of the information is printed again later in the same dump. * warn_slowpath_common() explicitly accesses the DMI board and prints it out with "Hardware name:" label. This applies to both x86 and ia64 but is irrelevant on all other archs. * ia64 doesn't show DMI information on other non-WARN dumps. This patch introduces arch-specific hardware description used by dump_stack(). It can be set by calling dump_stack_set_arch_desc() during boot and, if exists, printed out in a separate line with "Hardware name:" label. dmi_set_dump_stack_arch_desc() is added which sets arch-specific description from DMI data. It uses dmi_ids_string[] which is set from dmi_present() used for DMI debug message. It is superset of the information x86 show_regs() is using. The function is called from x86 and ia64 boot code right after dmi_scan_machine(). This makes the explicit DMI handling in warn_slowpath_common() unnecessary. Removed. show_regs() isn't yet converted to use generic debug information printing and this patch doesn't remove the duplicate DMI handling in x86 show_regs(). The next patch will unify show_regs() handling and remove the duplication. An example WARN dump follows. WARNING: at kernel/workqueue.c:4841 init_workqueues+0x35/0x505() Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.9.0-rc1-work+ #3 Hardware name: empty empty/S3992, BIOS 080011 10/26/2007 0000000000000009 ffff88007c861e08 ffffffff81c614dc ffff88007c861e48 ffffffff8108f500 ffffffff82228240 0000000000000040 ffffffff8234a08e 0000000000000000 0000000000000000 0000000000000000 ffff88007c861e58 Call Trace: [] dump_stack+0x19/0x1b [] warn_slowpath_common+0x70/0xa0 [] warn_slowpath_null+0x1a/0x20 [] init_workqueues+0x35/0x505 ... v2: Use the same string as the debug message from dmi_present() which also contains BIOS information. Move hardware name into its own line as warn_slowpath_common() did. This change was suggested by Bjorn Helgaas. Signed-off-by: Tejun Heo Cc: Bjorn Helgaas Cc: David S. Miller Cc: Fengguang Wu Cc: Heiko Carstens Cc: Jesper Nilsson Cc: Martin Schwidefsky Cc: Mike Frysinger Cc: Vineet Gupta Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 2029cc0..13bfdd2 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -1063,6 +1063,7 @@ check_bugs (void) static int __init run_dmi_scan(void) { dmi_scan_machine(); + dmi_set_dump_stack_arch_desc(); return 0; } core_initcall(run_dmi_scan); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4689855..56f7fcf 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -996,6 +996,7 @@ void __init setup_arch(char **cmdline_p) efi_init(); dmi_scan_machine(); + dmi_set_dump_stack_arch_desc(); /* * VMware detection requires dmi to be available, so this diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 862b1d2..98c62081 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -531,6 +531,19 @@ void __init dmi_scan_machine(void) } /** + * dmi_set_dump_stack_arch_desc - set arch description for dump_stack() + * + * Invoke dump_stack_set_arch_desc() with DMI system information so that + * DMI identifiers are printed out on task dumps. Arch boot code should + * call this function after dmi_scan_machine() if it wants to print out DMI + * identifiers on task dumps. + */ +void __init dmi_set_dump_stack_arch_desc(void) +{ + dump_stack_set_arch_desc("%s", dmi_ids_string); +} + +/** * dmi_matches - check if dmi_system_id structure matches system DMI data * @dmi: pointer to the dmi_system_id structure to check */ diff --git a/include/linux/dmi.h b/include/linux/dmi.h index f156cca..b6eb7a0 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -99,6 +99,7 @@ extern const char * dmi_get_system_info(int field); extern const struct dmi_device * dmi_find_device(int type, const char *name, const struct dmi_device *from); extern void dmi_scan_machine(void); +extern void dmi_set_dump_stack_arch_desc(void); extern bool dmi_get_date(int field, int *yearp, int *monthp, int *dayp); extern int dmi_name_in_vendors(const char *str); extern int dmi_name_in_serial(const char *str); @@ -114,6 +115,7 @@ static inline const char * dmi_get_system_info(int field) { return NULL; } static inline const struct dmi_device * dmi_find_device(int type, const char *name, const struct dmi_device *from) { return NULL; } static inline void dmi_scan_machine(void) { return; } +static inline void dmi_set_dump_stack_arch_desc(void) { } static inline bool dmi_get_date(int field, int *yearp, int *monthp, int *dayp) { if (yearp) diff --git a/include/linux/printk.h b/include/linux/printk.h index 7ce1f87..47827c0 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -145,6 +145,7 @@ extern void wake_up_klogd(void); void log_buf_kexec_setup(void); void __init setup_log_buf(int early); +void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); #else static inline __printf(1, 0) @@ -184,6 +185,10 @@ static inline void setup_log_buf(int early) { } +static inline void dump_stack_set_arch_desc(const char *fmt, ...) +{ +} + static inline void dump_stack_print_info(const char *log_lvl) { } diff --git a/kernel/panic.c b/kernel/panic.c index 7c57cc9..167ec097 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -22,7 +22,6 @@ #include #include #include -#include #define PANIC_TIMER_STEP 100 #define PANIC_BLINK_SPD 18 @@ -400,13 +399,8 @@ struct slowpath_args { static void warn_slowpath_common(const char *file, int line, void *caller, unsigned taint, struct slowpath_args *args) { - const char *board; - printk(KERN_WARNING "------------[ cut here ]------------\n"); printk(KERN_WARNING "WARNING: at %s:%d %pS()\n", file, line, caller); - board = dmi_get_system_info(DMI_PRODUCT_NAME); - if (board) - printk(KERN_WARNING "Hardware name: %s\n", board); if (args) vprintk(args->fmt, args->args); diff --git a/kernel/printk.c b/kernel/printk.c index 70b4b94..73a96de 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -2851,6 +2851,28 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper) } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); +static char dump_stack_arch_desc_str[128]; + +/** + * dump_stack_set_arch_desc - set arch-specific str to show with task dumps + * @fmt: printf-style format string + * @...: arguments for the format string + * + * The configured string will be printed right after utsname during task + * dumps. Usually used to add arch-specific system identifiers. If an + * arch wants to make use of such an ID string, it should initialize this + * as soon as possible during boot. + */ +void __init dump_stack_set_arch_desc(const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vsnprintf(dump_stack_arch_desc_str, sizeof(dump_stack_arch_desc_str), + fmt, args); + va_end(args); +} + /** * dump_stack_print_info - print generic debug info for dump_stack() * @log_lvl: log level @@ -2865,6 +2887,10 @@ void dump_stack_print_info(const char *log_lvl) print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); + + if (dump_stack_arch_desc_str[0] != '\0') + printk("%sHardware name: %s\n", + log_lvl, dump_stack_arch_desc_str); } #endif -- cgit v0.10.2 From a43cb95d547a061ed5bf1acb28e0f5fd575e26c1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 30 Apr 2013 15:27:17 -0700 Subject: dump_stack: unify debug information printed by show_regs() show_regs() is inherently arch-dependent but it does make sense to print generic debug information and some archs already do albeit in slightly different forms. This patch introduces a generic function to print debug information from show_regs() so that different archs print out the same information and it's much easier to modify what's printed. show_regs_print_info() prints out the same debug info as dump_stack() does plus task and thread_info pointers. * Archs which didn't print debug info now do. alpha, arc, blackfin, c6x, cris, frv, h8300, hexagon, ia64, m32r, metag, microblaze, mn10300, openrisc, parisc, score, sh64, sparc, um, xtensa * Already prints debug info. Replaced with show_regs_print_info(). The printed information is superset of what used to be there. arm, arm64, avr32, mips, powerpc, sh32, tile, unicore32, x86 * s390 is special in that it used to print arch-specific information along with generic debug info. Heiko and Martin think that the arch-specific extra isn't worth keeping s390 specfic implementation. Converted to use the generic version. Note that now all archs print the debug info before actual register dumps. An example BUG() dump follows. kernel BUG at /work/os/work/kernel/workqueue.c:4841! invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.9.0-rc1-work+ #7 Hardware name: empty empty/S3992, BIOS 080011 10/26/2007 task: ffff88007c85e040 ti: ffff88007c860000 task.ti: ffff88007c860000 RIP: 0010:[] [] init_workqueues+0x4/0x6 RSP: 0000:ffff88007c861ec8 EFLAGS: 00010246 RAX: ffff88007c861fd8 RBX: ffffffff824466a8 RCX: 0000000000000001 RDX: 0000000000000046 RSI: 0000000000000001 RDI: ffffffff8234a07a RBP: ffff88007c861ec8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff8234a07a R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88007dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffff88015f7ff000 CR3: 00000000021f1000 CR4: 00000000000007f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffff88007c861ef8 ffffffff81000312 ffffffff824466a8 ffff88007c85e650 0000000000000003 0000000000000000 ffff88007c861f38 ffffffff82335e5d ffff88007c862080 ffffffff8223d8c0 ffff88007c862080 ffffffff81c47760 Call Trace: [] do_one_initcall+0x122/0x170 [] kernel_init_freeable+0x9b/0x1c8 [] ? rest_init+0x140/0x140 [] kernel_init+0xe/0xf0 [] ret_from_fork+0x7c/0xb0 [] ? rest_init+0x140/0x140 ... v2: Typo fix in x86-32. v3: CPU number dropped from show_regs_print_info() as dump_stack_print_info() has been updated to print it. s390 specific implementation dropped as requested by s390 maintainers. Signed-off-by: Tejun Heo Acked-by: David S. Miller Acked-by: Jesper Nilsson Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Bjorn Helgaas Cc: Fengguang Wu Cc: Mike Frysinger Cc: Vineet Gupta Cc: Sam Ravnborg Acked-by: Chris Metcalf [tile bits] Acked-by: Richard Kuo [hexagon bits] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index a3fd8a2..ab80a80 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -175,6 +175,7 @@ machine_power_off(void) void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_DEFAULT); dik_show_regs(regs, NULL); } diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 7c10873..96be1e6 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -163,6 +163,7 @@ void show_regs(struct pt_regs *regs) return; print_task_path_n_nm(tsk, buf); + show_regs_print_info(KERN_INFO); if (current->thread.cause_code) show_ecr_verbose(regs); diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index c9a5e2c..ae58d3b 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -225,11 +225,8 @@ void __show_regs(struct pt_regs *regs) unsigned long flags; char buf[64]; - printk("CPU: %d %s (%s %.*s)\n", - raw_smp_processor_id(), print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + show_regs_print_info(KERN_DEFAULT); + print_symbol("PC is at %s\n", instruction_pointer(regs)); print_symbol("LR is at %s\n", regs->ARM_lr); printk("pc : [<%08lx>] lr : [<%08lx>] psr: %08lx\n" @@ -284,7 +281,6 @@ void __show_regs(struct pt_regs *regs) void show_regs(struct pt_regs * regs) { printk("\n"); - printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm); __show_regs(regs); dump_stack(); } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 6f3822f..f491972 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -145,11 +145,7 @@ void __show_regs(struct pt_regs *regs) { int i; - printk("CPU: %d %s (%s %.*s)\n", - raw_smp_processor_id(), print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + show_regs_print_info(KERN_DEFAULT); print_symbol("PC is at %s\n", instruction_pointer(regs)); print_symbol("LR is at %s\n", regs->regs[30]); printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n", @@ -166,7 +162,6 @@ void __show_regs(struct pt_regs *regs) void show_regs(struct pt_regs * regs) { printk("\n"); - printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm); __show_regs(regs); } diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index 89a8017..e7b6149 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -215,6 +215,8 @@ void show_regs_log_lvl(struct pt_regs *regs, const char *log_lvl) unsigned long lr = regs->lr; unsigned long mode = (regs->sr & MODE_MASK) >> MODE_SHIFT; + show_regs_print_info(log_lvl); + if (!user_mode(regs)) { sp = (unsigned long)regs + FRAME_SIZE_FULL; @@ -252,9 +254,6 @@ void show_regs_log_lvl(struct pt_regs *regs, const char *log_lvl) regs->sr & SR_I0M ? '0' : '.', regs->sr & SR_GM ? 'G' : 'g'); printk("%sCPU Mode: %s\n", log_lvl, cpu_modes[mode]); - printk("%sProcess: %s [%d] (task: %p thread: %p)\n", - log_lvl, current->comm, current->pid, current, - task_thread_info(current)); } void show_regs(struct pt_regs *regs) diff --git a/arch/blackfin/kernel/trace.c b/arch/blackfin/kernel/trace.c index f7f7a18..c36efa0 100644 --- a/arch/blackfin/kernel/trace.c +++ b/arch/blackfin/kernel/trace.c @@ -853,6 +853,8 @@ void show_regs(struct pt_regs *fp) unsigned char in_atomic = (bfin_read_IPEND() & 0x10) || in_atomic(); pr_notice("\n"); + show_regs_print_info(KERN_NOTICE); + if (CPUID != bfin_cpuid()) pr_notice("Compiled for cpu family 0x%04x (Rev %d), " "but running on:0x%04x (Rev %d)\n", diff --git a/arch/c6x/kernel/traps.c b/arch/c6x/kernel/traps.c index d0b96ef..dcc2c2f 100644 --- a/arch/c6x/kernel/traps.c +++ b/arch/c6x/kernel/traps.c @@ -31,6 +31,7 @@ void __init trap_init(void) void show_regs(struct pt_regs *regs) { pr_err("\n"); + show_regs_print_info(KERN_ERR); pr_err("PC: %08lx SP: %08lx\n", regs->pc, regs->sp); pr_err("Status: %08lx ORIG_A4: %08lx\n", regs->csr, regs->orig_a4); pr_err("A0: %08lx B0: %08lx\n", regs->a0, regs->b0); diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index 2ba23c1..753e9a0 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -176,6 +176,9 @@ unsigned long get_wchan(struct task_struct *p) void show_regs(struct pt_regs * regs) { unsigned long usp = rdusp(); + + show_regs_print_info(KERN_DEFAULT); + printk("IRP: %08lx SRP: %08lx DCCR: %08lx USP: %08lx MOF: %08lx\n", regs->irp, regs->srp, regs->dccr, usp, regs->mof ); printk(" r0: %08lx r1: %08lx r2: %08lx r3: %08lx\n", diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index 57451fa..cebd32e 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -164,6 +164,9 @@ get_wchan(struct task_struct *p) void show_regs(struct pt_regs * regs) { unsigned long usp = rdusp(); + + show_regs_print_info(KERN_DEFAULT); + printk("ERP: %08lx SRP: %08lx CCS: %08lx USP: %08lx MOF: %08lx\n", regs->erp, regs->srp, regs->ccs, usp, regs->mof); diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c index cfcd802..4bff48c 100644 --- a/arch/frv/kernel/traps.c +++ b/arch/frv/kernel/traps.c @@ -497,6 +497,7 @@ void show_regs(struct pt_regs *regs) int loop; printk("\n"); + show_regs_print_info(KERN_DEFAULT); printk("Frame: @%08lx [%s]\n", (unsigned long) regs, @@ -511,8 +512,6 @@ void show_regs(struct pt_regs *regs) else printk(" | "); } - - printk("Process %s (pid: %d)\n", current->comm, current->pid); } void die_if_kernel(const char *str, ...) diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index a17d2cd..1a744ab 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -83,6 +83,8 @@ void machine_power_off(void) void show_regs(struct pt_regs * regs) { + show_regs_print_info(KERN_DEFAULT); + printk("\nPC: %08lx Status: %02x", regs->pc, regs->ccr); printk("\nORIG_ER0: %08lx ER0: %08lx ER1: %08lx", diff --git a/arch/hexagon/kernel/vm_events.c b/arch/hexagon/kernel/vm_events.c index 9b5a4a2..f337281 100644 --- a/arch/hexagon/kernel/vm_events.c +++ b/arch/hexagon/kernel/vm_events.c @@ -33,6 +33,8 @@ */ void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_EMERG); + printk(KERN_EMERG "restart_r0: \t0x%08lx syscall_nr: %ld\n", regs->restart_r0, regs->syscall_nr); printk(KERN_EMERG "preds: \t\t0x%08lx\n", regs->preds); diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 182bd64..55d4ba4 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -101,8 +101,8 @@ show_regs (struct pt_regs *regs) unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; print_modules(); - printk("\nPid: %d, CPU %d, comm: %20s\n", task_pid_nr(current), - smp_processor_id(), current->comm); + printk("\n"); + show_regs_print_info(KERN_DEFAULT); printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s (%s)\n", regs->cr_ipsr, regs->cr_ifs, ip, print_tainted(), init_utsname()->release); diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index e2d0490..e69221d 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -73,6 +73,8 @@ void machine_power_off(void) void show_regs(struct pt_regs * regs) { printk("\n"); + show_regs_print_info(KERN_DEFAULT); + printk("BPC[%08lx]:PSW[%08lx]:LR [%08lx]:FP [%08lx]\n", \ regs->bpc, regs->psw, regs->lr, regs->fp); printk("BBPC[%08lx]:BBPSW[%08lx]:SPU[%08lx]:SPI[%08lx]\n", \ diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c index dc59235..483dff9 100644 --- a/arch/metag/kernel/process.c +++ b/arch/metag/kernel/process.c @@ -129,6 +129,8 @@ void show_regs(struct pt_regs *regs) "D1.7 " }; + show_regs_print_info(KERN_INFO); + pr_info(" pt_regs @ %p\n", regs); pr_info(" SaveMask = 0x%04hx\n", regs->ctx.SaveMask); pr_info(" Flags = 0x%04hx (%c%c%c%c)\n", regs->ctx.Flags, diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 7cce2e9..a558938 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -20,6 +20,8 @@ void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_INFO); + pr_info(" Registers dump: mode=%X\r\n", regs->pt_mode); pr_info(" r1=%08lX, r2=%08lX, r3=%08lX, r4=%08lX\n", regs->r1, regs->r2, regs->r3, regs->r4); diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index b512b28..2522551 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -231,7 +231,7 @@ static void __show_regs(const struct pt_regs *regs) unsigned int cause = regs->cp0_cause; int i; - printk("Cpu %d\n", smp_processor_id()); + show_regs_print_info(KERN_DEFAULT); /* * Saved main processor registers diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 2da39fb..3707da5 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -97,6 +97,7 @@ void machine_power_off(void) void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_DEFAULT); } /* diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 00c233b..386af25 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -90,6 +90,7 @@ void show_regs(struct pt_regs *regs) { extern void show_registers(struct pt_regs *regs); + show_regs_print_info(KERN_DEFAULT); /* __PHX__ cleanup this mess */ show_registers(regs); } diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index e64cf5f..f702bff 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -126,6 +126,8 @@ void show_regs(struct pt_regs *regs) user = user_mode(regs); level = user ? KERN_DEBUG : KERN_CRIT; + show_regs_print_info(level); + print_gr(level, regs); for (i = 0; i < 8; i += 4) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 624d44b..13a8d9d 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -831,6 +831,8 @@ void show_regs(struct pt_regs * regs) { int i, trap; + show_regs_print_info(KERN_DEFAULT); + printk("NIP: "REG" LR: "REG" CTR: "REG"\n", regs->nip, regs->link, regs->ctr); printk("REGS: %p TRAP: %04lx %s (%s)\n", @@ -850,12 +852,6 @@ void show_regs(struct pt_regs * regs) #else printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr); #endif - printk("TASK = %p[%d] '%s' THREAD: %p", - current, task_pid_nr(current), current->comm, task_thread_info(current)); - -#ifdef CONFIG_SMP - printk(" CPU: %d", raw_smp_processor_id()); -#endif /* CONFIG_SMP */ for (i = 0; i < 32; i++) { if ((i % REGS_PER_LINE) == 0) diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 2f1f639..2982974 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -166,14 +166,7 @@ void show_registers(struct pt_regs *regs) void show_regs(struct pt_regs *regs) { - printk("CPU: %d %s %s %.*s\n", - task_thread_info(current)->cpu, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk("Process %s (pid: %d, task: %p, ksp: %p)\n", - current->comm, current->pid, current, - (void *) current->thread.ksp); + show_regs_print_info(KERN_DEFAULT); show_registers(regs); /* Show stack backtrace if pt_regs is from kernel mode */ if (!user_mode(regs)) diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index a38f435..1517a7d 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -117,6 +117,8 @@ static void show_code(unsigned int *pc) */ void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_DEFAULT); + printk("r0 : %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", regs->regs[0], regs->regs[1], regs->regs[2], regs->regs[3], regs->regs[4], regs->regs[5], regs->regs[6], regs->regs[7]); diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 73eb66f..ebd3933 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -32,11 +32,7 @@ void show_regs(struct pt_regs * regs) { printk("\n"); - printk("Pid : %d, Comm: \t\t%s\n", task_pid_nr(current), current->comm); - printk("CPU : %d \t\t%s (%s %.*s)\n\n", - smp_processor_id(), print_tainted(), init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + show_regs_print_info(KERN_DEFAULT); print_symbol("PC is at %s\n", instruction_pointer(regs)); print_symbol("PR is at %s\n", regs->pr); diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index e611c85..174d124 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -40,6 +40,7 @@ void show_regs(struct pt_regs *regs) unsigned long long ah, al, bh, bl, ch, cl; printk("\n"); + show_regs_print_info(KERN_DEFAULT); ah = (regs->pc) >> 32; al = (regs->pc) & 0xffffffff; diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index dccf5f5..fdd819d 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -112,6 +112,8 @@ void show_regs(struct pt_regs *r) { struct reg_window32 *rw = (struct reg_window32 *) r->u_regs[14]; + show_regs_print_info(KERN_DEFAULT); + printk("PSR: %08lx PC: %08lx NPC: %08lx Y: %08lx %s\n", r->psr, r->pc, r->npc, r->y, print_tainted()); printk("PC: <%pS>\n", (void *) r->pc); diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 9fbf0d1..c6dc1ba 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -163,6 +163,8 @@ static void show_regwindow(struct pt_regs *regs) void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_DEFAULT); + printk("TSTATE: %016lx TPC: %016lx TNPC: %016lx Y: %08x %s\n", regs->tstate, regs->tpc, regs->tnpc, regs->y, print_tainted()); printk("TPC: <%pS>\n", (void *) regs->tpc); diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 80b2a18..8ac3044 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -573,8 +573,7 @@ void show_regs(struct pt_regs *regs) int i; pr_err("\n"); - pr_err(" Pid: %d, comm: %20s, CPU: %d\n", - tsk->pid, tsk->comm, smp_processor_id()); + show_regs_print_info(KERN_ERR); #ifdef __tilegx__ for (i = 0; i < 51; i += 3) pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n", diff --git a/arch/um/sys-ppc/sysrq.c b/arch/um/sys-ppc/sysrq.c index f889449..1ff1ad7 100644 --- a/arch/um/sys-ppc/sysrq.c +++ b/arch/um/sys-ppc/sysrq.c @@ -11,6 +11,8 @@ void show_regs(struct pt_regs_subarch *regs) { printk("\n"); + show_regs_print_info(KERN_DEFAULT); + printk("show_regs(): insert regs here.\n"); #if 0 printk("\n"); diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index 7fab86d..c944769 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -144,11 +144,7 @@ void __show_regs(struct pt_regs *regs) unsigned long flags; char buf[64]; - printk(KERN_DEFAULT "CPU: %d %s (%s %.*s)\n", - raw_smp_processor_id(), print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + show_regs_print_info(KERN_DEFAULT); print_symbol("PC is at %s\n", instruction_pointer(regs)); print_symbol("LR is at %s\n", regs->UCreg_lr); printk(KERN_DEFAULT "pc : [<%08lx>] lr : [<%08lx>] psr: %08lx\n" diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 11e1152..2f03ff0 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -37,7 +37,4 @@ do { \ #include - -extern void show_regs_common(void); - #endif /* _ASM_X86_BUG_H */ diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 1038a41..f2a1770 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -86,11 +86,9 @@ void show_regs(struct pt_regs *regs) { int i; + show_regs_print_info(KERN_EMERG); __show_regs(regs, !user_mode_vm(regs)); - pr_emerg("Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", - TASK_COMM_LEN, current->comm, task_pid_nr(current), - current_thread_info(), current, task_thread_info(current)); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index b653675..addb207 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -249,14 +249,10 @@ void show_regs(struct pt_regs *regs) { int i; unsigned long sp; - const int cpu = smp_processor_id(); - struct task_struct *cur = current; sp = regs->sp; - printk("CPU %d ", cpu); + show_regs_print_info(KERN_DEFAULT); __show_regs(regs, 1); - printk(KERN_DEFAULT "Process %s (pid: %d, threadinfo %p, task %p)\n", - cur->comm, cur->pid, task_thread_info(cur), cur); /* * When in-kernel, we also print out the stack and code at the diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 14fcf55..607af0d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -121,30 +121,6 @@ void exit_thread(void) drop_fpu(me); } -void show_regs_common(void) -{ - const char *vendor, *product, *board; - - vendor = dmi_get_system_info(DMI_SYS_VENDOR); - if (!vendor) - vendor = ""; - product = dmi_get_system_info(DMI_PRODUCT_NAME); - if (!product) - product = ""; - - /* Board Name is optional */ - board = dmi_get_system_info(DMI_BOARD_NAME); - - printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s %s%s%s\n", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version, - vendor, product, - board ? "/" : "", - board ? board : ""); -} - void flush_thread(void) { struct task_struct *tsk = current; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index b5a8905..7305f7d 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -84,8 +84,6 @@ void __show_regs(struct pt_regs *regs, int all) savesegment(gs, gs); } - show_regs_common(); - printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", (u16)regs->cs, regs->ip, regs->flags, smp_processor_id()); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 0f49677..355ae06 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -62,7 +62,6 @@ void __show_regs(struct pt_regs *regs, int all) unsigned int fsindex, gsindex; unsigned int ds, cs, es; - show_regs_common(); printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); printk_address(regs->ip, 1); printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 384b7c7..458186d 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -383,6 +383,8 @@ void show_regs(struct pt_regs * regs) { int i, wmask; + show_regs_print_info(KERN_DEFAULT); + wmask = regs->wmask & ~1; for (i = 0; i < 16; i++) { diff --git a/include/linux/printk.h b/include/linux/printk.h index 47827c0..6af944a 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -147,6 +147,7 @@ void log_buf_kexec_setup(void); void __init setup_log_buf(int early); void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); +void show_regs_print_info(const char *log_lvl); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -192,6 +193,10 @@ static inline void dump_stack_set_arch_desc(const char *fmt, ...) static inline void dump_stack_print_info(const char *log_lvl) { } + +static inline void show_regs_print_info(const char *log_lvl) +{ +} #endif extern void dump_stack(void) __cold; diff --git a/kernel/printk.c b/kernel/printk.c index 73a96de..e10ad51 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -2893,4 +2893,20 @@ void dump_stack_print_info(const char *log_lvl) log_lvl, dump_stack_arch_desc_str); } +/** + * show_regs_print_info - print generic debug info for show_regs() + * @log_lvl: log level + * + * show_regs() implementations can use this function to print out generic + * debug information. + */ +void show_regs_print_info(const char *log_lvl) +{ + dump_stack_print_info(log_lvl); + + printk("%stask: %p ti: %p task.ti: %p\n", + log_lvl, current, current_thread_info(), + task_thread_info(current)); +} + #endif -- cgit v0.10.2 From 681a90ffe829b8ee25d5266d7e69af256c090940 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 30 Apr 2013 15:27:19 -0700 Subject: arc, print-fatal-signals: reduce duplicated information After the recent generic debug info on dump_stack() and friends, arc is printing duplicate information on debug dumps. [ARCLinux]$ ./crash crash/50: potentially unexpected fatal signal 11. <-- [1] /sbin/crash, TGID 50 <-- [2] Pid: 50, comm: crash Not tainted 3.9.0-rc4+ #132 <-- [3] ... Remove them. [tj@kernel.org: updated patch desc] Signed-off-by: Vineet Gupta Signed-off-by: Tejun Heo Cc: Bjorn Helgaas Cc: David S. Miller Cc: Fengguang Wu Cc: Heiko Carstens Cc: Jesper Nilsson Cc: Martin Schwidefsky Cc: Mike Frysinger Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 96be1e6..0aec0198 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -71,7 +71,7 @@ void print_task_path_n_nm(struct task_struct *tsk, char *buf) } done: - pr_info("%s, TGID %u\n", path_nm, tsk->tgid); + pr_info("Path: %s\n", path_nm); } EXPORT_SYMBOL(print_task_path_n_nm); diff --git a/kernel/signal.c b/kernel/signal.c index 598dc06..27ece01 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1160,8 +1160,7 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t, static void print_fatal_signal(int signr) { struct pt_regs *regs = signal_pt_regs(); - printk(KERN_INFO "%s/%d: potentially unexpected fatal signal %d.\n", - current->comm, task_pid_nr(current), signr); + printk(KERN_INFO "potentially unexpected fatal signal %d.\n", signr); #if defined(__i386__) && !defined(__arch_um__) printk(KERN_INFO "code at %08lx: ", regs->ip); -- cgit v0.10.2 From cd42d559e45e3563c74403e453f8954b593db69d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 30 Apr 2013 15:27:21 -0700 Subject: kthread: implement probe_kthread_data() One of the problems that arise when converting dedicated custom threadpool to workqueue is that the shared worker pool used by workqueue anonimizes each worker making it more difficult to identify what the worker was doing on which target from the output of sysrq-t or debug dump from oops, BUG() and friends. For example, after writeback is converted to use workqueue instead of priviate thread pool, there's no easy to tell which backing device a writeback work item was working on at the time of task dump, which, according to our writeback brethren, is important in tracking down issues with a lot of mounted file systems on a lot of different devices. This patchset implements a way for a work function to mark its execution instance so that task dump of the worker task includes information to indicate what the work item was doing. An example WARN dump would look like the following. WARNING: at fs/fs-writeback.c:1015 bdi_writeback_workfn+0x2b4/0x3c0() Modules linked in: CPU: 0 Pid: 28 Comm: kworker/u18:0 Not tainted 3.9.0-rc1-work+ #24 Hardware name: empty empty/S3992, BIOS 080011 10/26/2007 Workqueue: writeback bdi_writeback_workfn (flush-8:16) ffffffff820a3a98 ffff88015b927cb8 ffffffff81c61855 ffff88015b927cf8 ffffffff8108f500 0000000000000000 ffff88007a171948 ffff88007a1716b0 ffff88015b49df00 ffff88015b8d3940 0000000000000000 ffff88015b927d08 Call Trace: [] dump_stack+0x19/0x1b [] warn_slowpath_common+0x70/0xa0 ... This patch: Implement probe_kthread_data() which returns kthread_data if accessible. The function is equivalent to kthread_data() except that the specified @task may not be a kthread or its vfork_done is already cleared rendering struct kthread inaccessible. In the former case, probe_kthread_data() may return any value. In the latter, NULL. This will be used to safely print debug information without affecting synchronization in the normal paths. Workqueue debug info printing on dump_stack() and friends will make use of it. Signed-off-by: Tejun Heo Cc: Oleg Nesterov Acked-by: Jan Kara Cc: Dave Chinner Cc: Ingo Molnar Cc: Jens Axboe Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 8d81664..7dcef33 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -43,6 +43,7 @@ bool kthread_should_stop(void); bool kthread_should_park(void); bool kthread_freezable_should_stop(bool *was_frozen); void *kthread_data(struct task_struct *k); +void *probe_kthread_data(struct task_struct *k); int kthread_park(struct task_struct *k); void kthread_unpark(struct task_struct *k); void kthread_parkme(void); diff --git a/kernel/kthread.c b/kernel/kthread.c index 16d8ddd..760e86d 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -17,6 +17,7 @@ #include #include #include +#include #include static DEFINE_SPINLOCK(kthread_create_lock); @@ -135,6 +136,24 @@ void *kthread_data(struct task_struct *task) return to_kthread(task)->data; } +/** + * probe_kthread_data - speculative version of kthread_data() + * @task: possible kthread task in question + * + * @task could be a kthread task. Return the data value specified when it + * was created if accessible. If @task isn't a kthread task or its data is + * inaccessible for any reason, %NULL is returned. This function requires + * that @task itself is safe to dereference. + */ +void *probe_kthread_data(struct task_struct *task) +{ + struct kthread *kthread = to_kthread(task); + void *data = NULL; + + probe_kernel_read(&data, &kthread->data, sizeof(data)); + return data; +} + static void __kthread_parkme(struct kthread *self) { __set_current_state(TASK_PARKED); -- cgit v0.10.2 From 3d1cb2059d9374e58da481b783332cf191cb6620 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 30 Apr 2013 15:27:22 -0700 Subject: workqueue: include workqueue info when printing debug dump of a worker task One of the problems that arise when converting dedicated custom threadpool to workqueue is that the shared worker pool used by workqueue anonimizes each worker making it more difficult to identify what the worker was doing on which target from the output of sysrq-t or debug dump from oops, BUG() and friends. This patch implements set_worker_desc() which can be called from any workqueue work function to set its description. When the worker task is dumped for whatever reason - sysrq-t, WARN, BUG, oops, lockdep assertion and so on - the description will be printed out together with the workqueue name and the worker function pointer. The printing side is implemented by print_worker_info() which is called from functions in task dump paths - sched_show_task() and dump_stack_print_info(). print_worker_info() can be safely called on any task in any state as long as the task struct itself is accessible. It uses probe_*() functions to access worker fields. It may print garbage if something went very wrong, but it wouldn't cause (another) oops. The description is currently limited to 24bytes including the terminating \0. worker->desc_valid and workder->desc[] are added and the 64 bytes marker which was already incorrect before adding the new fields is moved to the correct position. Here's an example dump with writeback updated to set the bdi name as worker desc. Hardware name: Bochs Modules linked in: Pid: 7, comm: kworker/u9:0 Not tainted 3.9.0-rc1-work+ #1 Workqueue: writeback bdi_writeback_workfn (flush-8:0) ffffffff820a3ab0 ffff88000f6e9cb8 ffffffff81c61845 ffff88000f6e9cf8 ffffffff8108f50f 0000000000000000 0000000000000000 ffff88000cde16b0 ffff88000cde1aa8 ffff88001ee19240 ffff88000f6e9fd8 ffff88000f6e9d08 Call Trace: [] dump_stack+0x19/0x1b [] warn_slowpath_common+0x7f/0xc0 [] warn_slowpath_null+0x1a/0x20 [] bdi_writeback_workfn+0x2a0/0x3b0 ... Signed-off-by: Tejun Heo Cc: Peter Zijlstra Cc: Ingo Molnar Acked-by: Jan Kara Cc: Oleg Nesterov Cc: Jens Axboe Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 7179756..623488f 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -92,6 +92,9 @@ enum { /* bit mask for work_busy() return values */ WORK_BUSY_PENDING = 1 << 0, WORK_BUSY_RUNNING = 1 << 1, + + /* maximum string length for set_worker_desc() */ + WORKER_DESC_LEN = 24, }; struct work_struct { @@ -447,6 +450,8 @@ extern void workqueue_set_max_active(struct workqueue_struct *wq, extern bool current_is_workqueue_rescuer(void); extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); extern unsigned int work_busy(struct work_struct *work); +extern __printf(1, 2) void set_worker_desc(const char *fmt, ...); +extern void print_worker_info(const char *log_lvl, struct task_struct *task); /** * queue_work - queue work on a workqueue diff --git a/kernel/printk.c b/kernel/printk.c index e10ad51..96dcfcd 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -2891,6 +2891,8 @@ void dump_stack_print_info(const char *log_lvl) if (dump_stack_arch_desc_str[0] != '\0') printk("%sHardware name: %s\n", log_lvl, dump_stack_arch_desc_str); + + print_worker_info(log_lvl, current); } /** diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c70a881..5662f58 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4586,6 +4586,7 @@ void sched_show_task(struct task_struct *p) task_pid_nr(p), ppid, (unsigned long)task_thread_info(p)->flags); + print_worker_info(KERN_INFO, p); show_stack(p, NULL); } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 154aa12..4aa9f5b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "workqueue_internal.h" @@ -2197,6 +2198,7 @@ __acquires(&pool->lock) worker->current_work = NULL; worker->current_func = NULL; worker->current_pwq = NULL; + worker->desc_valid = false; pwq_dec_nr_in_flight(pwq, work_color); } @@ -4365,6 +4367,83 @@ unsigned int work_busy(struct work_struct *work) } EXPORT_SYMBOL_GPL(work_busy); +/** + * set_worker_desc - set description for the current work item + * @fmt: printf-style format string + * @...: arguments for the format string + * + * This function can be called by a running work function to describe what + * the work item is about. If the worker task gets dumped, this + * information will be printed out together to help debugging. The + * description can be at most WORKER_DESC_LEN including the trailing '\0'. + */ +void set_worker_desc(const char *fmt, ...) +{ + struct worker *worker = current_wq_worker(); + va_list args; + + if (worker) { + va_start(args, fmt); + vsnprintf(worker->desc, sizeof(worker->desc), fmt, args); + va_end(args); + worker->desc_valid = true; + } +} + +/** + * print_worker_info - print out worker information and description + * @log_lvl: the log level to use when printing + * @task: target task + * + * If @task is a worker and currently executing a work item, print out the + * name of the workqueue being serviced and worker description set with + * set_worker_desc() by the currently executing work item. + * + * This function can be safely called on any task as long as the + * task_struct itself is accessible. While safe, this function isn't + * synchronized and may print out mixups or garbages of limited length. + */ +void print_worker_info(const char *log_lvl, struct task_struct *task) +{ + work_func_t *fn = NULL; + char name[WQ_NAME_LEN] = { }; + char desc[WORKER_DESC_LEN] = { }; + struct pool_workqueue *pwq = NULL; + struct workqueue_struct *wq = NULL; + bool desc_valid = false; + struct worker *worker; + + if (!(task->flags & PF_WQ_WORKER)) + return; + + /* + * This function is called without any synchronization and @task + * could be in any state. Be careful with dereferences. + */ + worker = probe_kthread_data(task); + + /* + * Carefully copy the associated workqueue's workfn and name. Keep + * the original last '\0' in case the original contains garbage. + */ + probe_kernel_read(&fn, &worker->current_func, sizeof(fn)); + probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq)); + probe_kernel_read(&wq, &pwq->wq, sizeof(wq)); + probe_kernel_read(name, wq->name, sizeof(name) - 1); + + /* copy worker description */ + probe_kernel_read(&desc_valid, &worker->desc_valid, sizeof(desc_valid)); + if (desc_valid) + probe_kernel_read(desc, worker->desc, sizeof(desc) - 1); + + if (fn || name[0] || desc[0]) { + printk("%sWorkqueue: %s %pf", log_lvl, name, fn); + if (desc[0]) + pr_cont(" (%s)", desc); + pr_cont("\n"); + } +} + /* * CPU hotplug. * diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index 84ab6e1..ad83c96 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -29,15 +29,25 @@ struct worker { struct work_struct *current_work; /* L: work being processed */ work_func_t current_func; /* L: current_work's fn */ struct pool_workqueue *current_pwq; /* L: current_work's pwq */ + bool desc_valid; /* ->desc is valid */ struct list_head scheduled; /* L: scheduled works */ + + /* 64 bytes boundary on 64bit, 32 on 32bit */ + struct task_struct *task; /* I: worker task */ struct worker_pool *pool; /* I: the associated pool */ /* L: for rescuers */ - /* 64 bytes boundary on 64bit, 32 on 32bit */ + unsigned long last_active; /* L: last active timestamp */ unsigned int flags; /* X: flags */ int id; /* I: worker id */ + /* + * Opaque string set with work_set_desc(). Printed out with task + * dump for debugging - WARN, BUG, panic or sysrq. + */ + char desc[WORKER_DESC_LEN]; + /* used only by rescuers to point to the target workqueue */ struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */ }; -- cgit v0.10.2 From ef3b101925f2170c2b8cd2e126b37492ae02f77c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 30 Apr 2013 15:27:24 -0700 Subject: writeback: set worker desc to identify writeback workers in task dumps Writeback has been recently converted to use workqueue instead of its private thread pool implementation. One negative side effect of this conversion is that there's no easy to tell which backing device a writeback work item was working on at the time of task dump, be it sysrq-t, BUG, WARN or whatever, which, according to our writeback brethren, is important in tracking down issues with a lot of mounted file systems on a lot of different devices. This patch restores that information using the new worker description facility. bdi_writeback_workfn() calls set_work_desc() to identify which bdi it's working on. The description is printed out together with the worqueue name and worker function as in the following example dump. WARNING: at fs/fs-writeback.c:1015 bdi_writeback_workfn+0x2b4/0x3c0() Modules linked in: Pid: 28, comm: kworker/u18:0 Not tainted 3.9.0-rc1-work+ #24 empty empty/S3992 Workqueue: writeback bdi_writeback_workfn (flush-8:16) ffffffff820a3a98 ffff88015b927cb8 ffffffff81c61855 ffff88015b927cf8 ffffffff8108f500 0000000000000000 ffff88007a171948 ffff88007a1716b0 ffff88015b49df00 ffff88015b8d3940 0000000000000000 ffff88015b927d08 Call Trace: [] dump_stack+0x19/0x1b [] warn_slowpath_common+0x70/0xa0 [] warn_slowpath_null+0x1a/0x20 [] bdi_writeback_workfn+0x2b4/0x3c0 [] process_one_work+0x1d7/0x660 [] worker_thread+0x122/0x380 [] kthread+0xea/0xf0 [] ret_from_fork+0x7c/0xb0 Signed-off-by: Tejun Heo Cc: Dave Chinner Cc: Jan Kara Cc: Jens Axboe Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 21f46fb..798d445 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1028,6 +1028,7 @@ int bdi_writeback_thread(void *data) struct backing_dev_info *bdi = wb->bdi; long pages_written; + set_worker_desc("flush-%s", dev_name(bdi->dev)); current->flags |= PF_SWAPWRITE; set_freezable(); wb->last_active = jiffies; -- cgit v0.10.2 From 1def1dc91715acdb6dddfaed1a44149d42d8063c Mon Sep 17 00:00:00 2001 From: liguang Date: Tue, 30 Apr 2013 15:27:25 -0700 Subject: kernel/smp.c: use '|=' for csd_lock csd_lock() uses assignment to data->flags rather than |=. That is not buggy at present because only one bit (CSD_FLAG_LOCK) is defined in call_single_data.flags. But it will become buggy if we later add another flag, so fix it now. Signed-off-by: liguang Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/smp.c b/kernel/smp.c index 8e451f3..b320622 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -109,7 +109,7 @@ static void csd_lock_wait(struct call_single_data *data) static void csd_lock(struct call_single_data *data) { csd_lock_wait(data); - data->flags = CSD_FLAG_LOCK; + data->flags |= CSD_FLAG_LOCK; /* * prevent CPU from reordering the above assignment -- cgit v0.10.2 From 3440a1ca99707f093e9568ba9762764d3162dd8f Mon Sep 17 00:00:00 2001 From: liguang Date: Tue, 30 Apr 2013 15:27:26 -0700 Subject: kernel/smp.c: remove 'priv' of call_single_data The 'priv' field is redundant; we can pass data via 'info'. Signed-off-by: liguang Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/smp.h b/include/linux/smp.h index 3e07a7d..e6564c1 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -20,7 +20,6 @@ struct call_single_data { smp_call_func_t func; void *info; u16 flags; - u16 priv; }; /* total number of cpus in this system (may exceed NR_CPUS) */ diff --git a/kernel/softirq.c b/kernel/softirq.c index 14d7758..aa82723 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -620,8 +620,7 @@ static void remote_softirq_receive(void *data) unsigned long flags; int softirq; - softirq = cp->priv; - + softirq = *(int *)cp->info; local_irq_save(flags); __local_trigger(cp, softirq); local_irq_restore(flags); @@ -631,9 +630,8 @@ static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softir { if (cpu_online(cpu)) { cp->func = remote_softirq_receive; - cp->info = cp; + cp->info = &softirq; cp->flags = 0; - cp->priv = softirq; __smp_call_function_single(cpu, cp, 0); return 0; -- cgit v0.10.2 From 74e3d1e17b2e11d175970b85acd44f5927000ba2 Mon Sep 17 00:00:00 2001 From: Fan Du Date: Tue, 30 Apr 2013 15:27:27 -0700 Subject: include/linux/fs.h: disable preempt when acquire i_size_seqcount write lock Two rt tasks bind to one CPU core. The higher priority rt task A preempts a lower priority rt task B which has already taken the write seq lock, and then the higher priority rt task A try to acquire read seq lock, it's doomed to lockup. rt task A with lower priority: call write i_size_write rt task B with higher priority: call sync, and preempt task A write_seqcount_begin(&inode->i_size_seqcount); i_size_read inode->i_size = i_size; read_seqcount_begin <-- lockup here... So disable preempt when acquiring every i_size_seqcount *write* lock will cure the problem. Signed-off-by: Fan Du Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/fs.h b/include/linux/fs.h index 2c28271..17d8b15 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -675,9 +675,11 @@ static inline loff_t i_size_read(const struct inode *inode) static inline void i_size_write(struct inode *inode, loff_t i_size) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) + preempt_disable(); write_seqcount_begin(&inode->i_size_seqcount); inode->i_size = i_size; write_seqcount_end(&inode->i_size_seqcount); + preempt_enable(); #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) preempt_disable(); inode->i_size = i_size; -- cgit v0.10.2 From e1d12f327037a59e06c66520951ab4e0bd29f9c4 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 30 Apr 2013 15:27:28 -0700 Subject: kernel/smp.c: cleanups We sometimes use "struct call_single_data *data" and sometimes "struct call_single_data *csd". Use "csd" consistently. We sometimes use "struct call_function_data *data" and sometimes "struct call_function_data *cfd". Use "cfd" consistently. Also, avoid some 80-col layout tricks. Cc: Ingo Molnar Cc: Jens Axboe Cc: Peter Zijlstra Cc: Shaohua Li Cc: Shaohua Li Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/smp.c b/kernel/smp.c index b320622..4dba0f7 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -100,16 +100,16 @@ void __init call_function_init(void) * previous function call. For multi-cpu calls its even more interesting * as we'll have to ensure no other cpu is observing our csd. */ -static void csd_lock_wait(struct call_single_data *data) +static void csd_lock_wait(struct call_single_data *csd) { - while (data->flags & CSD_FLAG_LOCK) + while (csd->flags & CSD_FLAG_LOCK) cpu_relax(); } -static void csd_lock(struct call_single_data *data) +static void csd_lock(struct call_single_data *csd) { - csd_lock_wait(data); - data->flags |= CSD_FLAG_LOCK; + csd_lock_wait(csd); + csd->flags |= CSD_FLAG_LOCK; /* * prevent CPU from reordering the above assignment @@ -119,16 +119,16 @@ static void csd_lock(struct call_single_data *data) smp_mb(); } -static void csd_unlock(struct call_single_data *data) +static void csd_unlock(struct call_single_data *csd) { - WARN_ON(!(data->flags & CSD_FLAG_LOCK)); + WARN_ON(!(csd->flags & CSD_FLAG_LOCK)); /* * ensure we're all done before releasing data: */ smp_mb(); - data->flags &= ~CSD_FLAG_LOCK; + csd->flags &= ~CSD_FLAG_LOCK; } /* @@ -137,7 +137,7 @@ static void csd_unlock(struct call_single_data *data) * ->func, ->info, and ->flags set. */ static -void generic_exec_single(int cpu, struct call_single_data *data, int wait) +void generic_exec_single(int cpu, struct call_single_data *csd, int wait) { struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); unsigned long flags; @@ -145,7 +145,7 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait) raw_spin_lock_irqsave(&dst->lock, flags); ipi = list_empty(&dst->list); - list_add_tail(&data->list, &dst->list); + list_add_tail(&csd->list, &dst->list); raw_spin_unlock_irqrestore(&dst->lock, flags); /* @@ -163,7 +163,7 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait) arch_send_call_function_single_ipi(cpu); if (wait) - csd_lock_wait(data); + csd_lock_wait(csd); } /* @@ -173,7 +173,6 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait) void generic_smp_call_function_single_interrupt(void) { struct call_single_queue *q = &__get_cpu_var(call_single_queue); - unsigned int data_flags; LIST_HEAD(list); /* @@ -186,25 +185,26 @@ void generic_smp_call_function_single_interrupt(void) raw_spin_unlock(&q->lock); while (!list_empty(&list)) { - struct call_single_data *data; + struct call_single_data *csd; + unsigned int csd_flags; - data = list_entry(list.next, struct call_single_data, list); - list_del(&data->list); + csd = list_entry(list.next, struct call_single_data, list); + list_del(&csd->list); /* - * 'data' can be invalid after this call if flags == 0 + * 'csd' can be invalid after this call if flags == 0 * (when called through generic_exec_single()), * so save them away before making the call: */ - data_flags = data->flags; + csd_flags = csd->flags; - data->func(data->info); + csd->func(csd->info); /* * Unlocked CSDs are valid through generic_exec_single(): */ - if (data_flags & CSD_FLAG_LOCK) - csd_unlock(data); + if (csd_flags & CSD_FLAG_LOCK) + csd_unlock(csd); } } @@ -249,16 +249,16 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, local_irq_restore(flags); } else { if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { - struct call_single_data *data = &d; + struct call_single_data *csd = &d; if (!wait) - data = &__get_cpu_var(csd_data); + csd = &__get_cpu_var(csd_data); - csd_lock(data); + csd_lock(csd); - data->func = func; - data->info = info; - generic_exec_single(cpu, data, wait); + csd->func = func; + csd->info = info; + generic_exec_single(cpu, csd, wait); } else { err = -ENXIO; /* CPU not online */ } @@ -325,7 +325,7 @@ EXPORT_SYMBOL_GPL(smp_call_function_any); * pre-allocated data structure. Useful for embedding @data inside * other structures, for instance. */ -void __smp_call_function_single(int cpu, struct call_single_data *data, +void __smp_call_function_single(int cpu, struct call_single_data *csd, int wait) { unsigned int this_cpu; @@ -343,11 +343,11 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, if (cpu == this_cpu) { local_irq_save(flags); - data->func(data->info); + csd->func(csd->info); local_irq_restore(flags); } else { - csd_lock(data); - generic_exec_single(cpu, data, wait); + csd_lock(csd); + generic_exec_single(cpu, csd, wait); } put_cpu(); } @@ -369,7 +369,7 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, void smp_call_function_many(const struct cpumask *mask, smp_call_func_t func, void *info, bool wait) { - struct call_function_data *data; + struct call_function_data *cfd; int cpu, next_cpu, this_cpu = smp_processor_id(); /* @@ -401,24 +401,24 @@ void smp_call_function_many(const struct cpumask *mask, return; } - data = &__get_cpu_var(cfd_data); + cfd = &__get_cpu_var(cfd_data); - cpumask_and(data->cpumask, mask, cpu_online_mask); - cpumask_clear_cpu(this_cpu, data->cpumask); + cpumask_and(cfd->cpumask, mask, cpu_online_mask); + cpumask_clear_cpu(this_cpu, cfd->cpumask); /* Some callers race with other cpus changing the passed mask */ - if (unlikely(!cpumask_weight(data->cpumask))) + if (unlikely(!cpumask_weight(cfd->cpumask))) return; /* - * After we put an entry into the list, data->cpumask - * may be cleared again when another CPU sends another IPI for - * a SMP function call, so data->cpumask will be zero. + * After we put an entry into the list, cfd->cpumask may be cleared + * again when another CPU sends another IPI for a SMP function call, so + * cfd->cpumask will be zero. */ - cpumask_copy(data->cpumask_ipi, data->cpumask); + cpumask_copy(cfd->cpumask_ipi, cfd->cpumask); - for_each_cpu(cpu, data->cpumask) { - struct call_single_data *csd = per_cpu_ptr(data->csd, cpu); + for_each_cpu(cpu, cfd->cpumask) { + struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu); struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); unsigned long flags; @@ -433,12 +433,13 @@ void smp_call_function_many(const struct cpumask *mask, } /* Send a message to all CPUs in the map */ - arch_send_call_function_ipi_mask(data->cpumask_ipi); + arch_send_call_function_ipi_mask(cfd->cpumask_ipi); if (wait) { - for_each_cpu(cpu, data->cpumask) { - struct call_single_data *csd = - per_cpu_ptr(data->csd, cpu); + for_each_cpu(cpu, cfd->cpumask) { + struct call_single_data *csd; + + csd = per_cpu_ptr(cfd->csd, cpu); csd_lock_wait(csd); } } -- cgit v0.10.2 From 16c7fa05829e8b91db48e3539c5d6ff3c2b18a23 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 30 Apr 2013 15:27:30 -0700 Subject: lib/string_helpers: introduce generic string_unescape There are several places in kernel where modules unescapes input to convert C-Style Escape Sequences into byte codes. The patch provides generic implementation of such approach. Test cases are also included into the patch. [akpm@linux-foundation.org: clarify comment] [akpm@linux-foundation.org: export get_random_int() to modules] Signed-off-by: Andy Shevchenko Cc: Samuel Thibault Cc: Greg Kroah-Hartman Cc: Jason Baron Cc: Alexander Viro Cc: William Hubbs Cc: Chris Brannon Cc: Kirk Reiser Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/char/random.c b/drivers/char/random.c index 32a6c57..cd9a621 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1485,6 +1485,7 @@ unsigned int get_random_int(void) return ret; } +EXPORT_SYMBOL(get_random_int); /* * randomize_range() returns a start address such that diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index a3eb2f6..3eeee96 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -13,4 +13,62 @@ enum string_size_units { int string_get_size(u64 size, enum string_size_units units, char *buf, int len); +#define UNESCAPE_SPACE 0x01 +#define UNESCAPE_OCTAL 0x02 +#define UNESCAPE_HEX 0x04 +#define UNESCAPE_SPECIAL 0x08 +#define UNESCAPE_ANY \ + (UNESCAPE_SPACE | UNESCAPE_OCTAL | UNESCAPE_HEX | UNESCAPE_SPECIAL) + +/** + * string_unescape - unquote characters in the given string + * @src: source buffer (escaped) + * @dst: destination buffer (unescaped) + * @size: size of the destination buffer (0 to unlimit) + * @flags: combination of the flags (bitwise OR): + * %UNESCAPE_SPACE: + * '\f' - form feed + * '\n' - new line + * '\r' - carriage return + * '\t' - horizontal tab + * '\v' - vertical tab + * %UNESCAPE_OCTAL: + * '\NNN' - byte with octal value NNN (1 to 3 digits) + * %UNESCAPE_HEX: + * '\xHH' - byte with hexadecimal value HH (1 to 2 digits) + * %UNESCAPE_SPECIAL: + * '\"' - double quote + * '\\' - backslash + * '\a' - alert (BEL) + * '\e' - escape + * %UNESCAPE_ANY: + * all previous together + * + * Returns amount of characters processed to the destination buffer excluding + * trailing '\0'. + * + * Because the size of the output will be the same as or less than the size of + * the input, the transformation may be performed in place. + * + * Caller must provide valid source and destination pointers. Be aware that + * destination buffer will always be NULL-terminated. Source string must be + * NULL-terminated as well. + */ +int string_unescape(char *src, char *dst, size_t size, unsigned int flags); + +static inline int string_unescape_inplace(char *buf, unsigned int flags) +{ + return string_unescape(buf, buf, 0, flags); +} + +static inline int string_unescape_any(char *src, char *dst, size_t size) +{ + return string_unescape(src, dst, size, UNESCAPE_ANY); +} + +static inline int string_unescape_any_inplace(char *buf) +{ + return string_unescape_any(buf, buf, 0); +} + #endif diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 28be08c..77ebaa3 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1463,5 +1463,8 @@ source "lib/Kconfig.kgdb" source "lib/Kconfig.kmemcheck" +config TEST_STRING_HELPERS + tristate "Test functions located in the string_helpers module at runtime" + config TEST_KSTRTOX tristate "Test kstrto*() family of functions at runtime" diff --git a/lib/Makefile b/lib/Makefile index 6e2cc56..23c9a0f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -22,8 +22,10 @@ lib-y += kobject.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \ + gcd.o lcm.o list_sort.o uuid.o flex_array.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o +obj-y += string_helpers.o +obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 1cffc22..ed5c145 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -2,10 +2,12 @@ * Helpers for formatting and printing strings * * Copyright 31 August 2008 James Bottomley + * Copyright (C) 2013, Intel Corporation */ #include #include #include +#include #include /** @@ -66,3 +68,134 @@ int string_get_size(u64 size, const enum string_size_units units, return 0; } EXPORT_SYMBOL(string_get_size); + +static bool unescape_space(char **src, char **dst) +{ + char *p = *dst, *q = *src; + + switch (*q) { + case 'n': + *p = '\n'; + break; + case 'r': + *p = '\r'; + break; + case 't': + *p = '\t'; + break; + case 'v': + *p = '\v'; + break; + case 'f': + *p = '\f'; + break; + default: + return false; + } + *dst += 1; + *src += 1; + return true; +} + +static bool unescape_octal(char **src, char **dst) +{ + char *p = *dst, *q = *src; + u8 num; + + if (isodigit(*q) == 0) + return false; + + num = (*q++) & 7; + while (num < 32 && isodigit(*q) && (q - *src < 3)) { + num <<= 3; + num += (*q++) & 7; + } + *p = num; + *dst += 1; + *src = q; + return true; +} + +static bool unescape_hex(char **src, char **dst) +{ + char *p = *dst, *q = *src; + int digit; + u8 num; + + if (*q++ != 'x') + return false; + + num = digit = hex_to_bin(*q++); + if (digit < 0) + return false; + + digit = hex_to_bin(*q); + if (digit >= 0) { + q++; + num = (num << 4) | digit; + } + *p = num; + *dst += 1; + *src = q; + return true; +} + +static bool unescape_special(char **src, char **dst) +{ + char *p = *dst, *q = *src; + + switch (*q) { + case '\"': + *p = '\"'; + break; + case '\\': + *p = '\\'; + break; + case 'a': + *p = '\a'; + break; + case 'e': + *p = '\e'; + break; + default: + return false; + } + *dst += 1; + *src += 1; + return true; +} + +int string_unescape(char *src, char *dst, size_t size, unsigned int flags) +{ + char *out = dst; + + while (*src && --size) { + if (src[0] == '\\' && src[1] != '\0' && size > 1) { + src++; + size--; + + if (flags & UNESCAPE_SPACE && + unescape_space(&src, &out)) + continue; + + if (flags & UNESCAPE_OCTAL && + unescape_octal(&src, &out)) + continue; + + if (flags & UNESCAPE_HEX && + unescape_hex(&src, &out)) + continue; + + if (flags & UNESCAPE_SPECIAL && + unescape_special(&src, &out)) + continue; + + *out++ = '\\'; + } + *out++ = *src++; + } + *out = '\0'; + + return out - dst; +} +EXPORT_SYMBOL(string_unescape); diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c new file mode 100644 index 0000000..6ac48de --- /dev/null +++ b/lib/test-string_helpers.c @@ -0,0 +1,103 @@ +/* + * Test cases for lib/string_helpers.c module. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include + +struct test_string { + const char *in; + const char *out; + unsigned int flags; +}; + +static const struct test_string strings[] __initconst = { + { + .in = "\\f\\ \\n\\r\\t\\v", + .out = "\f\\ \n\r\t\v", + .flags = UNESCAPE_SPACE, + }, + { + .in = "\\40\\1\\387\\0064\\05\\040\\8a\\110\\777", + .out = " \001\00387\0064\005 \\8aH?7", + .flags = UNESCAPE_OCTAL, + }, + { + .in = "\\xv\\xa\\x2c\\xD\\x6f2", + .out = "\\xv\n,\ro2", + .flags = UNESCAPE_HEX, + }, + { + .in = "\\h\\\\\\\"\\a\\e\\", + .out = "\\h\\\"\a\e\\", + .flags = UNESCAPE_SPECIAL, + }, +}; + +static void __init test_string_unescape(unsigned int flags, bool inplace) +{ + char in[256]; + char out_test[256]; + char out_real[256]; + int i, p = 0, q_test = 0, q_real = sizeof(out_real); + + for (i = 0; i < ARRAY_SIZE(strings); i++) { + const char *s = strings[i].in; + int len = strlen(strings[i].in); + + /* Copy string to in buffer */ + memcpy(&in[p], s, len); + p += len; + + /* Copy expected result for given flags */ + if (flags & strings[i].flags) { + s = strings[i].out; + len = strlen(strings[i].out); + } + memcpy(&out_test[q_test], s, len); + q_test += len; + } + in[p++] = '\0'; + + /* Call string_unescape and compare result */ + if (inplace) { + memcpy(out_real, in, p); + if (flags == UNESCAPE_ANY) + q_real = string_unescape_any_inplace(out_real); + else + q_real = string_unescape_inplace(out_real, flags); + } else if (flags == UNESCAPE_ANY) { + q_real = string_unescape_any(in, out_real, q_real); + } else { + q_real = string_unescape(in, out_real, q_real, flags); + } + + if (q_real != q_test || memcmp(out_test, out_real, q_test)) { + pr_warn("Test failed: flags = %u\n", flags); + print_hex_dump(KERN_WARNING, "Input: ", + DUMP_PREFIX_NONE, 16, 1, in, p - 1, true); + print_hex_dump(KERN_WARNING, "Expected: ", + DUMP_PREFIX_NONE, 16, 1, out_test, q_test, true); + print_hex_dump(KERN_WARNING, "Got: ", + DUMP_PREFIX_NONE, 16, 1, out_real, q_real, true); + } +} + +static int __init test_string_helpers_init(void) +{ + unsigned int i; + + pr_info("Running tests...\n"); + for (i = 0; i < UNESCAPE_ANY + 1; i++) + test_string_unescape(i, false); + test_string_unescape(get_random_int() % (UNESCAPE_ANY + 1), true); + + return -EINVAL; +} +module_init(test_string_helpers_init); +MODULE_LICENSE("Dual BSD/GPL"); -- cgit v0.10.2 From 576d742e4a0d0f1bab7950012addccb82fbc172a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 30 Apr 2013 15:27:31 -0700 Subject: staging: speakup: remove custom string_unescape_any_inplace There is generic implementation of the function to unescape strings. Signed-off-by: Andy Shevchenko Cc: Samuel Thibault Cc: Greg Kroah-Hartman Cc: William Hubbs Cc: Chris Brannon Cc: Kirk Reiser Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/staging/speakup/kobjects.c b/drivers/staging/speakup/kobjects.c index d6d9264..943b6c1 100644 --- a/drivers/staging/speakup/kobjects.c +++ b/drivers/staging/speakup/kobjects.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -417,7 +418,7 @@ static ssize_t synth_direct_store(struct kobject *kobj, bytes = min_t(size_t, len, 250); strncpy(tmp, ptr, bytes); tmp[bytes] = '\0'; - spk_xlate(tmp); + string_unescape_any_inplace(tmp); synth_printf("%s", tmp); ptr += bytes; len -= bytes; @@ -605,7 +606,8 @@ ssize_t spk_var_store(struct kobject *kobj, struct kobj_attribute *attr, if (param->data == NULL) return 0; ret = 0; - cp = spk_xlate((char *) buf); + cp = (char *)buf; + string_unescape_any_inplace(cp); spk_lock(flags); switch (param->var_type) { diff --git a/drivers/staging/speakup/speakup.h b/drivers/staging/speakup/speakup.h index c387a02..0126f71 100644 --- a/drivers/staging/speakup/speakup.h +++ b/drivers/staging/speakup/speakup.h @@ -54,7 +54,6 @@ void spk_get_index_count(int *linecount, int *sentcount); extern int spk_set_key_info(const u_char *key_info, u_char *k_buffer); extern char *spk_strlwr(char *s); extern char *spk_s2uchar(char *start, char *dest); -extern char *spk_xlate(char *s); extern int speakup_kobj_init(void); extern void speakup_kobj_exit(void); extern int spk_chartab_get_value(char *keyword); diff --git a/drivers/staging/speakup/varhandlers.c b/drivers/staging/speakup/varhandlers.c index 0099cb1..7f6288f 100644 --- a/drivers/staging/speakup/varhandlers.c +++ b/drivers/staging/speakup/varhandlers.c @@ -328,49 +328,3 @@ char *spk_s2uchar(char *start, char *dest) *dest = (u_char)val; return start; } - -char *spk_xlate(char *s) -{ - static const char finds[] = "nrtvafe"; - static const char subs[] = "\n\r\t\013\001\014\033"; - static const char hx[] = "0123456789abcdefABCDEF"; - char *p = s, *p1, *p2, c; - int num; - while ((p = strchr(p, '\\'))) { - p1 = p+1; - p2 = strchr(finds, *p1); - if (p2) { - *p++ = subs[p2-finds]; - p1++; - } else if (*p1 >= '0' && *p1 <= '7') { - num = (*p1++)&7; - while (num < 32 && *p1 >= '0' && *p1 <= '7') { - num <<= 3; - num += (*p1++)&7; - } - *p++ = num; - } else if (*p1 == 'x' && - strchr(hx, p1[1]) && strchr(hx, p1[2])) { - p1++; - c = *p1++; - if (c > '9') - c = (c - '7') & 0x0f; - else - c -= '0'; - num = c << 4; - c = *p1++; - if (c > '9') - c = (c-'7')&0x0f; - else - c -= '0'; - num += c; - *p++ = num; - } else - *p++ = *p1++; - p2 = p; - while (*p1) - *p2++ = *p1++; - *p2 = '\0'; - } - return s; -} -- cgit v0.10.2 From d338b1379f96b20e63aa65082e19e9a18a93b608 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 30 Apr 2013 15:27:32 -0700 Subject: dynamic_debug: reuse generic string_unescape function There is kernel function to do the job in generic way. Let's use it. Signed-off-by: Andy Shevchenko Cc: Jason Baron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 4603245..99fec3a 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -276,47 +277,6 @@ static inline int parse_lineno(const char *str, unsigned int *val) return 0; } -/* - * Undo octal escaping in a string, inplace. This is useful to - * allow the user to express a query which matches a format - * containing embedded spaces. - */ -static char *unescape(char *str) -{ - char *in = str; - char *out = str; - - while (*in) { - if (*in == '\\') { - if (in[1] == '\\') { - *out++ = '\\'; - in += 2; - continue; - } else if (in[1] == 't') { - *out++ = '\t'; - in += 2; - continue; - } else if (in[1] == 'n') { - *out++ = '\n'; - in += 2; - continue; - } else if (isodigit(in[1]) && - isodigit(in[2]) && - isodigit(in[3])) { - *out++ = (((in[1] - '0') << 6) | - ((in[2] - '0') << 3) | - (in[3] - '0')); - in += 4; - continue; - } - } - *out++ = *in++; - } - *out = '\0'; - - return str; -} - static int check_set(const char **dest, char *src, char *name) { int rc = 0; @@ -370,8 +330,10 @@ static int ddebug_parse_query(char *words[], int nwords, } else if (!strcmp(words[i], "module")) { rc = check_set(&query->module, words[i+1], "module"); } else if (!strcmp(words[i], "format")) { - rc = check_set(&query->format, unescape(words[i+1]), - "format"); + string_unescape_inplace(words[i+1], UNESCAPE_SPACE | + UNESCAPE_OCTAL | + UNESCAPE_SPECIAL); + rc = check_set(&query->format, words[i+1], "format"); } else if (!strcmp(words[i], "line")) { char *first = words[i+1]; char *last = strchr(first, '-'); -- cgit v0.10.2 From 8d82e180b51f8ecca9ae9d6a3e7f01d50ea34c20 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 30 Apr 2013 15:27:33 -0700 Subject: binfmt_misc: reuse string_unescape_inplace() There is string_unescape_inplace() function which decodes strings in generic way. Let's use it. Signed-off-by: Andy Shevchenko Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Linus Torvalds diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 751df5e..1c740e1 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -234,24 +235,6 @@ static char *scanarg(char *s, char del) return s; } -static int unquote(char *from) -{ - char c = 0, *s = from, *p = from; - - while ((c = *s++) != '\0') { - if (c == '\\' && *s == 'x') { - s++; - c = toupper(*s++); - *p = (c - (isdigit(c) ? '0' : 'A' - 10)) << 4; - c = toupper(*s++); - *p++ |= c - (isdigit(c) ? '0' : 'A' - 10); - continue; - } - *p++ = c; - } - return p - from; -} - static char * check_special_flags (char * sfs, Node * e) { char * p = sfs; @@ -354,8 +337,9 @@ static Node *create_entry(const char __user *buffer, size_t count) p[-1] = '\0'; if (!e->mask[0]) e->mask = NULL; - e->size = unquote(e->magic); - if (e->mask && unquote(e->mask) != e->size) + e->size = string_unescape_inplace(e->magic, UNESCAPE_HEX); + if (e->mask && + string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size) goto Einval; if (e->size + e->offset > BINPRM_BUF_SIZE) goto Einval; -- cgit v0.10.2 From 1a0df59444972105f0d4c2b0c16ce414d70c420a Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 30 Apr 2013 15:27:34 -0700 Subject: kernel/compat.c: make do_sysinfo() static The only use outside of kernel/timer.c was in kernel/compat.c, so move compat_sys_sysinfo() next to sys_sysinfo() in kernel/timer.c. Signed-off-by: Stephen Rothwell Cc: Thomas Gleixner Cc: Guenter Roeck Cc: Al Viro Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2dac79c..6d1844f 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -798,6 +798,4 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD #endif -extern int do_sysinfo(struct sysinfo *info); - #endif diff --git a/kernel/compat.c b/kernel/compat.c index 19971d8..1e8f145 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -1138,71 +1138,6 @@ asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, } #endif -struct compat_sysinfo { - s32 uptime; - u32 loads[3]; - u32 totalram; - u32 freeram; - u32 sharedram; - u32 bufferram; - u32 totalswap; - u32 freeswap; - u16 procs; - u16 pad; - u32 totalhigh; - u32 freehigh; - u32 mem_unit; - char _f[20-2*sizeof(u32)-sizeof(int)]; -}; - -asmlinkage long -compat_sys_sysinfo(struct compat_sysinfo __user *info) -{ - struct sysinfo s; - - do_sysinfo(&s); - - /* Check to see if any memory value is too large for 32-bit and scale - * down if needed - */ - if ((s.totalram >> 32) || (s.totalswap >> 32)) { - int bitcount = 0; - - while (s.mem_unit < PAGE_SIZE) { - s.mem_unit <<= 1; - bitcount++; - } - - s.totalram >>= bitcount; - s.freeram >>= bitcount; - s.sharedram >>= bitcount; - s.bufferram >>= bitcount; - s.totalswap >>= bitcount; - s.freeswap >>= bitcount; - s.totalhigh >>= bitcount; - s.freehigh >>= bitcount; - } - - if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) || - __put_user (s.uptime, &info->uptime) || - __put_user (s.loads[0], &info->loads[0]) || - __put_user (s.loads[1], &info->loads[1]) || - __put_user (s.loads[2], &info->loads[2]) || - __put_user (s.totalram, &info->totalram) || - __put_user (s.freeram, &info->freeram) || - __put_user (s.sharedram, &info->sharedram) || - __put_user (s.bufferram, &info->bufferram) || - __put_user (s.totalswap, &info->totalswap) || - __put_user (s.freeswap, &info->freeswap) || - __put_user (s.procs, &info->procs) || - __put_user (s.totalhigh, &info->totalhigh) || - __put_user (s.freehigh, &info->freehigh) || - __put_user (s.mem_unit, &info->mem_unit)) - return -EFAULT; - - return 0; -} - COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval, compat_pid_t, pid, struct compat_timespec __user *, interval) diff --git a/kernel/timer.c b/kernel/timer.c index dbf7a78..06b3245 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -1567,7 +1568,7 @@ SYSCALL_DEFINE0(gettid) * do_sysinfo - fill in sysinfo struct * @info: pointer to buffer to fill */ -int do_sysinfo(struct sysinfo *info) +static int do_sysinfo(struct sysinfo *info) { unsigned long mem_total, sav_total; unsigned int mem_unit, bitcount; @@ -1642,6 +1643,73 @@ SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info) return 0; } +#ifdef CONFIG_COMPAT +struct compat_sysinfo { + s32 uptime; + u32 loads[3]; + u32 totalram; + u32 freeram; + u32 sharedram; + u32 bufferram; + u32 totalswap; + u32 freeswap; + u16 procs; + u16 pad; + u32 totalhigh; + u32 freehigh; + u32 mem_unit; + char _f[20-2*sizeof(u32)-sizeof(int)]; +}; + +asmlinkage long +compat_sys_sysinfo(struct compat_sysinfo __user *info) +{ + struct sysinfo s; + + do_sysinfo(&s); + + /* Check to see if any memory value is too large for 32-bit and scale + * down if needed + */ + if ((s.totalram >> 32) || (s.totalswap >> 32)) { + int bitcount = 0; + + while (s.mem_unit < PAGE_SIZE) { + s.mem_unit <<= 1; + bitcount++; + } + + s.totalram >>= bitcount; + s.freeram >>= bitcount; + s.sharedram >>= bitcount; + s.bufferram >>= bitcount; + s.totalswap >>= bitcount; + s.freeswap >>= bitcount; + s.totalhigh >>= bitcount; + s.freehigh >>= bitcount; + } + + if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) || + __put_user (s.uptime, &info->uptime) || + __put_user (s.loads[0], &info->loads[0]) || + __put_user (s.loads[1], &info->loads[1]) || + __put_user (s.loads[2], &info->loads[2]) || + __put_user (s.totalram, &info->totalram) || + __put_user (s.freeram, &info->freeram) || + __put_user (s.sharedram, &info->sharedram) || + __put_user (s.bufferram, &info->bufferram) || + __put_user (s.totalswap, &info->totalswap) || + __put_user (s.freeswap, &info->freeswap) || + __put_user (s.procs, &info->procs) || + __put_user (s.totalhigh, &info->totalhigh) || + __put_user (s.freehigh, &info->freehigh) || + __put_user (s.mem_unit, &info->mem_unit)) + return -EFAULT; + + return 0; +} +#endif /* CONFIG_COMPAT */ + static int __cpuinit init_timers_cpu(int cpu) { int j; -- cgit v0.10.2 From 1043f65a573b65a5398925551583ea72092e1be2 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 30 Apr 2013 15:27:35 -0700 Subject: kernel/timer.c: convert compat_sys_sysinfo to COMPAT_SYSCALL_DEFINE Signed-off-by: Stephen Rothwell Cc: Thomas Gleixner Cc: Guenter Roeck Cc: Al Viro Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/timer.c b/kernel/timer.c index 06b3245..f0e6588 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1661,8 +1661,7 @@ struct compat_sysinfo { char _f[20-2*sizeof(u32)-sizeof(int)]; }; -asmlinkage long -compat_sys_sysinfo(struct compat_sysinfo __user *info) +COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info) { struct sysinfo s; -- cgit v0.10.2 From 4a22f16636259f503847b0805e04824171b270fc Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 30 Apr 2013 15:27:37 -0700 Subject: kernel/timer.c: move some non timer related syscalls to kernel/sys.c Andrew Morton noted: akpm3:/usr/src/25> grep SYSCALL kernel/timer.c SYSCALL_DEFINE1(alarm, unsigned int, seconds) SYSCALL_DEFINE0(getpid) SYSCALL_DEFINE0(getppid) SYSCALL_DEFINE0(getuid) SYSCALL_DEFINE0(geteuid) SYSCALL_DEFINE0(getgid) SYSCALL_DEFINE0(getegid) SYSCALL_DEFINE0(gettid) SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info) COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info) Only one of those should be in kernel/timer.c. Who wrote this thing? [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Stephen Rothwell Acked-by: Thomas Gleixner Cc: Guenter Roeck Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/sys.c b/kernel/sys.c index 0da73cf..e30eba4 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -49,6 +49,11 @@ #include #include +#include +#include +#include +#include + #include /* Move somewhere else to avoid recompiling? */ #include @@ -1044,6 +1049,67 @@ change_okay: return old_fsgid; } +/** + * sys_getpid - return the thread group id of the current process + * + * Note, despite the name, this returns the tgid not the pid. The tgid and + * the pid are identical unless CLONE_THREAD was specified on clone() in + * which case the tgid is the same in all threads of the same group. + * + * This is SMP safe as current->tgid does not change. + */ +SYSCALL_DEFINE0(getpid) +{ + return task_tgid_vnr(current); +} + +/* Thread ID - the internal kernel "pid" */ +SYSCALL_DEFINE0(gettid) +{ + return task_pid_vnr(current); +} + +/* + * Accessing ->real_parent is not SMP-safe, it could + * change from under us. However, we can use a stale + * value of ->real_parent under rcu_read_lock(), see + * release_task()->call_rcu(delayed_put_task_struct). + */ +SYSCALL_DEFINE0(getppid) +{ + int pid; + + rcu_read_lock(); + pid = task_tgid_vnr(rcu_dereference(current->real_parent)); + rcu_read_unlock(); + + return pid; +} + +SYSCALL_DEFINE0(getuid) +{ + /* Only we change this so SMP safe */ + return from_kuid_munged(current_user_ns(), current_uid()); +} + +SYSCALL_DEFINE0(geteuid) +{ + /* Only we change this so SMP safe */ + return from_kuid_munged(current_user_ns(), current_euid()); +} + +SYSCALL_DEFINE0(getgid) +{ + /* Only we change this so SMP safe */ + return from_kgid_munged(current_user_ns(), current_gid()); +} + +SYSCALL_DEFINE0(getegid) +{ + /* Only we change this so SMP safe */ + return from_kgid_munged(current_user_ns(), current_egid()); +} + void do_sys_times(struct tms *tms) { cputime_t tgutime, tgstime, cutime, cstime; @@ -2245,3 +2311,148 @@ int orderly_poweroff(bool force) return 0; } EXPORT_SYMBOL_GPL(orderly_poweroff); + +/** + * do_sysinfo - fill in sysinfo struct + * @info: pointer to buffer to fill + */ +static int do_sysinfo(struct sysinfo *info) +{ + unsigned long mem_total, sav_total; + unsigned int mem_unit, bitcount; + struct timespec tp; + + memset(info, 0, sizeof(struct sysinfo)); + + ktime_get_ts(&tp); + monotonic_to_bootbased(&tp); + info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); + + get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT); + + info->procs = nr_threads; + + si_meminfo(info); + si_swapinfo(info); + + /* + * If the sum of all the available memory (i.e. ram + swap) + * is less than can be stored in a 32 bit unsigned long then + * we can be binary compatible with 2.2.x kernels. If not, + * well, in that case 2.2.x was broken anyways... + * + * -Erik Andersen + */ + + mem_total = info->totalram + info->totalswap; + if (mem_total < info->totalram || mem_total < info->totalswap) + goto out; + bitcount = 0; + mem_unit = info->mem_unit; + while (mem_unit > 1) { + bitcount++; + mem_unit >>= 1; + sav_total = mem_total; + mem_total <<= 1; + if (mem_total < sav_total) + goto out; + } + + /* + * If mem_total did not overflow, multiply all memory values by + * info->mem_unit and set it to 1. This leaves things compatible + * with 2.2.x, and also retains compatibility with earlier 2.4.x + * kernels... + */ + + info->mem_unit = 1; + info->totalram <<= bitcount; + info->freeram <<= bitcount; + info->sharedram <<= bitcount; + info->bufferram <<= bitcount; + info->totalswap <<= bitcount; + info->freeswap <<= bitcount; + info->totalhigh <<= bitcount; + info->freehigh <<= bitcount; + +out: + return 0; +} + +SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info) +{ + struct sysinfo val; + + do_sysinfo(&val); + + if (copy_to_user(info, &val, sizeof(struct sysinfo))) + return -EFAULT; + + return 0; +} + +#ifdef CONFIG_COMPAT +struct compat_sysinfo { + s32 uptime; + u32 loads[3]; + u32 totalram; + u32 freeram; + u32 sharedram; + u32 bufferram; + u32 totalswap; + u32 freeswap; + u16 procs; + u16 pad; + u32 totalhigh; + u32 freehigh; + u32 mem_unit; + char _f[20-2*sizeof(u32)-sizeof(int)]; +}; + +COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info) +{ + struct sysinfo s; + + do_sysinfo(&s); + + /* Check to see if any memory value is too large for 32-bit and scale + * down if needed + */ + if ((s.totalram >> 32) || (s.totalswap >> 32)) { + int bitcount = 0; + + while (s.mem_unit < PAGE_SIZE) { + s.mem_unit <<= 1; + bitcount++; + } + + s.totalram >>= bitcount; + s.freeram >>= bitcount; + s.sharedram >>= bitcount; + s.bufferram >>= bitcount; + s.totalswap >>= bitcount; + s.freeswap >>= bitcount; + s.totalhigh >>= bitcount; + s.freehigh >>= bitcount; + } + + if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) || + __put_user(s.uptime, &info->uptime) || + __put_user(s.loads[0], &info->loads[0]) || + __put_user(s.loads[1], &info->loads[1]) || + __put_user(s.loads[2], &info->loads[2]) || + __put_user(s.totalram, &info->totalram) || + __put_user(s.freeram, &info->freeram) || + __put_user(s.sharedram, &info->sharedram) || + __put_user(s.bufferram, &info->bufferram) || + __put_user(s.totalswap, &info->totalswap) || + __put_user(s.freeswap, &info->freeswap) || + __put_user(s.procs, &info->procs) || + __put_user(s.totalhigh, &info->totalhigh) || + __put_user(s.freehigh, &info->freehigh) || + __put_user(s.mem_unit, &info->mem_unit)) + return -EFAULT; + + return 0; +} +#endif /* CONFIG_COMPAT */ diff --git a/kernel/timer.c b/kernel/timer.c index f0e6588..09bca8c 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1,7 +1,7 @@ /* * linux/kernel/timer.c * - * Kernel internal timers, basic process system calls + * Kernel internal timers * * Copyright (C) 1991, 1992 Linus Torvalds * @@ -1396,61 +1396,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, seconds) #endif -/** - * sys_getpid - return the thread group id of the current process - * - * Note, despite the name, this returns the tgid not the pid. The tgid and - * the pid are identical unless CLONE_THREAD was specified on clone() in - * which case the tgid is the same in all threads of the same group. - * - * This is SMP safe as current->tgid does not change. - */ -SYSCALL_DEFINE0(getpid) -{ - return task_tgid_vnr(current); -} - -/* - * Accessing ->real_parent is not SMP-safe, it could - * change from under us. However, we can use a stale - * value of ->real_parent under rcu_read_lock(), see - * release_task()->call_rcu(delayed_put_task_struct). - */ -SYSCALL_DEFINE0(getppid) -{ - int pid; - - rcu_read_lock(); - pid = task_tgid_vnr(rcu_dereference(current->real_parent)); - rcu_read_unlock(); - - return pid; -} - -SYSCALL_DEFINE0(getuid) -{ - /* Only we change this so SMP safe */ - return from_kuid_munged(current_user_ns(), current_uid()); -} - -SYSCALL_DEFINE0(geteuid) -{ - /* Only we change this so SMP safe */ - return from_kuid_munged(current_user_ns(), current_euid()); -} - -SYSCALL_DEFINE0(getgid) -{ - /* Only we change this so SMP safe */ - return from_kgid_munged(current_user_ns(), current_gid()); -} - -SYSCALL_DEFINE0(getegid) -{ - /* Only we change this so SMP safe */ - return from_kgid_munged(current_user_ns(), current_egid()); -} - static void process_timeout(unsigned long __data) { wake_up_process((struct task_struct *)__data); @@ -1558,157 +1503,6 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) } EXPORT_SYMBOL(schedule_timeout_uninterruptible); -/* Thread ID - the internal kernel "pid" */ -SYSCALL_DEFINE0(gettid) -{ - return task_pid_vnr(current); -} - -/** - * do_sysinfo - fill in sysinfo struct - * @info: pointer to buffer to fill - */ -static int do_sysinfo(struct sysinfo *info) -{ - unsigned long mem_total, sav_total; - unsigned int mem_unit, bitcount; - struct timespec tp; - - memset(info, 0, sizeof(struct sysinfo)); - - ktime_get_ts(&tp); - monotonic_to_bootbased(&tp); - info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); - - get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT); - - info->procs = nr_threads; - - si_meminfo(info); - si_swapinfo(info); - - /* - * If the sum of all the available memory (i.e. ram + swap) - * is less than can be stored in a 32 bit unsigned long then - * we can be binary compatible with 2.2.x kernels. If not, - * well, in that case 2.2.x was broken anyways... - * - * -Erik Andersen - */ - - mem_total = info->totalram + info->totalswap; - if (mem_total < info->totalram || mem_total < info->totalswap) - goto out; - bitcount = 0; - mem_unit = info->mem_unit; - while (mem_unit > 1) { - bitcount++; - mem_unit >>= 1; - sav_total = mem_total; - mem_total <<= 1; - if (mem_total < sav_total) - goto out; - } - - /* - * If mem_total did not overflow, multiply all memory values by - * info->mem_unit and set it to 1. This leaves things compatible - * with 2.2.x, and also retains compatibility with earlier 2.4.x - * kernels... - */ - - info->mem_unit = 1; - info->totalram <<= bitcount; - info->freeram <<= bitcount; - info->sharedram <<= bitcount; - info->bufferram <<= bitcount; - info->totalswap <<= bitcount; - info->freeswap <<= bitcount; - info->totalhigh <<= bitcount; - info->freehigh <<= bitcount; - -out: - return 0; -} - -SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info) -{ - struct sysinfo val; - - do_sysinfo(&val); - - if (copy_to_user(info, &val, sizeof(struct sysinfo))) - return -EFAULT; - - return 0; -} - -#ifdef CONFIG_COMPAT -struct compat_sysinfo { - s32 uptime; - u32 loads[3]; - u32 totalram; - u32 freeram; - u32 sharedram; - u32 bufferram; - u32 totalswap; - u32 freeswap; - u16 procs; - u16 pad; - u32 totalhigh; - u32 freehigh; - u32 mem_unit; - char _f[20-2*sizeof(u32)-sizeof(int)]; -}; - -COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info) -{ - struct sysinfo s; - - do_sysinfo(&s); - - /* Check to see if any memory value is too large for 32-bit and scale - * down if needed - */ - if ((s.totalram >> 32) || (s.totalswap >> 32)) { - int bitcount = 0; - - while (s.mem_unit < PAGE_SIZE) { - s.mem_unit <<= 1; - bitcount++; - } - - s.totalram >>= bitcount; - s.freeram >>= bitcount; - s.sharedram >>= bitcount; - s.bufferram >>= bitcount; - s.totalswap >>= bitcount; - s.freeswap >>= bitcount; - s.totalhigh >>= bitcount; - s.freehigh >>= bitcount; - } - - if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) || - __put_user (s.uptime, &info->uptime) || - __put_user (s.loads[0], &info->loads[0]) || - __put_user (s.loads[1], &info->loads[1]) || - __put_user (s.loads[2], &info->loads[2]) || - __put_user (s.totalram, &info->totalram) || - __put_user (s.freeram, &info->freeram) || - __put_user (s.sharedram, &info->sharedram) || - __put_user (s.bufferram, &info->bufferram) || - __put_user (s.totalswap, &info->totalswap) || - __put_user (s.freeswap, &info->freeswap) || - __put_user (s.procs, &info->procs) || - __put_user (s.totalhigh, &info->totalhigh) || - __put_user (s.freehigh, &info->freehigh) || - __put_user (s.mem_unit, &info->mem_unit)) - return -EFAULT; - - return 0; -} -#endif /* CONFIG_COMPAT */ - static int __cpuinit init_timers_cpu(int cpu) { int j; -- cgit v0.10.2 From 39732ca5af4b09f4db561149041ddad7211019a5 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 30 Apr 2013 15:27:38 -0700 Subject: epoll: trim epitem by one cache line It is common for epoll users to have thousands of epitems, so saving a cache line on every allocation leads to large memory savings. Since epitem allocations are cache-aligned, reducing sizeof(struct epitem) from 136 bytes to 128 bytes will allow it to squeeze under a cache line boundary on x86_64. Via /sys/kernel/slab/eventpoll_epi, I see the following changes on my x86_64 Core2 Duo (which has 64-byte cache alignment): object_size : 192 => 128 objs_per_slab: 21 => 32 Also, add a BUILD_BUG_ON() to check for future accidental breakage. [akpm@linux-foundation.org: use __packed, for all architectures] Signed-off-by: Eric Wong Cc: Davide Libenzi Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 9fec183..0e5eda0 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -104,7 +104,7 @@ struct epoll_filefd { struct file *file; int fd; -}; +} __packed; /* * Structure used to track possible nested calls, for too deep recursions @@ -128,6 +128,8 @@ struct nested_calls { /* * Each file descriptor added to the eventpoll interface will * have an entry of this type linked to the "rbr" RB tree. + * Avoid increasing the size of this struct, there can be many thousands + * of these on a server and we do not want this to take another cache line. */ struct epitem { /* RB tree node used to link this structure to the eventpoll RB tree */ @@ -1964,6 +1966,12 @@ static int __init eventpoll_init(void) /* Initialize the structure used to perform file's f_op->poll() calls */ ep_nested_calls_init(&poll_readywalk_ncalls); + /* + * We can have many thousands of epitems, so prevent this from + * using an extra cache line on 64-bit (and smaller) CPUs + */ + BUILD_BUG_ON(sizeof(void *) <= 8 && sizeof(struct epitem) > 128); + /* Allocates slab cache used to allocate "struct epitem" items */ epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); -- cgit v0.10.2 From eea1d585917c538d90bc26fda5d8e53796feada2 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 30 Apr 2013 15:27:39 -0700 Subject: epoll: use RCU to protect wakeup_source in epitem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This prevents wakeup_source destruction when a user hits the item with EPOLL_CTL_MOD while ep_poll_callback is running. Tested with CONFIG_SPARSE_RCU_POINTER=y and "make fs/eventpoll.o C=2" Signed-off-by: Eric Wong Cc: Alexander Viro Cc: Arve HjønnevĂĄg Cc: Davide Libenzi Cc: Eric Dumazet Cc: NeilBrown Cc: "Rafael J. Wysocki" Cc: "Paul E. McKenney" Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 0e5eda0..a3acf93 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -160,7 +160,7 @@ struct epitem { struct list_head fllink; /* wakeup_source used when EPOLLWAKEUP is set */ - struct wakeup_source *ws; + struct wakeup_source __rcu *ws; /* The structure that describe the interested events and the source fd */ struct epoll_event event; @@ -538,6 +538,38 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) } } +/* call only when ep->mtx is held */ +static inline struct wakeup_source *ep_wakeup_source(struct epitem *epi) +{ + return rcu_dereference_check(epi->ws, lockdep_is_held(&epi->ep->mtx)); +} + +/* call only when ep->mtx is held */ +static inline void ep_pm_stay_awake(struct epitem *epi) +{ + struct wakeup_source *ws = ep_wakeup_source(epi); + + if (ws) + __pm_stay_awake(ws); +} + +static inline bool ep_has_wakeup_source(struct epitem *epi) +{ + return rcu_access_pointer(epi->ws) ? true : false; +} + +/* call when ep->mtx cannot be held (ep_poll_callback) */ +static inline void ep_pm_stay_awake_rcu(struct epitem *epi) +{ + struct wakeup_source *ws; + + rcu_read_lock(); + ws = rcu_dereference(epi->ws); + if (ws) + __pm_stay_awake(ws); + rcu_read_unlock(); +} + /** * ep_scan_ready_list - Scans the ready list in a way that makes possible for * the scan code, to call f_op->poll(). Also allows for @@ -601,7 +633,7 @@ static int ep_scan_ready_list(struct eventpoll *ep, */ if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); - __pm_stay_awake(epi->ws); + ep_pm_stay_awake(epi); } } /* @@ -670,7 +702,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) list_del_init(&epi->rdllink); spin_unlock_irqrestore(&ep->lock, flags); - wakeup_source_unregister(epi->ws); + wakeup_source_unregister(ep_wakeup_source(epi)); /* At this point it is safe to free the eventpoll item */ kmem_cache_free(epi_cache, epi); @@ -754,7 +786,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, * callback, but it's not actually ready, as far as * caller requested events goes. We can remove it here. */ - __pm_relax(epi->ws); + __pm_relax(ep_wakeup_source(epi)); list_del_init(&epi->rdllink); } } @@ -986,7 +1018,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k /* If this file is already in the ready list we exit soon */ if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); - __pm_stay_awake(epi->ws); + ep_pm_stay_awake_rcu(epi); } /* @@ -1148,6 +1180,7 @@ static int reverse_path_check(void) static int ep_create_wakeup_source(struct epitem *epi) { const char *name; + struct wakeup_source *ws; if (!epi->ep->ws) { epi->ep->ws = wakeup_source_register("eventpoll"); @@ -1156,17 +1189,29 @@ static int ep_create_wakeup_source(struct epitem *epi) } name = epi->ffd.file->f_path.dentry->d_name.name; - epi->ws = wakeup_source_register(name); - if (!epi->ws) + ws = wakeup_source_register(name); + + if (!ws) return -ENOMEM; + rcu_assign_pointer(epi->ws, ws); return 0; } -static void ep_destroy_wakeup_source(struct epitem *epi) +/* rare code path, only used when EPOLL_CTL_MOD removes a wakeup source */ +static noinline void ep_destroy_wakeup_source(struct epitem *epi) { - wakeup_source_unregister(epi->ws); - epi->ws = NULL; + struct wakeup_source *ws = ep_wakeup_source(epi); + + rcu_assign_pointer(epi->ws, NULL); + + /* + * wait for ep_pm_stay_awake_rcu to finish, synchronize_rcu is + * used internally by wakeup_source_remove, too (called by + * wakeup_source_unregister), so we cannot use call_rcu + */ + synchronize_rcu(); + wakeup_source_unregister(ws); } /* @@ -1201,7 +1246,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, if (error) goto error_create_wakeup_source; } else { - epi->ws = NULL; + RCU_INIT_POINTER(epi->ws, NULL); } /* Initialize the poll table using the queue callback */ @@ -1249,7 +1294,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* If the file is already "ready" we drop it inside the ready list */ if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); - __pm_stay_awake(epi->ws); + ep_pm_stay_awake(epi); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) @@ -1290,7 +1335,7 @@ error_unregister: list_del_init(&epi->rdllink); spin_unlock_irqrestore(&ep->lock, flags); - wakeup_source_unregister(epi->ws); + wakeup_source_unregister(ep_wakeup_source(epi)); error_create_wakeup_source: kmem_cache_free(epi_cache, epi); @@ -1319,9 +1364,9 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even pt._key = event->events; epi->event.data = event->data; /* protected by mtx */ if (epi->event.events & EPOLLWAKEUP) { - if (!epi->ws) + if (!ep_has_wakeup_source(epi)) ep_create_wakeup_source(epi); - } else if (epi->ws) { + } else if (ep_has_wakeup_source(epi)) { ep_destroy_wakeup_source(epi); } @@ -1359,7 +1404,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even spin_lock_irq(&ep->lock); if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); - __pm_stay_awake(epi->ws); + ep_pm_stay_awake(epi); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) @@ -1385,6 +1430,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, unsigned int revents; struct epitem *epi; struct epoll_event __user *uevent; + struct wakeup_source *ws; poll_table pt; init_poll_funcptr(&pt, NULL); @@ -1407,9 +1453,13 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, * instead, but then epi->ws would temporarily be out of sync * with ep_is_linked(). */ - if (epi->ws && epi->ws->active) - __pm_stay_awake(ep->ws); - __pm_relax(epi->ws); + ws = ep_wakeup_source(epi); + if (ws) { + if (ws->active) + __pm_stay_awake(ep->ws); + __pm_relax(ws); + } + list_del_init(&epi->rdllink); pt._key = epi->event.events; @@ -1426,7 +1476,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, if (__put_user(revents, &uevent->events) || __put_user(epi->event.data, &uevent->data)) { list_add(&epi->rdllink, head); - __pm_stay_awake(epi->ws); + ep_pm_stay_awake(epi); return eventcnt ? eventcnt : -EFAULT; } eventcnt++; @@ -1446,7 +1496,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, * poll callback will queue them in ep->ovflist. */ list_add_tail(&epi->rdllink, &ep->rdllist); - __pm_stay_awake(epi->ws); + ep_pm_stay_awake(epi); } } } -- cgit v0.10.2 From ddf676c38b56a8641c8eb9fb856fa304018ff390 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 30 Apr 2013 15:27:40 -0700 Subject: epoll: lock ep->mtx in ep_free to silence lockdep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Technically we do not need to hold ep->mtx during ep_free since we are certain there are no other users of ep at that point. However, lockdep complains with a "suspicious rcu_dereference_check() usage!" message; so lock the mutex before ep_remove to silence the warning. Signed-off-by: Eric Wong Cc: Al Viro Cc: Arve HjønnevĂĄg Cc: Davide Libenzi Cc: Eric Dumazet Cc: NeilBrown , Cc: Rafael J. Wysocki Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/eventpoll.c b/fs/eventpoll.c index a3acf93..5744a7f 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -745,11 +745,15 @@ static void ep_free(struct eventpoll *ep) * point we are sure no poll callbacks will be lingering around, and also by * holding "epmutex" we can be sure that no file cleanup code will hit * us during this operation. So we can avoid the lock on "ep->lock". + * We do not need to lock ep->mtx, either, we only do it to prevent + * a lockdep warning. */ + mutex_lock(&ep->mtx); while ((rbp = rb_first(&ep->rbr)) != NULL) { epi = rb_entry(rbp, struct epitem, rbn); ep_remove(ep, epi); } + mutex_unlock(&ep->mtx); mutex_unlock(&epmutex); mutex_destroy(&ep->mtx); -- cgit v0.10.2 From 450d89ec0a91dbad81adaa635fdb1325b57f8d69 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 30 Apr 2013 15:27:42 -0700 Subject: epoll: cleanup: hoist out f_op->poll calls This reduces the amount of code inside the ready list iteration loops for better readability IMHO. Signed-off-by: Eric Wong Cc: Davide Libenzi Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 5744a7f..c5d9880 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -772,6 +772,13 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file) return 0; } +static inline unsigned int ep_item_poll(struct epitem *epi, poll_table *pt) +{ + pt->_key = epi->event.events; + + return epi->ffd.file->f_op->poll(epi->ffd.file, pt) & epi->event.events; +} + static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, void *priv) { @@ -779,10 +786,9 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, poll_table pt; init_poll_funcptr(&pt, NULL); + list_for_each_entry_safe(epi, tmp, head, rdllink) { - pt._key = epi->event.events; - if (epi->ffd.file->f_op->poll(epi->ffd.file, &pt) & - epi->event.events) + if (ep_item_poll(epi, &pt)) return POLLIN | POLLRDNORM; else { /* @@ -1256,7 +1262,6 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* Initialize the poll table using the queue callback */ epq.epi = epi; init_poll_funcptr(&epq.pt, ep_ptable_queue_proc); - epq.pt._key = event->events; /* * Attach the item to the poll hooks and get current event bits. @@ -1265,7 +1270,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, * this operation completes, the poll callback can start hitting * the new item. */ - revents = tfile->f_op->poll(tfile, &epq.pt); + revents = ep_item_poll(epi, &epq.pt); /* * We have to check if something went wrong during the poll wait queue @@ -1365,7 +1370,6 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even * f_op->poll() call and the new event set registering. */ epi->event.events = event->events; /* need barrier below */ - pt._key = event->events; epi->event.data = event->data; /* protected by mtx */ if (epi->event.events & EPOLLWAKEUP) { if (!ep_has_wakeup_source(epi)) @@ -1398,7 +1402,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even * Get current event bits. We can safely use the file* here because * its usage count has been increased by the caller of this function. */ - revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt); + revents = ep_item_poll(epi, &pt); /* * If the item is "hot" and it is not registered inside the ready @@ -1466,9 +1470,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, list_del_init(&epi->rdllink); - pt._key = epi->event.events; - revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt) & - epi->event.events; + revents = ep_item_poll(epi, &pt); /* * If the event mask intersect the caller-requested one, -- cgit v0.10.2 From d6d67e7231c97d535069970c840d33fc6c4350ee Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 30 Apr 2013 15:27:43 -0700 Subject: epoll: cleanup: use RCU_INIT_POINTER when nulling It is always safe to use RCU_INIT_POINTER to NULL a pointer. This results in slightly smaller/faster code. Signed-off-by: Eric Wong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/eventpoll.c b/fs/eventpoll.c index c5d9880..277cc38 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1213,7 +1213,7 @@ static noinline void ep_destroy_wakeup_source(struct epitem *epi) { struct wakeup_source *ws = ep_wakeup_source(epi); - rcu_assign_pointer(epi->ws, NULL); + RCU_INIT_POINTER(epi->ws, NULL); /* * wait for ep_pm_stay_awake_rcu to finish, synchronize_rcu is -- cgit v0.10.2 From 2535e0d723e4d7723b030f39fb350e436bdb983f Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Tue, 30 Apr 2013 15:27:44 -0700 Subject: fs: make binfmt support for #! scripts modular and removable Add a new configuration option CONFIG_BINFMT_SCRIPT to configure support for interpreted scripts starting with "#!"; allow compiling out that support, or building it as a module. Embedded systems running exclusively compiled binaries could leave this support out, and systems that don't need scripts before mounting the root filesystem can build this as a module. Signed-off-by: Josh Triplett Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 0efd152..370b24c 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -65,6 +65,20 @@ config CORE_DUMP_DEFAULT_ELF_HEADERS This config option changes the default setting of coredump_filter seen at boot time. If unsure, say Y. +config BINFMT_SCRIPT + tristate "Kernel support for scripts starting with #!" + default y + help + Say Y here if you want to execute interpreted scripts starting with + #! followed by the path to an interpreter. + + You can build this support as a module; however, until that module + gets loaded, you cannot run scripts. Thus, if you want to load this + module from an initramfs, the portion of the initramfs before loading + this module must consist of compiled binaries only. + + Most systems will not boot if you say M or N here. If unsure, say Y. + config BINFMT_FLAT bool "Kernel support for flat binaries" depends on !MMU && (!FRV || BROKEN) diff --git a/fs/Makefile b/fs/Makefile index 3b2c767..5e67e57 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -34,10 +34,7 @@ obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o - -# binfmt_script is always there -obj-y += binfmt_script.o - +obj-$(CONFIG_BINFMT_SCRIPT) += binfmt_script.o obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o -- cgit v0.10.2 From c1d025e22e0ef167a4589cf5a6b2a32bfef0472c Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 30 Apr 2013 15:27:45 -0700 Subject: binfmt_elf: PIE: make PF_RANDOMIZE check comment more accurate The comment I originally added in commit a3defbe5c337 ("binfmt_elf: fix PIE execution with randomization disabled") is not really 100% accurate -- sysctl is not the only way how PF_RANDOMIZE could be forcibly unset in runtime. Another option of course is direct modification of personality flags (i.e. running through setarch wrapper). Make the comment more explicit and accurate. Signed-off-by: Jiri Kosina Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 86af964..e32344b 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -803,7 +803,8 @@ static int load_elf_binary(struct linux_binprm *bprm) * follow the loader, and is not movable. */ #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE /* Memory randomization might have been switched off - * in runtime via sysctl. + * in runtime via sysctl or explicit setting of + * personality flags. * If that is the case, retain the original non-zero * load_bias value in order to establish proper * non-randomized mappings. -- cgit v0.10.2 From 79bae42d51a5d498500c890c19ef76df41d2bf59 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 30 Apr 2013 15:27:46 -0700 Subject: dmi_scan: refactor dmi_scan_machine(), {smbios,dmi}_present() Move the calls to memcpy_fromio() up into the loop in dmi_scan_machine(), and move the signature checks back down into dmi_decode(). We need to check at 16-byte intervals but keep a 32-byte buffer for an SMBIOS entry, so shift the buffer after each iteration. Merge smbios_present() into dmi_present(), so we look for an SMBIOS signature at the beginning of the given buffer and then for a DMI signature at an offset of 16 bytes. [artem.savkov@gmail.com: use proper buf type in dmi_present()] Signed-off-by: Ben Hutchings Reported-by: Tim McGrath Tested-by: Tim Mcgrath Cc: Zhenzhong Duan Signed-off-by: Artem Savkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 98c62081..b95159b 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -419,22 +419,45 @@ static void __init dmi_format_ids(char *buf, size_t len) dmi_get_system_info(DMI_BIOS_DATE)); } -static int __init dmi_present(const char __iomem *p) +static int __init dmi_present(const u8 *buf) { - u8 buf[15]; + int smbios_ver; - memcpy_fromio(buf, p, 15); - if (dmi_checksum(buf, 15)) { + if (memcmp(buf, "_SM_", 4) == 0 && + buf[5] < 32 && dmi_checksum(buf, buf[5])) { + smbios_ver = (buf[6] << 8) + buf[7]; + + /* Some BIOS report weird SMBIOS version, fix that up */ + switch (smbios_ver) { + case 0x021F: + case 0x0221: + pr_debug("SMBIOS version fixup(2.%d->2.%d)\n", + smbios_ver & 0xFF, 3); + smbios_ver = 0x0203; + break; + case 0x0233: + pr_debug("SMBIOS version fixup(2.%d->2.%d)\n", 51, 6); + smbios_ver = 0x0206; + break; + } + } else { + smbios_ver = 0; + } + + buf += 16; + + if (memcmp(buf, "_DMI_", 5) == 0 && dmi_checksum(buf, 15)) { dmi_num = (buf[13] << 8) | buf[12]; dmi_len = (buf[7] << 8) | buf[6]; dmi_base = (buf[11] << 24) | (buf[10] << 16) | (buf[9] << 8) | buf[8]; if (dmi_walk_early(dmi_decode) == 0) { - if (dmi_ver) + if (smbios_ver) { + dmi_ver = smbios_ver; pr_info("SMBIOS %d.%d present.\n", dmi_ver >> 8, dmi_ver & 0xFF); - else { + } else { dmi_ver = (buf[14] & 0xF0) << 4 | (buf[14] & 0x0F); pr_info("Legacy DMI %d.%d present.\n", @@ -445,40 +468,14 @@ static int __init dmi_present(const char __iomem *p) return 0; } } - dmi_ver = 0; - return 1; -} -static int __init smbios_present(const char __iomem *p) -{ - u8 buf[32]; - - memcpy_fromio(buf, p, 32); - if ((buf[5] < 32) && dmi_checksum(buf, buf[5])) { - dmi_ver = (buf[6] << 8) + buf[7]; - - /* Some BIOS report weird SMBIOS version, fix that up */ - switch (dmi_ver) { - case 0x021F: - case 0x0221: - pr_debug("SMBIOS version fixup(2.%d->2.%d)\n", - dmi_ver & 0xFF, 3); - dmi_ver = 0x0203; - break; - case 0x0233: - pr_debug("SMBIOS version fixup(2.%d->2.%d)\n", 51, 6); - dmi_ver = 0x0206; - break; - } - return memcmp(p + 16, "_DMI_", 5) || dmi_present(p + 16); - } return 1; } void __init dmi_scan_machine(void) { char __iomem *p, *q; - int rc; + char buf[32]; if (efi_enabled(EFI_CONFIG_TABLES)) { if (efi.smbios == EFI_INVALID_TABLE_ADDR) @@ -491,10 +488,10 @@ void __init dmi_scan_machine(void) p = dmi_ioremap(efi.smbios, 32); if (p == NULL) goto error; - - rc = smbios_present(p); + memcpy_fromio(buf, p, 32); dmi_iounmap(p, 32); - if (!rc) { + + if (!dmi_present(buf)) { dmi_available = 1; goto out; } @@ -509,18 +506,15 @@ void __init dmi_scan_machine(void) if (p == NULL) goto error; + memset(buf, 0, 16); for (q = p; q < p + 0x10000; q += 16) { - if (memcmp(q, "_SM_", 4) == 0 && q - p <= 0xFFE0) - rc = smbios_present(q); - else if (memcmp(q, "_DMI_", 5) == 0) - rc = dmi_present(q); - else - continue; - if (!rc) { + memcpy_fromio(buf + 16, q, 16); + if (!dmi_present(buf)) { dmi_available = 1; dmi_iounmap(p, 0x10000); goto out; } + memcpy(buf, buf + 16, 16); } dmi_iounmap(p, 0x10000); } -- cgit v0.10.2 From 9151b3982dafaa87bca3834c4d20db831ca98bcb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 30 Apr 2013 15:27:47 -0700 Subject: i2o: check copy_from_user() size parameter Limit the size of the copy so we don't corrupt memory. Hopefully this can only be called by root, but fixing this makes the static checkers happier. Signed-off-by: Dan Carpenter Cc: Jiri Kosina Cc: Masanari Iida Cc: Alan Cox Cc: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/message/i2o/i2o_config.c b/drivers/message/i2o/i2o_config.c index 5451bef..a60c188 100644 --- a/drivers/message/i2o/i2o_config.c +++ b/drivers/message/i2o/i2o_config.c @@ -687,6 +687,11 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, } size = size >> 16; size *= 4; + if (size > sizeof(rmsg)) { + rcode = -EINVAL; + goto sg_list_cleanup; + } + /* Copy in the user's I2O command */ if (copy_from_user(rmsg, user_msg, size)) { rcode = -EFAULT; @@ -922,6 +927,11 @@ static int i2o_cfg_passthru(unsigned long arg) } size = size >> 16; size *= 4; + if (size > sizeof(rmsg)) { + rcode = -EFAULT; + goto sg_list_cleanup; + } + /* Copy in the user's I2O command */ if (copy_from_user(rmsg, user_msg, size)) { rcode = -EFAULT; -- cgit v0.10.2 From 8c26c4e2694a163d525976e804d81cd955bbb40c Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Tue, 30 Apr 2013 15:27:48 -0700 Subject: nilfs2: fix issue with flush kernel thread after remount in RO mode because of driver's internal error or metadata corruption The NILFS2 driver remounts itself in RO mode in the case of discovering metadata corruption (for example, discovering a broken bmap). But usually, this takes place when there have been file system operations before remounting in RO mode. Thereby, NILFS2 driver can be in RO mode with presence of dirty pages in modified inodes' address spaces. It results in flush kernel thread's infinite trying to flush dirty pages in RO mode. As a result, it is possible to see such side effects as: (1) flush kernel thread occupies 50% - 99% of CPU time; (2) system can't be shutdowned without manual power switch off. SYMPTOMS: (1) System log contains error message: "Remounting filesystem read-only". (2) The flush kernel thread occupies 50% - 99% of CPU time. (3) The system can't be shutdowned without manual power switch off. REPRODUCTION PATH: (1) Create volume group with name "unencrypted" by means of vgcreate utility. (2) Run script (prepared by Anthony Doggett ): ----------------[BEGIN SCRIPT]-------------------- #!/bin/bash VG=unencrypted #apt-get install nilfs-tools darcs lvcreate --size 2G --name ntest $VG mkfs.nilfs2 -b 1024 -B 8192 /dev/mapper/$VG-ntest mkdir /var/tmp/n mkdir /var/tmp/n/ntest mount /dev/mapper/$VG-ntest /var/tmp/n/ntest mkdir /var/tmp/n/ntest/thedir cd /var/tmp/n/ntest/thedir sleep 2 date darcs init sleep 2 dmesg|tail -n 5 date darcs whatsnew || true date sleep 2 dmesg|tail -n 5 ----------------[END SCRIPT]-------------------- (3) Try to shutdown the system. REPRODUCIBILITY: 100% FIX: This patch implements checking mount state of NILFS2 driver in nilfs_writepage(), nilfs_writepages() and nilfs_mdt_write_page() methods. If it is detected the RO mount state then all dirty pages are simply discarded with warning messages is written in system log. [akpm@linux-foundation.org: fix printk warning] Signed-off-by: Vyacheslav Dubeyko Acked-by: Ryusuke Konishi Cc: Anthony Doggett Cc: ARAI Shun-ichi Cc: Piotr Szymaniak Cc: Zahid Chowdhury Cc: Elmer Zhang Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 6b49f14..ba7a1da 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -175,6 +175,11 @@ static int nilfs_writepages(struct address_space *mapping, struct inode *inode = mapping->host; int err = 0; + if (inode->i_sb->s_flags & MS_RDONLY) { + nilfs_clear_dirty_pages(mapping, false); + return -EROFS; + } + if (wbc->sync_mode == WB_SYNC_ALL) err = nilfs_construct_dsync_segment(inode->i_sb, inode, wbc->range_start, @@ -187,6 +192,18 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc) struct inode *inode = page->mapping->host; int err; + if (inode && (inode->i_sb->s_flags & MS_RDONLY)) { + /* + * It means that filesystem was remounted in read-only + * mode because of error or metadata corruption. But we + * have dirty pages that try to be flushed in background. + * So, here we simply discard this dirty page. + */ + nilfs_clear_dirty_page(page, false); + unlock_page(page); + return -EROFS; + } + redirty_page_for_writepage(wbc, page); unlock_page(page); diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index f9897d0..c4dcd1d 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -375,14 +375,25 @@ int nilfs_mdt_fetch_dirty(struct inode *inode) static int nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) { - struct inode *inode; + struct inode *inode = page->mapping->host; struct super_block *sb; int err = 0; + if (inode && (inode->i_sb->s_flags & MS_RDONLY)) { + /* + * It means that filesystem was remounted in read-only + * mode because of error or metadata corruption. But we + * have dirty pages that try to be flushed in background. + * So, here we simply discard this dirty page. + */ + nilfs_clear_dirty_page(page, false); + unlock_page(page); + return -EROFS; + } + redirty_page_for_writepage(wbc, page); unlock_page(page); - inode = page->mapping->host; if (!inode) return 0; @@ -561,10 +572,10 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) if (mi->mi_palloc_cache) nilfs_palloc_clear_cache(inode); - nilfs_clear_dirty_pages(inode->i_mapping); + nilfs_clear_dirty_pages(inode->i_mapping, true); nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data); - nilfs_clear_dirty_pages(&ii->i_btnode_cache); + nilfs_clear_dirty_pages(&ii->i_btnode_cache, true); nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes); nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 07f76db..131a584 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -370,7 +370,12 @@ repeat: goto repeat; } -void nilfs_clear_dirty_pages(struct address_space *mapping) +/** + * nilfs_clear_dirty_pages - discard dirty pages in address space + * @mapping: address space with dirty pages for discarding + * @silent: suppress [true] or print [false] warning messages + */ +void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) { struct pagevec pvec; unsigned int i; @@ -382,25 +387,9 @@ void nilfs_clear_dirty_pages(struct address_space *mapping) PAGEVEC_SIZE)) { for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; - struct buffer_head *bh, *head; lock_page(page); - ClearPageUptodate(page); - ClearPageMappedToDisk(page); - bh = head = page_buffers(page); - do { - lock_buffer(bh); - clear_buffer_dirty(bh); - clear_buffer_nilfs_volatile(bh); - clear_buffer_nilfs_checked(bh); - clear_buffer_nilfs_redirected(bh); - clear_buffer_uptodate(bh); - clear_buffer_mapped(bh); - unlock_buffer(bh); - bh = bh->b_this_page; - } while (bh != head); - - __nilfs_clear_page_dirty(page); + nilfs_clear_dirty_page(page, silent); unlock_page(page); } pagevec_release(&pvec); @@ -408,6 +397,51 @@ void nilfs_clear_dirty_pages(struct address_space *mapping) } } +/** + * nilfs_clear_dirty_page - discard dirty page + * @page: dirty page that will be discarded + * @silent: suppress [true] or print [false] warning messages + */ +void nilfs_clear_dirty_page(struct page *page, bool silent) +{ + struct inode *inode = page->mapping->host; + struct super_block *sb = inode->i_sb; + + BUG_ON(!test_bit(PG_locked, &page->flags)); + + if (!silent) { + nilfs_warning(sb, __func__, + "discard page: offset %lld, ino %lu", + page_offset(page), inode->i_ino); + } + + ClearPageUptodate(page); + ClearPageMappedToDisk(page); + + if (page_has_buffers(page)) { + struct buffer_head *bh, *head; + + bh = head = page_buffers(page); + do { + lock_buffer(bh); + if (!silent) { + nilfs_warning(sb, __func__, + "discard block %llu, size %zu", + (u64)bh->b_blocknr, bh->b_size); + } + clear_buffer_dirty(bh); + clear_buffer_nilfs_volatile(bh); + clear_buffer_nilfs_checked(bh); + clear_buffer_nilfs_redirected(bh); + clear_buffer_uptodate(bh); + clear_buffer_mapped(bh); + unlock_buffer(bh); + } while (bh = bh->b_this_page, bh != head); + } + + __nilfs_clear_page_dirty(page); +} + unsigned nilfs_page_count_clean_buffers(struct page *page, unsigned from, unsigned to) { diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index fb7de71..ef30c5c 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h @@ -55,7 +55,8 @@ void nilfs_page_bug(struct page *); int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); void nilfs_copy_back_pages(struct address_space *, struct address_space *); -void nilfs_clear_dirty_pages(struct address_space *); +void nilfs_clear_dirty_page(struct page *, bool); +void nilfs_clear_dirty_pages(struct address_space *, bool); void nilfs_mapping_init(struct address_space *mapping, struct inode *inode, struct backing_dev_info *bdi); unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); -- cgit v0.10.2 From dc33f5f3c9988026aad5b788c761c8c1b363e919 Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Tue, 30 Apr 2013 15:27:50 -0700 Subject: nilfs2: fix using of PageLocked() in nilfs_clear_dirty_page() Change test_bit(PG_locked, &page->flags) to PageLocked(). Signed-off-by: Vyacheslav Dubeyko Cc: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 131a584..0ba6798 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -407,7 +407,7 @@ void nilfs_clear_dirty_page(struct page *page, bool silent) struct inode *inode = page->mapping->host; struct super_block *sb = inode->i_sb; - BUG_ON(!test_bit(PG_locked, &page->flags)); + BUG_ON(!PageLocked(page)); if (!silent) { nilfs_warning(sb, __func__, -- cgit v0.10.2 From eb53b6db7a53642b80b0ca4885cb91d5c7dbc0f8 Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Tue, 30 Apr 2013 15:27:51 -0700 Subject: nilfs2: remove unneeded test in nilfs_writepage() page->mapping->host cannot be NULL in nilfs_writepage(), so remove the unneeded test. The fixes the smatch warning: "fs/nilfs2/inode.c:211 nilfs_writepage() error: we previously assumed 'inode' could be null (see line 195)". Reported-by: Dan Carpenter Signed-off-by: Vyacheslav Dubeyko Cc: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index ba7a1da..cf02f55 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -192,7 +192,7 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc) struct inode *inode = page->mapping->host; int err; - if (inode && (inode->i_sb->s_flags & MS_RDONLY)) { + if (inode->i_sb->s_flags & MS_RDONLY) { /* * It means that filesystem was remounted in read-only * mode because of error or metadata corruption. But we -- cgit v0.10.2 From 9509f17851da294f8ecf0fc0bfe0fe609671352d Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Tue, 30 Apr 2013 15:27:52 -0700 Subject: hfs: add error checking for hfs_find_init() hfs_find_init() may fail with ENOMEM, but there are places, where the returned value is not checked. The consequences can be very unpleasant, e.g. kfree uninitialized pointer and inappropriate mutex unlocking. The patch adds checks for errors in hfs_find_init(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Reviewed-by: Vyacheslav Dubeyko Cc: Hin-Tak Leung Cc: Al Viro Cc: Artem Bityutskiy Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c index 424b033..9569b39 100644 --- a/fs/hfs/catalog.c +++ b/fs/hfs/catalog.c @@ -92,7 +92,9 @@ int hfs_cat_create(u32 cnid, struct inode *dir, struct qstr *str, struct inode * return -ENOSPC; sb = dir->i_sb; - hfs_find_init(HFS_SB(sb)->cat_tree, &fd); + err = hfs_find_init(HFS_SB(sb)->cat_tree, &fd); + if (err) + return err; hfs_cat_build_key(sb, fd.search_key, cnid, NULL); entry_size = hfs_cat_build_thread(sb, &entry, S_ISDIR(inode->i_mode) ? @@ -214,7 +216,9 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, struct qstr *str) dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); sb = dir->i_sb; - hfs_find_init(HFS_SB(sb)->cat_tree, &fd); + res = hfs_find_init(HFS_SB(sb)->cat_tree, &fd); + if (res) + return res; hfs_cat_build_key(sb, fd.search_key, dir->i_ino, str); res = hfs_brec_find(&fd); @@ -281,7 +285,9 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name, dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, dst_dir->i_ino, dst_name->name); sb = src_dir->i_sb; - hfs_find_init(HFS_SB(sb)->cat_tree, &src_fd); + err = hfs_find_init(HFS_SB(sb)->cat_tree, &src_fd); + if (err) + return err; dst_fd = src_fd; /* find the old dir entry and read the data */ diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 5f7f1ab..e1c8048 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -25,7 +25,9 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry, struct inode *inode = NULL; int res; - hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); + res = hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); + if (res) + return ERR_PTR(res); hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name); res = hfs_brec_read(&fd, &rec, sizeof(rec)); if (res) { @@ -63,7 +65,9 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (filp->f_pos >= inode->i_size) return 0; - hfs_find_init(HFS_SB(sb)->cat_tree, &fd); + err = hfs_find_init(HFS_SB(sb)->cat_tree, &fd); + if (err) + return err; hfs_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); err = hfs_brec_find(&fd); if (err) diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c index a67955a..813447b 100644 --- a/fs/hfs/extent.c +++ b/fs/hfs/extent.c @@ -107,7 +107,7 @@ static u16 hfs_ext_lastblock(struct hfs_extent *ext) return be16_to_cpu(ext->block) + be16_to_cpu(ext->count); } -static void __hfs_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) +static int __hfs_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) { int res; @@ -116,26 +116,31 @@ static void __hfs_ext_write_extent(struct inode *inode, struct hfs_find_data *fd res = hfs_brec_find(fd); if (HFS_I(inode)->flags & HFS_FLG_EXT_NEW) { if (res != -ENOENT) - return; + return res; hfs_brec_insert(fd, HFS_I(inode)->cached_extents, sizeof(hfs_extent_rec)); HFS_I(inode)->flags &= ~(HFS_FLG_EXT_DIRTY|HFS_FLG_EXT_NEW); } else { if (res) - return; + return res; hfs_bnode_write(fd->bnode, HFS_I(inode)->cached_extents, fd->entryoffset, fd->entrylength); HFS_I(inode)->flags &= ~HFS_FLG_EXT_DIRTY; } + return 0; } -void hfs_ext_write_extent(struct inode *inode) +int hfs_ext_write_extent(struct inode *inode) { struct hfs_find_data fd; + int res = 0; if (HFS_I(inode)->flags & HFS_FLG_EXT_DIRTY) { - hfs_find_init(HFS_SB(inode->i_sb)->ext_tree, &fd); - __hfs_ext_write_extent(inode, &fd); + res = hfs_find_init(HFS_SB(inode->i_sb)->ext_tree, &fd); + if (res) + return res; + res = __hfs_ext_write_extent(inode, &fd); hfs_find_exit(&fd); } + return res; } static inline int __hfs_ext_read_extent(struct hfs_find_data *fd, struct hfs_extent *extent, @@ -161,8 +166,11 @@ static inline int __hfs_ext_cache_extent(struct hfs_find_data *fd, struct inode { int res; - if (HFS_I(inode)->flags & HFS_FLG_EXT_DIRTY) - __hfs_ext_write_extent(inode, fd); + if (HFS_I(inode)->flags & HFS_FLG_EXT_DIRTY) { + res = __hfs_ext_write_extent(inode, fd); + if (res) + return res; + } res = __hfs_ext_read_extent(fd, HFS_I(inode)->cached_extents, inode->i_ino, block, HFS_IS_RSRC(inode) ? HFS_FK_RSRC : HFS_FK_DATA); @@ -185,9 +193,11 @@ static int hfs_ext_read_extent(struct inode *inode, u16 block) block < HFS_I(inode)->cached_start + HFS_I(inode)->cached_blocks) return 0; - hfs_find_init(HFS_SB(inode->i_sb)->ext_tree, &fd); - res = __hfs_ext_cache_extent(&fd, inode, block); - hfs_find_exit(&fd); + res = hfs_find_init(HFS_SB(inode->i_sb)->ext_tree, &fd); + if (!res) { + res = __hfs_ext_cache_extent(&fd, inode, block); + hfs_find_exit(&fd); + } return res; } @@ -298,7 +308,9 @@ int hfs_free_fork(struct super_block *sb, struct hfs_cat_file *file, int type) if (total_blocks == blocks) return 0; - hfs_find_init(HFS_SB(sb)->ext_tree, &fd); + res = hfs_find_init(HFS_SB(sb)->ext_tree, &fd); + if (res) + return res; do { res = __hfs_ext_read_extent(&fd, extent, cnid, total_blocks, type); if (res) @@ -438,7 +450,9 @@ out: insert_extent: dprint(DBG_EXTENT, "insert new extent\n"); - hfs_ext_write_extent(inode); + res = hfs_ext_write_extent(inode); + if (res) + goto out; memset(HFS_I(inode)->cached_extents, 0, sizeof(hfs_extent_rec)); HFS_I(inode)->cached_extents[0].block = cpu_to_be16(start); @@ -466,7 +480,6 @@ void hfs_file_truncate(struct inode *inode) struct address_space *mapping = inode->i_mapping; void *fsdata; struct page *page; - int res; /* XXX: Can use generic_cont_expand? */ size = inode->i_size - 1; @@ -488,7 +501,12 @@ void hfs_file_truncate(struct inode *inode) goto out; mutex_lock(&HFS_I(inode)->extents_lock); - hfs_find_init(HFS_SB(sb)->ext_tree, &fd); + res = hfs_find_init(HFS_SB(sb)->ext_tree, &fd); + if (res) { + mutex_unlock(&HFS_I(inode)->extents_lock); + /* XXX: We lack error handling of hfs_file_truncate() */ + return; + } while (1) { if (alloc_cnt == HFS_I(inode)->first_blocks) { hfs_free_extents(sb, HFS_I(inode)->first_extents, diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 693df9f..67817af 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -174,7 +174,7 @@ extern const struct inode_operations hfs_dir_inode_operations; /* extent.c */ extern int hfs_ext_keycmp(const btree_key *, const btree_key *); extern int hfs_free_fork(struct super_block *, struct hfs_cat_file *, int); -extern void hfs_ext_write_extent(struct inode *); +extern int hfs_ext_write_extent(struct inode *); extern int hfs_extend_file(struct inode *); extern void hfs_file_truncate(struct inode *); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 3031dfd..0847471 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -416,9 +416,12 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) struct inode *main_inode = inode; struct hfs_find_data fd; hfs_cat_rec rec; + int res; dprint(DBG_INODE, "hfs_write_inode: %lu\n", inode->i_ino); - hfs_ext_write_extent(inode); + res = hfs_ext_write_extent(inode); + if (res) + return res; if (inode->i_ino < HFS_FIRSTUSER_CNID) { switch (inode->i_ino) { @@ -515,7 +518,11 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry, if (!inode) return ERR_PTR(-ENOMEM); - hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); + res = hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); + if (res) { + iput(inode); + return ERR_PTR(res); + } fd.search_key->cat = HFS_I(dir)->cat_key; res = hfs_brec_read(&fd, &rec, sizeof(rec)); if (!res) { diff --git a/fs/hfs/super.c b/fs/hfs/super.c index bbaaa8a..719760b 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -418,7 +418,9 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) } /* try to get the root inode */ - hfs_find_init(HFS_SB(sb)->cat_tree, &fd); + res = hfs_find_init(HFS_SB(sb)->cat_tree, &fd); + if (res) + goto bail_no_root; res = hfs_cat_find_brec(sb, HFS_ROOT_CNID, &fd); if (!res) { if (fd.entrylength > sizeof(rec) || fd.entrylength < 0) { -- cgit v0.10.2 From 5f3726f9457d5a863721353786e3395cdbd0f943 Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Tue, 30 Apr 2013 15:27:53 -0700 Subject: hfsplus: fix warnings in fs/hfsplus/bfind.c fs/hfsplus/bfind.c: In function 'hfs_find_1st_rec_by_cnid': (1) include/uapi/linux/swab.h:60:2: warning: 'search_cnid' may be used uninitialized in this function [-Wmaybe-uninitialized] (2) include/uapi/linux/swab.h:60:2: warning: 'cur_cnid' may be used uninitialized in this function [-Wmaybe-uninitialized] [akpm@linux-foundation.org: make the workaround more explicit] Signed-off-by: Vyacheslav Dubeyko Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c index d73c98d..bbfdc17 100644 --- a/fs/hfsplus/bfind.c +++ b/fs/hfsplus/bfind.c @@ -56,7 +56,8 @@ int hfs_find_1st_rec_by_cnid(struct hfs_bnode *bnode, int *end, int *cur_rec) { - __be32 cur_cnid, search_cnid; + __be32 cur_cnid; + __be32 search_cnid; if (bnode->tree->cnid == HFSPLUS_EXT_CNID) { cur_cnid = fd->key->ext.cnid; @@ -67,8 +68,11 @@ int hfs_find_1st_rec_by_cnid(struct hfs_bnode *bnode, } else if (bnode->tree->cnid == HFSPLUS_ATTR_CNID) { cur_cnid = fd->key->attr.cnid; search_cnid = fd->search_key->attr.cnid; - } else + } else { + cur_cnid = 0; /* used-uninitialized warning */ + search_cnid = 0; BUG(); + } if (cur_cnid == search_cnid) { (*end) = (*cur_rec); -- cgit v0.10.2 From c2b3e1f76e5c90215bc7f740b376c0220eb8a8e3 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 30 Apr 2013 15:27:54 -0700 Subject: hfs/hfsplus: convert dprint to hfs_dbg Use a more current logging style. Rename macro and uses. Add do {} while (0) to macro. Add DBG_ to macro. Add and use hfs_dbg_cont variant where appropriate. Signed-off-by: Joe Perches Cc: Vyacheslav Dubeyko Cc: Hin-Tak Leung Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c index 571abe9..e928f6c 100644 --- a/fs/hfs/bfind.c +++ b/fs/hfs/bfind.c @@ -22,7 +22,8 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) return -ENOMEM; fd->search_key = ptr; fd->key = ptr + tree->max_key_len + 2; - dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); + hfs_dbg(BNODE_REFS, "find_init: %d (%p)\n", + tree->cnid, __builtin_return_address(0)); mutex_lock(&tree->tree_lock); return 0; } @@ -31,7 +32,8 @@ void hfs_find_exit(struct hfs_find_data *fd) { hfs_bnode_put(fd->bnode); kfree(fd->search_key); - dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); + hfs_dbg(BNODE_REFS, "find_exit: %d (%p)\n", + fd->tree->cnid, __builtin_return_address(0)); mutex_unlock(&fd->tree->tree_lock); fd->tree = NULL; } diff --git a/fs/hfs/bitmap.c b/fs/hfs/bitmap.c index c6e9736..28307bc 100644 --- a/fs/hfs/bitmap.c +++ b/fs/hfs/bitmap.c @@ -158,7 +158,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits) } } - dprint(DBG_BITMAP, "alloc_bits: %u,%u\n", pos, *num_bits); + hfs_dbg(BITMAP, "alloc_bits: %u,%u\n", pos, *num_bits); HFS_SB(sb)->free_ablocks -= *num_bits; hfs_bitmap_dirty(sb); out: @@ -200,7 +200,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count) if (!count) return 0; - dprint(DBG_BITMAP, "clear_bits: %u,%u\n", start, count); + hfs_dbg(BITMAP, "clear_bits: %u,%u\n", start, count); /* are all of the bits in range? */ if ((start + count) > HFS_SB(sb)->fs_ablocks) return -2; diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index cdb41a1..8693919 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -100,7 +100,7 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst, struct hfs_btree *tree; struct page *src_page, *dst_page; - dprint(DBG_BNODE_MOD, "copybytes: %u,%u,%u\n", dst, src, len); + hfs_dbg(BNODE_MOD, "copybytes: %u,%u,%u\n", dst, src, len); if (!len) return; tree = src_node->tree; @@ -120,7 +120,7 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) struct page *page; void *ptr; - dprint(DBG_BNODE_MOD, "movebytes: %u,%u,%u\n", dst, src, len); + hfs_dbg(BNODE_MOD, "movebytes: %u,%u,%u\n", dst, src, len); if (!len) return; src += node->page_offset; @@ -138,16 +138,16 @@ void hfs_bnode_dump(struct hfs_bnode *node) __be32 cnid; int i, off, key_off; - dprint(DBG_BNODE_MOD, "bnode: %d\n", node->this); + hfs_dbg(BNODE_MOD, "bnode: %d\n", node->this); hfs_bnode_read(node, &desc, 0, sizeof(desc)); - dprint(DBG_BNODE_MOD, "%d, %d, %d, %d, %d\n", + hfs_dbg(BNODE_MOD, "%d, %d, %d, %d, %d\n", be32_to_cpu(desc.next), be32_to_cpu(desc.prev), desc.type, desc.height, be16_to_cpu(desc.num_recs)); off = node->tree->node_size - 2; for (i = be16_to_cpu(desc.num_recs); i >= 0; off -= 2, i--) { key_off = hfs_bnode_read_u16(node, off); - dprint(DBG_BNODE_MOD, " %d", key_off); + hfs_dbg_cont(BNODE_MOD, " %d", key_off); if (i && node->type == HFS_NODE_INDEX) { int tmp; @@ -155,17 +155,18 @@ void hfs_bnode_dump(struct hfs_bnode *node) tmp = (hfs_bnode_read_u8(node, key_off) | 1) + 1; else tmp = node->tree->max_key_len + 1; - dprint(DBG_BNODE_MOD, " (%d,%d", tmp, hfs_bnode_read_u8(node, key_off)); + hfs_dbg_cont(BNODE_MOD, " (%d,%d", + tmp, hfs_bnode_read_u8(node, key_off)); hfs_bnode_read(node, &cnid, key_off + tmp, 4); - dprint(DBG_BNODE_MOD, ",%d)", be32_to_cpu(cnid)); + hfs_dbg_cont(BNODE_MOD, ",%d)", be32_to_cpu(cnid)); } else if (i && node->type == HFS_NODE_LEAF) { int tmp; tmp = hfs_bnode_read_u8(node, key_off); - dprint(DBG_BNODE_MOD, " (%d)", tmp); + hfs_dbg_cont(BNODE_MOD, " (%d)", tmp); } } - dprint(DBG_BNODE_MOD, "\n"); + hfs_dbg_cont(BNODE_MOD, "\n"); } void hfs_bnode_unlink(struct hfs_bnode *node) @@ -257,8 +258,8 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) node->this = cnid; set_bit(HFS_BNODE_NEW, &node->flags); atomic_set(&node->refcnt, 1); - dprint(DBG_BNODE_REFS, "new_node(%d:%d): 1\n", - node->tree->cnid, node->this); + hfs_dbg(BNODE_REFS, "new_node(%d:%d): 1\n", + node->tree->cnid, node->this); init_waitqueue_head(&node->lock_wq); spin_lock(&tree->hash_lock); node2 = hfs_bnode_findhash(tree, cnid); @@ -301,7 +302,7 @@ void hfs_bnode_unhash(struct hfs_bnode *node) { struct hfs_bnode **p; - dprint(DBG_BNODE_REFS, "remove_node(%d:%d): %d\n", + hfs_dbg(BNODE_REFS, "remove_node(%d:%d): %d\n", node->tree->cnid, node->this, atomic_read(&node->refcnt)); for (p = &node->tree->node_hash[hfs_bnode_hash(node->this)]; *p && *p != node; p = &(*p)->next_hash) @@ -443,8 +444,9 @@ void hfs_bnode_get(struct hfs_bnode *node) { if (node) { atomic_inc(&node->refcnt); - dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n", - node->tree->cnid, node->this, atomic_read(&node->refcnt)); + hfs_dbg(BNODE_REFS, "get_node(%d:%d): %d\n", + node->tree->cnid, node->this, + atomic_read(&node->refcnt)); } } @@ -455,8 +457,9 @@ void hfs_bnode_put(struct hfs_bnode *node) struct hfs_btree *tree = node->tree; int i; - dprint(DBG_BNODE_REFS, "put_node(%d:%d): %d\n", - node->tree->cnid, node->this, atomic_read(&node->refcnt)); + hfs_dbg(BNODE_REFS, "put_node(%d:%d): %d\n", + node->tree->cnid, node->this, + atomic_read(&node->refcnt)); BUG_ON(!atomic_read(&node->refcnt)); if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) return; diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c index 92fb358..5764257 100644 --- a/fs/hfs/brec.c +++ b/fs/hfs/brec.c @@ -94,7 +94,8 @@ again: end_rec_off = tree->node_size - (node->num_recs + 1) * 2; end_off = hfs_bnode_read_u16(node, end_rec_off); end_rec_off -= 2; - dprint(DBG_BNODE_MOD, "insert_rec: %d, %d, %d, %d\n", rec, size, end_off, end_rec_off); + hfs_dbg(BNODE_MOD, "insert_rec: %d, %d, %d, %d\n", + rec, size, end_off, end_rec_off); if (size > end_rec_off - end_off) { if (new_node) panic("not enough room!\n"); @@ -190,7 +191,8 @@ again: mark_inode_dirty(tree->inode); } hfs_bnode_dump(node); - dprint(DBG_BNODE_MOD, "remove_rec: %d, %d\n", fd->record, fd->keylength + fd->entrylength); + hfs_dbg(BNODE_MOD, "remove_rec: %d, %d\n", + fd->record, fd->keylength + fd->entrylength); if (!--node->num_recs) { hfs_bnode_unlink(node); if (!node->parent) @@ -240,7 +242,7 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) if (IS_ERR(new_node)) return new_node; hfs_bnode_get(node); - dprint(DBG_BNODE_MOD, "split_nodes: %d - %d - %d\n", + hfs_dbg(BNODE_MOD, "split_nodes: %d - %d - %d\n", node->this, new_node->this, node->next); new_node->next = node->next; new_node->prev = node->this; @@ -374,7 +376,8 @@ again: newkeylen = (hfs_bnode_read_u8(node, 14) | 1) + 1; else fd->keylength = newkeylen = tree->max_key_len + 1; - dprint(DBG_BNODE_MOD, "update_rec: %d, %d, %d\n", rec, fd->keylength, newkeylen); + hfs_dbg(BNODE_MOD, "update_rec: %d, %d, %d\n", + rec, fd->keylength, newkeylen); rec_off = tree->node_size - (rec + 2) * 2; end_rec_off = tree->node_size - (parent->num_recs + 1) * 2; diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 1cbdeea..07d94ce 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -316,7 +316,7 @@ void hfs_bmap_free(struct hfs_bnode *node) u32 nidx; u8 *data, byte, m; - dprint(DBG_BNODE_MOD, "btree_free_node: %u\n", node->this); + hfs_dbg(BNODE_MOD, "btree_free_node: %u\n", node->this); tree = node->tree; nidx = node->this; node = hfs_bnode_find(tree, 0); diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c index 9569b39..6ecb3d7 100644 --- a/fs/hfs/catalog.c +++ b/fs/hfs/catalog.c @@ -87,7 +87,8 @@ int hfs_cat_create(u32 cnid, struct inode *dir, struct qstr *str, struct inode * int entry_size; int err; - dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); + hfs_dbg(CAT_MOD, "create_cat: %s,%u(%d)\n", + str->name, cnid, inode->i_nlink); if (dir->i_size >= HFS_MAX_VALENCE) return -ENOSPC; @@ -214,7 +215,7 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, struct qstr *str) struct list_head *pos; int res, type; - dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); + hfs_dbg(CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); sb = dir->i_sb; res = hfs_find_init(HFS_SB(sb)->cat_tree, &fd); if (res) @@ -282,7 +283,8 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name, int entry_size, type; int err; - dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, + hfs_dbg(CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", + cnid, src_dir->i_ino, src_name->name, dst_dir->i_ino, dst_name->name); sb = src_dir->i_sb; err = hfs_find_init(HFS_SB(sb)->cat_tree, &src_fd); diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c index 813447b..e33a0d3 100644 --- a/fs/hfs/extent.c +++ b/fs/hfs/extent.c @@ -205,11 +205,12 @@ static void hfs_dump_extent(struct hfs_extent *extent) { int i; - dprint(DBG_EXTENT, " "); + hfs_dbg(EXTENT, " "); for (i = 0; i < 3; i++) - dprint(DBG_EXTENT, " %u:%u", be16_to_cpu(extent[i].block), - be16_to_cpu(extent[i].count)); - dprint(DBG_EXTENT, "\n"); + hfs_dbg_cont(EXTENT, " %u:%u", + be16_to_cpu(extent[i].block), + be16_to_cpu(extent[i].count)); + hfs_dbg_cont(EXTENT, "\n"); } static int hfs_add_extent(struct hfs_extent *extent, u16 offset, @@ -404,10 +405,10 @@ int hfs_extend_file(struct inode *inode) goto out; } - dprint(DBG_EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len); + hfs_dbg(EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len); if (HFS_I(inode)->alloc_blocks == HFS_I(inode)->first_blocks) { if (!HFS_I(inode)->first_blocks) { - dprint(DBG_EXTENT, "first extents\n"); + hfs_dbg(EXTENT, "first extents\n"); /* no extents yet */ HFS_I(inode)->first_extents[0].block = cpu_to_be16(start); HFS_I(inode)->first_extents[0].count = cpu_to_be16(len); @@ -449,7 +450,7 @@ out: return res; insert_extent: - dprint(DBG_EXTENT, "insert new extent\n"); + hfs_dbg(EXTENT, "insert new extent\n"); res = hfs_ext_write_extent(inode); if (res) goto out; @@ -474,8 +475,9 @@ void hfs_file_truncate(struct inode *inode) u32 size; int res; - dprint(DBG_INODE, "truncate: %lu, %Lu -> %Lu\n", inode->i_ino, - (long long)HFS_I(inode)->phys_size, inode->i_size); + hfs_dbg(INODE, "truncate: %lu, %Lu -> %Lu\n", + inode->i_ino, (long long)HFS_I(inode)->phys_size, + inode->i_size); if (inode->i_size > HFS_I(inode)->phys_size) { struct address_space *mapping = inode->i_mapping; void *fsdata; diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 67817af..d363b1e 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -34,8 +34,18 @@ //#define DBG_MASK (DBG_CAT_MOD|DBG_BNODE_REFS|DBG_INODE|DBG_EXTENT) #define DBG_MASK (0) -#define dprint(flg, fmt, args...) \ - if (flg & DBG_MASK) printk(fmt , ## args) +#define hfs_dbg(flg, fmt, ...) \ +do { \ + if (DBG_##flg & DBG_MASK) \ + printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ +} while (0) + +#define hfs_dbg_cont(flg, fmt, ...) \ +do { \ + if (DBG_##flg & DBG_MASK) \ + printk(KERN_CONT fmt, ##__VA_ARGS__); \ +} while (0) + /* * struct hfs_inode_info diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 0847471..716e1aa 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -237,7 +237,7 @@ void hfs_delete_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; - dprint(DBG_INODE, "delete_inode: %lu\n", inode->i_ino); + hfs_dbg(INODE, "delete_inode: %lu\n", inode->i_ino); if (S_ISDIR(inode->i_mode)) { HFS_SB(sb)->folder_count--; if (HFS_I(inode)->cat_key.ParID == cpu_to_be32(HFS_ROOT_CNID)) @@ -418,7 +418,7 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) hfs_cat_rec rec; int res; - dprint(DBG_INODE, "hfs_write_inode: %lu\n", inode->i_ino); + hfs_dbg(INODE, "hfs_write_inode: %lu\n", inode->i_ino); res = hfs_ext_write_extent(inode); if (res) return res; diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c index 8d691f1..2043b50 100644 --- a/fs/hfsplus/attributes.c +++ b/fs/hfsplus/attributes.c @@ -166,7 +166,7 @@ int hfsplus_find_attr(struct super_block *sb, u32 cnid, { int err = 0; - dprint(DBG_ATTR_MOD, "find_attr: %s,%d\n", name ? name : NULL, cnid); + hfs_dbg(ATTR_MOD, "find_attr: %s,%d\n", name ? name : NULL, cnid); if (!HFSPLUS_SB(sb)->attr_tree) { printk(KERN_ERR "hfs: attributes file doesn't exist\n"); @@ -228,7 +228,7 @@ int hfsplus_create_attr(struct inode *inode, int entry_size; int err; - dprint(DBG_ATTR_MOD, "create_attr: %s,%ld\n", + hfs_dbg(ATTR_MOD, "create_attr: %s,%ld\n", name ? name : NULL, inode->i_ino); if (!HFSPLUS_SB(sb)->attr_tree) { @@ -328,7 +328,7 @@ int hfsplus_delete_attr(struct inode *inode, const char *name) struct super_block *sb = inode->i_sb; struct hfs_find_data fd; - dprint(DBG_ATTR_MOD, "delete_attr: %s,%ld\n", + hfs_dbg(ATTR_MOD, "delete_attr: %s,%ld\n", name ? name : NULL, inode->i_ino); if (!HFSPLUS_SB(sb)->attr_tree) { @@ -369,7 +369,7 @@ int hfsplus_delete_all_attrs(struct inode *dir, u32 cnid) int err = 0; struct hfs_find_data fd; - dprint(DBG_ATTR_MOD, "delete_all_attrs: %d\n", cnid); + hfs_dbg(ATTR_MOD, "delete_all_attrs: %d\n", cnid); if (!HFSPLUS_SB(dir->i_sb)->attr_tree) { printk(KERN_ERR "hfs: attributes file doesn't exist\n"); diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c index bbfdc17..d27f37f 100644 --- a/fs/hfsplus/bfind.c +++ b/fs/hfsplus/bfind.c @@ -22,7 +22,7 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) return -ENOMEM; fd->search_key = ptr; fd->key = ptr + tree->max_key_len + 2; - dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", + hfs_dbg(BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); switch (tree->cnid) { case HFSPLUS_CAT_CNID: @@ -44,7 +44,7 @@ void hfs_find_exit(struct hfs_find_data *fd) { hfs_bnode_put(fd->bnode); kfree(fd->search_key); - dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", + hfs_dbg(BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); mutex_unlock(&fd->tree->tree_lock); fd->tree = NULL; diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index 6feefc0..7da6d46 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c @@ -30,7 +30,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, if (!len) return size; - dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len); + hfs_dbg(BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len); mutex_lock(&sbi->alloc_mutex); mapping = sbi->alloc_file->i_mapping; page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); @@ -89,14 +89,14 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, else end = pptr + ((size + 31) & (PAGE_CACHE_BITS - 1)) / 32; } - dprint(DBG_BITMAP, "bitmap full\n"); + hfs_dbg(BITMAP, "bitmap full\n"); start = size; goto out; found: start = offset + (curr - pptr) * 32 + i; if (start >= size) { - dprint(DBG_BITMAP, "bitmap full\n"); + hfs_dbg(BITMAP, "bitmap full\n"); goto out; } /* do any partial u32 at the start */ @@ -154,7 +154,7 @@ done: *max = offset + (curr - pptr) * 32 + i - start; sbi->free_blocks -= *max; hfsplus_mark_mdb_dirty(sb); - dprint(DBG_BITMAP, "-> %u,%u\n", start, *max); + hfs_dbg(BITMAP, "-> %u,%u\n", start, *max); out: mutex_unlock(&sbi->alloc_mutex); return start; @@ -173,7 +173,7 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) if (!count) return 0; - dprint(DBG_BITMAP, "block_free: %u,%u\n", offset, count); + hfs_dbg(BITMAP, "block_free: %u,%u\n", offset, count); /* are all of the bits in range? */ if ((offset + count) > sbi->total_blocks) return -ENOENT; diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index f31ac6f..1ca9304 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -130,7 +130,7 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst, struct page **src_page, **dst_page; int l; - dprint(DBG_BNODE_MOD, "copybytes: %u,%u,%u\n", dst, src, len); + hfs_dbg(BNODE_MOD, "copybytes: %u,%u,%u\n", dst, src, len); if (!len) return; tree = src_node->tree; @@ -188,7 +188,7 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) struct page **src_page, **dst_page; int l; - dprint(DBG_BNODE_MOD, "movebytes: %u,%u,%u\n", dst, src, len); + hfs_dbg(BNODE_MOD, "movebytes: %u,%u,%u\n", dst, src, len); if (!len) return; src += node->page_offset; @@ -302,16 +302,16 @@ void hfs_bnode_dump(struct hfs_bnode *node) __be32 cnid; int i, off, key_off; - dprint(DBG_BNODE_MOD, "bnode: %d\n", node->this); + hfs_dbg(BNODE_MOD, "bnode: %d\n", node->this); hfs_bnode_read(node, &desc, 0, sizeof(desc)); - dprint(DBG_BNODE_MOD, "%d, %d, %d, %d, %d\n", + hfs_dbg(BNODE_MOD, "%d, %d, %d, %d, %d\n", be32_to_cpu(desc.next), be32_to_cpu(desc.prev), desc.type, desc.height, be16_to_cpu(desc.num_recs)); off = node->tree->node_size - 2; for (i = be16_to_cpu(desc.num_recs); i >= 0; off -= 2, i--) { key_off = hfs_bnode_read_u16(node, off); - dprint(DBG_BNODE_MOD, " %d", key_off); + hfs_dbg(BNODE_MOD, " %d", key_off); if (i && node->type == HFS_NODE_INDEX) { int tmp; @@ -320,17 +320,17 @@ void hfs_bnode_dump(struct hfs_bnode *node) tmp = hfs_bnode_read_u16(node, key_off) + 2; else tmp = node->tree->max_key_len + 2; - dprint(DBG_BNODE_MOD, " (%d", tmp); + hfs_dbg_cont(BNODE_MOD, " (%d", tmp); hfs_bnode_read(node, &cnid, key_off + tmp, 4); - dprint(DBG_BNODE_MOD, ",%d)", be32_to_cpu(cnid)); + hfs_dbg_cont(BNODE_MOD, ",%d)", be32_to_cpu(cnid)); } else if (i && node->type == HFS_NODE_LEAF) { int tmp; tmp = hfs_bnode_read_u16(node, key_off); - dprint(DBG_BNODE_MOD, " (%d)", tmp); + hfs_dbg_cont(BNODE_MOD, " (%d)", tmp); } } - dprint(DBG_BNODE_MOD, "\n"); + hfs_dbg_cont(BNODE_MOD, "\n"); } void hfs_bnode_unlink(struct hfs_bnode *node) @@ -366,7 +366,7 @@ void hfs_bnode_unlink(struct hfs_bnode *node) /* move down? */ if (!node->prev && !node->next) - dprint(DBG_BNODE_MOD, "hfs_btree_del_level\n"); + hfs_dbg(BNODE_MOD, "hfs_btree_del_level\n"); if (!node->parent) { tree->root = 0; tree->depth = 0; @@ -425,8 +425,8 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) node->this = cnid; set_bit(HFS_BNODE_NEW, &node->flags); atomic_set(&node->refcnt, 1); - dprint(DBG_BNODE_REFS, "new_node(%d:%d): 1\n", - node->tree->cnid, node->this); + hfs_dbg(BNODE_REFS, "new_node(%d:%d): 1\n", + node->tree->cnid, node->this); init_waitqueue_head(&node->lock_wq); spin_lock(&tree->hash_lock); node2 = hfs_bnode_findhash(tree, cnid); @@ -470,7 +470,7 @@ void hfs_bnode_unhash(struct hfs_bnode *node) { struct hfs_bnode **p; - dprint(DBG_BNODE_REFS, "remove_node(%d:%d): %d\n", + hfs_dbg(BNODE_REFS, "remove_node(%d:%d): %d\n", node->tree->cnid, node->this, atomic_read(&node->refcnt)); for (p = &node->tree->node_hash[hfs_bnode_hash(node->this)]; *p && *p != node; p = &(*p)->next_hash) @@ -620,7 +620,7 @@ void hfs_bnode_get(struct hfs_bnode *node) { if (node) { atomic_inc(&node->refcnt); - dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n", + hfs_dbg(BNODE_REFS, "get_node(%d:%d): %d\n", node->tree->cnid, node->this, atomic_read(&node->refcnt)); } @@ -633,7 +633,7 @@ void hfs_bnode_put(struct hfs_bnode *node) struct hfs_btree *tree = node->tree; int i; - dprint(DBG_BNODE_REFS, "put_node(%d:%d): %d\n", + hfs_dbg(BNODE_REFS, "put_node(%d:%d): %d\n", node->tree->cnid, node->this, atomic_read(&node->refcnt)); BUG_ON(!atomic_read(&node->refcnt)); diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index 298d4e4..e238ba8 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c @@ -90,7 +90,7 @@ again: end_rec_off = tree->node_size - (node->num_recs + 1) * 2; end_off = hfs_bnode_read_u16(node, end_rec_off); end_rec_off -= 2; - dprint(DBG_BNODE_MOD, "insert_rec: %d, %d, %d, %d\n", + hfs_dbg(BNODE_MOD, "insert_rec: %d, %d, %d, %d\n", rec, size, end_off, end_rec_off); if (size > end_rec_off - end_off) { if (new_node) @@ -191,7 +191,7 @@ again: mark_inode_dirty(tree->inode); } hfs_bnode_dump(node); - dprint(DBG_BNODE_MOD, "remove_rec: %d, %d\n", + hfs_dbg(BNODE_MOD, "remove_rec: %d, %d\n", fd->record, fd->keylength + fd->entrylength); if (!--node->num_recs) { hfs_bnode_unlink(node); @@ -244,7 +244,7 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) if (IS_ERR(new_node)) return new_node; hfs_bnode_get(node); - dprint(DBG_BNODE_MOD, "split_nodes: %d - %d - %d\n", + hfs_dbg(BNODE_MOD, "split_nodes: %d - %d - %d\n", node->this, new_node->this, node->next); new_node->next = node->next; new_node->prev = node->this; @@ -379,7 +379,7 @@ again: newkeylen = hfs_bnode_read_u16(node, 14) + 2; else fd->keylength = newkeylen = tree->max_key_len + 2; - dprint(DBG_BNODE_MOD, "update_rec: %d, %d, %d\n", + hfs_dbg(BNODE_MOD, "update_rec: %d, %d, %d\n", rec, fd->keylength, newkeylen); rec_off = tree->node_size - (rec + 2) * 2; @@ -391,7 +391,7 @@ again: end_off = hfs_bnode_read_u16(parent, end_rec_off); if (end_rec_off - end_off < diff) { - dprint(DBG_BNODE_MOD, "hfs: splitting index node.\n"); + hfs_dbg(BNODE_MOD, "splitting index node\n"); fd->bnode = parent; new_node = hfs_bnode_split(fd); if (IS_ERR(new_node)) diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index efb689c..c2fa4bf 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -303,7 +303,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) kunmap(*pagep); nidx = node->next; if (!nidx) { - dprint(DBG_BNODE_MOD, "hfs: create new bmap node.\n"); + hfs_dbg(BNODE_MOD, "create new bmap node\n"); next_node = hfs_bmap_new_bmap(node, idx); } else next_node = hfs_bnode_find(tree, nidx); @@ -329,7 +329,7 @@ void hfs_bmap_free(struct hfs_bnode *node) u32 nidx; u8 *data, byte, m; - dprint(DBG_BNODE_MOD, "btree_free_node: %u\n", node->this); + hfs_dbg(BNODE_MOD, "btree_free_node: %u\n", node->this); BUG_ON(!node->this); tree = node->tree; nidx = node->this; diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 840d71e..12cea23 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -212,7 +212,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, int entry_size; int err; - dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", + hfs_dbg(CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); if (err) @@ -271,8 +271,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) int err, off; u16 type; - dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", - str ? str->name : NULL, cnid); + hfs_dbg(CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); if (err) return err; @@ -361,7 +360,7 @@ int hfsplus_rename_cat(u32 cnid, int entry_size, type; int err; - dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", + hfs_dbg(CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, dst_dir->i_ino, dst_name->name); err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd); diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index fe0a762..64a532a 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -265,7 +265,7 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, mutex_unlock(&hip->extents_lock); done: - dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", + hfs_dbg(EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); mask = (1 << sbi->fs_shift) - 1; @@ -288,11 +288,12 @@ static void hfsplus_dump_extent(struct hfsplus_extent *extent) { int i; - dprint(DBG_EXTENT, " "); + hfs_dbg(EXTENT, " "); for (i = 0; i < 8; i++) - dprint(DBG_EXTENT, " %u:%u", be32_to_cpu(extent[i].start_block), - be32_to_cpu(extent[i].block_count)); - dprint(DBG_EXTENT, "\n"); + hfs_dbg_cont(EXTENT, " %u:%u", + be32_to_cpu(extent[i].start_block), + be32_to_cpu(extent[i].block_count)); + hfs_dbg_cont(EXTENT, "\n"); } static int hfsplus_add_extent(struct hfsplus_extent *extent, u32 offset, @@ -349,7 +350,7 @@ found: err = hfsplus_block_free(sb, start, count); if (err) { printk(KERN_ERR "hfs: can't free extent\n"); - dprint(DBG_EXTENT, " start: %u count: %u\n", + hfs_dbg(EXTENT, " start: %u count: %u\n", start, count); } extent->block_count = 0; @@ -360,7 +361,7 @@ found: err = hfsplus_block_free(sb, start + count, block_nr); if (err) { printk(KERN_ERR "hfs: can't free extent\n"); - dprint(DBG_EXTENT, " start: %u count: %u\n", + hfs_dbg(EXTENT, " start: %u count: %u\n", start, count); } extent->block_count = cpu_to_be32(count); @@ -459,11 +460,11 @@ int hfsplus_file_extend(struct inode *inode) } } - dprint(DBG_EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len); + hfs_dbg(EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len); if (hip->alloc_blocks <= hip->first_blocks) { if (!hip->first_blocks) { - dprint(DBG_EXTENT, "first extents\n"); + hfs_dbg(EXTENT, "first extents\n"); /* no extents yet */ hip->first_extents[0].start_block = cpu_to_be32(start); hip->first_extents[0].block_count = cpu_to_be32(len); @@ -500,7 +501,7 @@ out: return res; insert_extent: - dprint(DBG_EXTENT, "insert new extent\n"); + hfs_dbg(EXTENT, "insert new extent\n"); res = hfsplus_ext_write_extent_locked(inode); if (res) goto out; @@ -525,9 +526,8 @@ void hfsplus_file_truncate(struct inode *inode) u32 alloc_cnt, blk_cnt, start; int res; - dprint(DBG_INODE, "truncate: %lu, %llu -> %llu\n", - inode->i_ino, (long long)hip->phys_size, - inode->i_size); + hfs_dbg(INODE, "truncate: %lu, %llu -> %llu\n", + inode->i_ino, (long long)hip->phys_size, inode->i_size); if (inode->i_size > hip->phys_size) { struct address_space *mapping = inode->i_mapping; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 05b11f3..910ea98e 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -32,9 +32,17 @@ #endif #define DBG_MASK (0) -#define dprint(flg, fmt, args...) \ - if (flg & DBG_MASK) \ - printk(fmt , ## args) +#define hfs_dbg(flg, fmt, ...) \ +do { \ + if (DBG_##flg & DBG_MASK) \ + printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ +} while (0) + +#define hfs_dbg_cont(flg, fmt, ...) \ +do { \ + if (DBG_##flg & DBG_MASK) \ + printk(KERN_CONT fmt, ##__VA_ARGS__); \ +} while (0) /* Runtime config options */ #define HFSPLUS_DEF_CR_TYPE 0x3F3F3F3F /* '????' */ diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 7b87284..4886fd3 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -145,7 +145,7 @@ static int hfsplus_write_inode(struct inode *inode, { int err; - dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); + hfs_dbg(INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); err = hfsplus_ext_write_extent(inode); if (err) @@ -160,7 +160,7 @@ static int hfsplus_write_inode(struct inode *inode, static void hfsplus_evict_inode(struct inode *inode) { - dprint(DBG_INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino); + hfs_dbg(INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino); truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); if (HFSPLUS_IS_RSRC(inode)) { @@ -179,7 +179,7 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait) if (!wait) return 0; - dprint(DBG_SUPER, "hfsplus_sync_fs\n"); + hfs_dbg(SUPER, "hfsplus_sync_fs\n"); /* * Explicitly write out the special metadata inodes. @@ -275,7 +275,7 @@ static void hfsplus_put_super(struct super_block *sb) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); - dprint(DBG_SUPER, "hfsplus_put_super\n"); + hfs_dbg(SUPER, "hfsplus_put_super\n"); cancel_delayed_work_sync(&sbi->sync_work); -- cgit v0.10.2 From d614267329f2bee7a082ed8781c581c0f3aaa808 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 30 Apr 2013 15:27:55 -0700 Subject: hfs/hfsplus: convert printks to pr_ Use a more current logging style. Add #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt hfsplus now uses "hfsplus: " for all messages. Coalesce formats. Prefix debugging messages too. Signed-off-by: Joe Perches Cc: Vyacheslav Dubeyko Cc: Hin-Tak Leung Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c index e928f6c..de69d8a 100644 --- a/fs/hfs/bfind.c +++ b/fs/hfs/bfind.c @@ -137,8 +137,8 @@ int hfs_brec_find(struct hfs_find_data *fd) return res; invalid: - printk(KERN_ERR "hfs: inconsistency in B*Tree (%d,%d,%d,%u,%u)\n", - height, bnode->height, bnode->type, nidx, parent); + pr_err("inconsistency in B*Tree (%d,%d,%d,%u,%u)\n", + height, bnode->height, bnode->type, nidx, parent); res = -EIO; release: hfs_bnode_put(bnode); diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index 8693919..f3b1a15 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -221,7 +221,7 @@ struct hfs_bnode *hfs_bnode_findhash(struct hfs_btree *tree, u32 cnid) struct hfs_bnode *node; if (cnid >= tree->node_count) { - printk(KERN_ERR "hfs: request for non-existent node %d in B*Tree\n", cnid); + pr_err("request for non-existent node %d in B*Tree\n", cnid); return NULL; } @@ -244,7 +244,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) loff_t off; if (cnid >= tree->node_count) { - printk(KERN_ERR "hfs: request for non-existent node %d in B*Tree\n", cnid); + pr_err("request for non-existent node %d in B*Tree\n", cnid); return NULL; } diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c index 5764257..9f4ee7f 100644 --- a/fs/hfs/brec.c +++ b/fs/hfs/brec.c @@ -47,15 +47,13 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec) if (node->tree->attributes & HFS_TREE_BIGKEYS) { retval = hfs_bnode_read_u16(node, recoff) + 2; if (retval > node->tree->max_key_len + 2) { - printk(KERN_ERR "hfs: keylen %d too large\n", - retval); + pr_err("keylen %d too large\n", retval); retval = 0; } } else { retval = (hfs_bnode_read_u8(node, recoff) | 1) + 1; if (retval > node->tree->max_key_len + 1) { - printk(KERN_ERR "hfs: keylen %d too large\n", - retval); + pr_err("keylen %d too large\n", retval); retval = 0; } } @@ -388,7 +386,7 @@ again: end_off = hfs_bnode_read_u16(parent, end_rec_off); if (end_rec_off - end_off < diff) { - printk(KERN_DEBUG "hfs: splitting index node...\n"); + printk(KERN_DEBUG "splitting index node...\n"); fd->bnode = parent; new_node = hfs_bnode_split(fd); if (IS_ERR(new_node)) diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 07d94ce..1ab19e6 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -48,7 +48,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke mdb->drXTFlSize, be32_to_cpu(mdb->drXTClpSiz)); if (HFS_I(tree->inode)->alloc_blocks > HFS_I(tree->inode)->first_blocks) { - printk(KERN_ERR "hfs: invalid btree extent records\n"); + pr_err("invalid btree extent records\n"); unlock_new_inode(tree->inode); goto free_inode; } @@ -60,8 +60,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke mdb->drCTFlSize, be32_to_cpu(mdb->drCTClpSiz)); if (!HFS_I(tree->inode)->first_blocks) { - printk(KERN_ERR "hfs: invalid btree extent records " - "(0 size).\n"); + pr_err("invalid btree extent records (0 size)\n"); unlock_new_inode(tree->inode); goto free_inode; } @@ -100,15 +99,15 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke switch (id) { case HFS_EXT_CNID: if (tree->max_key_len != HFS_MAX_EXT_KEYLEN) { - printk(KERN_ERR "hfs: invalid extent max_key_len %d\n", - tree->max_key_len); + pr_err("invalid extent max_key_len %d\n", + tree->max_key_len); goto fail_page; } break; case HFS_CAT_CNID: if (tree->max_key_len != HFS_MAX_CAT_KEYLEN) { - printk(KERN_ERR "hfs: invalid catalog max_key_len %d\n", - tree->max_key_len); + pr_err("invalid catalog max_key_len %d\n", + tree->max_key_len); goto fail_page; } break; @@ -146,8 +145,9 @@ void hfs_btree_close(struct hfs_btree *tree) while ((node = tree->node_hash[i])) { tree->node_hash[i] = node->next_hash; if (atomic_read(&node->refcnt)) - printk(KERN_ERR "hfs: node %d:%d still has %d user(s)!\n", - node->tree->cnid, node->this, atomic_read(&node->refcnt)); + pr_err("node %d:%d still has %d user(s)!\n", + node->tree->cnid, node->this, + atomic_read(&node->refcnt)); hfs_bnode_free(node); tree->node_hash_cnt--; } @@ -290,7 +290,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) kunmap(*pagep); nidx = node->next; if (!nidx) { - printk(KERN_DEBUG "hfs: create new bmap node...\n"); + printk(KERN_DEBUG "create new bmap node...\n"); next_node = hfs_bmap_new_bmap(node, idx); } else next_node = hfs_bnode_find(tree, nidx); @@ -331,7 +331,8 @@ void hfs_bmap_free(struct hfs_bnode *node) hfs_bnode_put(node); if (!i) { /* panic */; - printk(KERN_CRIT "hfs: unable to free bnode %u. bmap not found!\n", node->this); + pr_crit("unable to free bnode %u. bmap not found!\n", + node->this); return; } node = hfs_bnode_find(tree, i); @@ -339,7 +340,8 @@ void hfs_bmap_free(struct hfs_bnode *node) return; if (node->type != HFS_NODE_MAP) { /* panic */; - printk(KERN_CRIT "hfs: invalid bmap found! (%u,%d)\n", node->this, node->type); + pr_crit("invalid bmap found! (%u,%d)\n", + node->this, node->type); hfs_bnode_put(node); return; } @@ -352,7 +354,8 @@ void hfs_bmap_free(struct hfs_bnode *node) m = 1 << (~nidx & 7); byte = data[off]; if (!(byte & m)) { - printk(KERN_CRIT "hfs: trying to free free bnode %u(%d)\n", node->this, node->type); + pr_crit("trying to free free bnode %u(%d)\n", + node->this, node->type); kunmap(page); hfs_bnode_put(node); return; diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c index 6ecb3d7..ff0316b 100644 --- a/fs/hfs/catalog.c +++ b/fs/hfs/catalog.c @@ -187,14 +187,14 @@ int hfs_cat_find_brec(struct super_block *sb, u32 cnid, type = rec.type; if (type != HFS_CDR_THD && type != HFS_CDR_FTH) { - printk(KERN_ERR "hfs: found bad thread record in catalog\n"); + pr_err("found bad thread record in catalog\n"); return -EIO; } fd->search_key->cat.ParID = rec.thread.ParID; len = fd->search_key->cat.CName.len = rec.thread.CName.len; if (len > HFS_NAMELEN) { - printk(KERN_ERR "hfs: bad catalog namelength\n"); + pr_err("bad catalog namelength\n"); return -EIO; } memcpy(fd->search_key->cat.CName.name, rec.thread.CName.name, len); diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index e1c8048..17c22a8 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -88,12 +88,12 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); if (entry.type != HFS_CDR_THD) { - printk(KERN_ERR "hfs: bad catalog folder thread\n"); + pr_err("bad catalog folder thread\n"); err = -EIO; goto out; } //if (fd.entrylength < HFS_MIN_THREAD_SZ) { - // printk(KERN_ERR "hfs: truncated catalog thread\n"); + // pr_err("truncated catalog thread\n"); // err = -EIO; // goto out; //} @@ -112,7 +112,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) for (;;) { if (be32_to_cpu(fd.key->cat.ParID) != inode->i_ino) { - printk(KERN_ERR "hfs: walked past end of dir\n"); + pr_err("walked past end of dir\n"); err = -EIO; goto out; } @@ -127,7 +127,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) len = hfs_mac2asc(sb, strbuf, &fd.key->cat.CName); if (type == HFS_CDR_DIR) { if (fd.entrylength < sizeof(struct hfs_cat_dir)) { - printk(KERN_ERR "hfs: small dir entry\n"); + pr_err("small dir entry\n"); err = -EIO; goto out; } @@ -136,7 +136,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) break; } else if (type == HFS_CDR_FIL) { if (fd.entrylength < sizeof(struct hfs_cat_file)) { - printk(KERN_ERR "hfs: small file entry\n"); + pr_err("small file entry\n"); err = -EIO; goto out; } @@ -144,7 +144,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) be32_to_cpu(entry.file.FlNum), DT_REG)) break; } else { - printk(KERN_ERR "hfs: bad catalog entry type %d\n", type); + pr_err("bad catalog entry type %d\n", type); err = -EIO; goto out; } diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index d363b1e..a73b118 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -9,6 +9,12 @@ #ifndef _LINUX_HFS_FS_H #define _LINUX_HFS_FS_H +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -34,16 +40,16 @@ //#define DBG_MASK (DBG_CAT_MOD|DBG_BNODE_REFS|DBG_INODE|DBG_EXTENT) #define DBG_MASK (0) -#define hfs_dbg(flg, fmt, ...) \ -do { \ - if (DBG_##flg & DBG_MASK) \ - printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ +#define hfs_dbg(flg, fmt, ...) \ +do { \ + if (DBG_##flg & DBG_MASK) \ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ } while (0) -#define hfs_dbg_cont(flg, fmt, ...) \ -do { \ - if (DBG_##flg & DBG_MASK) \ - printk(KERN_CONT fmt, ##__VA_ARGS__); \ +#define hfs_dbg_cont(flg, fmt, ...) \ +do { \ + if (DBG_##flg & DBG_MASK) \ + pr_cont(fmt, ##__VA_ARGS__); \ } while (0) diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index b7ec224..aa3f0d6 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -48,7 +48,7 @@ static int hfs_get_last_session(struct super_block *sb, *start = (sector_t)te.cdte_addr.lba << 2; return 0; } - printk(KERN_ERR "hfs: invalid session number or type of track\n"); + pr_err("invalid session number or type of track\n"); return -EINVAL; } ms_info.addr_format = CDROM_LBA; @@ -101,7 +101,7 @@ int hfs_mdb_get(struct super_block *sb) HFS_SB(sb)->alloc_blksz = size = be32_to_cpu(mdb->drAlBlkSiz); if (!size || (size & (HFS_SECTOR_SIZE - 1))) { - printk(KERN_ERR "hfs: bad allocation block size %d\n", size); + pr_err("bad allocation block size %d\n", size); goto out_bh; } @@ -118,7 +118,7 @@ int hfs_mdb_get(struct super_block *sb) size >>= 1; brelse(bh); if (!sb_set_blocksize(sb, size)) { - printk(KERN_ERR "hfs: unable to set blocksize to %u\n", size); + pr_err("unable to set blocksize to %u\n", size); goto out; } @@ -162,8 +162,8 @@ int hfs_mdb_get(struct super_block *sb) } if (!HFS_SB(sb)->alt_mdb) { - printk(KERN_WARNING "hfs: unable to locate alternate MDB\n"); - printk(KERN_WARNING "hfs: continuing without an alternate MDB\n"); + pr_warn("unable to locate alternate MDB\n"); + pr_warn("continuing without an alternate MDB\n"); } HFS_SB(sb)->bitmap = (__be32 *)__get_free_pages(GFP_KERNEL, PAGE_SIZE < 8192 ? 1 : 0); @@ -178,7 +178,7 @@ int hfs_mdb_get(struct super_block *sb) while (size) { bh = sb_bread(sb, off >> sb->s_blocksize_bits); if (!bh) { - printk(KERN_ERR "hfs: unable to read volume bitmap\n"); + pr_err("unable to read volume bitmap\n"); goto out; } off2 = off & (sb->s_blocksize - 1); @@ -192,23 +192,22 @@ int hfs_mdb_get(struct super_block *sb) HFS_SB(sb)->ext_tree = hfs_btree_open(sb, HFS_EXT_CNID, hfs_ext_keycmp); if (!HFS_SB(sb)->ext_tree) { - printk(KERN_ERR "hfs: unable to open extent tree\n"); + pr_err("unable to open extent tree\n"); goto out; } HFS_SB(sb)->cat_tree = hfs_btree_open(sb, HFS_CAT_CNID, hfs_cat_keycmp); if (!HFS_SB(sb)->cat_tree) { - printk(KERN_ERR "hfs: unable to open catalog tree\n"); + pr_err("unable to open catalog tree\n"); goto out; } attrib = mdb->drAtrb; if (!(attrib & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) { - printk(KERN_WARNING "hfs: filesystem was not cleanly unmounted, " - "running fsck.hfs is recommended. mounting read-only.\n"); + pr_warn("filesystem was not cleanly unmounted, running fsck.hfs is recommended. mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } if ((attrib & cpu_to_be16(HFS_SB_ATTRIB_SLOCK))) { - printk(KERN_WARNING "hfs: filesystem is marked locked, mounting read-only.\n"); + pr_warn("filesystem is marked locked, mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } if (!(sb->s_flags & MS_RDONLY)) { @@ -312,7 +311,7 @@ void hfs_mdb_commit(struct super_block *sb) while (size) { bh = sb_bread(sb, block); if (!bh) { - printk(KERN_ERR "hfs: unable to read volume bitmap\n"); + pr_err("unable to read volume bitmap\n"); break; } len = min((int)sb->s_blocksize - off, size); diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 719760b..2d2039e 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -117,12 +117,11 @@ static int hfs_remount(struct super_block *sb, int *flags, char *data) return 0; if (!(*flags & MS_RDONLY)) { if (!(HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) { - printk(KERN_WARNING "hfs: filesystem was not cleanly unmounted, " - "running fsck.hfs is recommended. leaving read-only.\n"); + pr_warn("filesystem was not cleanly unmounted, running fsck.hfs is recommended. leaving read-only.\n"); sb->s_flags |= MS_RDONLY; *flags |= MS_RDONLY; } else if (HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_SLOCK)) { - printk(KERN_WARNING "hfs: filesystem is marked locked, leaving read-only.\n"); + pr_warn("filesystem is marked locked, leaving read-only.\n"); sb->s_flags |= MS_RDONLY; *flags |= MS_RDONLY; } @@ -253,29 +252,29 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) switch (token) { case opt_uid: if (match_int(&args[0], &tmp)) { - printk(KERN_ERR "hfs: uid requires an argument\n"); + pr_err("uid requires an argument\n"); return 0; } hsb->s_uid = make_kuid(current_user_ns(), (uid_t)tmp); if (!uid_valid(hsb->s_uid)) { - printk(KERN_ERR "hfs: invalid uid %d\n", tmp); + pr_err("invalid uid %d\n", tmp); return 0; } break; case opt_gid: if (match_int(&args[0], &tmp)) { - printk(KERN_ERR "hfs: gid requires an argument\n"); + pr_err("gid requires an argument\n"); return 0; } hsb->s_gid = make_kgid(current_user_ns(), (gid_t)tmp); if (!gid_valid(hsb->s_gid)) { - printk(KERN_ERR "hfs: invalid gid %d\n", tmp); + pr_err("invalid gid %d\n", tmp); return 0; } break; case opt_umask: if (match_octal(&args[0], &tmp)) { - printk(KERN_ERR "hfs: umask requires a value\n"); + pr_err("umask requires a value\n"); return 0; } hsb->s_file_umask = (umode_t)tmp; @@ -283,39 +282,39 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) break; case opt_file_umask: if (match_octal(&args[0], &tmp)) { - printk(KERN_ERR "hfs: file_umask requires a value\n"); + pr_err("file_umask requires a value\n"); return 0; } hsb->s_file_umask = (umode_t)tmp; break; case opt_dir_umask: if (match_octal(&args[0], &tmp)) { - printk(KERN_ERR "hfs: dir_umask requires a value\n"); + pr_err("dir_umask requires a value\n"); return 0; } hsb->s_dir_umask = (umode_t)tmp; break; case opt_part: if (match_int(&args[0], &hsb->part)) { - printk(KERN_ERR "hfs: part requires an argument\n"); + pr_err("part requires an argument\n"); return 0; } break; case opt_session: if (match_int(&args[0], &hsb->session)) { - printk(KERN_ERR "hfs: session requires an argument\n"); + pr_err("session requires an argument\n"); return 0; } break; case opt_type: if (match_fourchar(&args[0], &hsb->s_type)) { - printk(KERN_ERR "hfs: type requires a 4 character value\n"); + pr_err("type requires a 4 character value\n"); return 0; } break; case opt_creator: if (match_fourchar(&args[0], &hsb->s_creator)) { - printk(KERN_ERR "hfs: creator requires a 4 character value\n"); + pr_err("creator requires a 4 character value\n"); return 0; } break; @@ -324,14 +323,14 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) break; case opt_codepage: if (hsb->nls_disk) { - printk(KERN_ERR "hfs: unable to change codepage\n"); + pr_err("unable to change codepage\n"); return 0; } p = match_strdup(&args[0]); if (p) hsb->nls_disk = load_nls(p); if (!hsb->nls_disk) { - printk(KERN_ERR "hfs: unable to load codepage \"%s\"\n", p); + pr_err("unable to load codepage \"%s\"\n", p); kfree(p); return 0; } @@ -339,14 +338,14 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) break; case opt_iocharset: if (hsb->nls_io) { - printk(KERN_ERR "hfs: unable to change iocharset\n"); + pr_err("unable to change iocharset\n"); return 0; } p = match_strdup(&args[0]); if (p) hsb->nls_io = load_nls(p); if (!hsb->nls_io) { - printk(KERN_ERR "hfs: unable to load iocharset \"%s\"\n", p); + pr_err("unable to load iocharset \"%s\"\n", p); kfree(p); return 0; } @@ -360,7 +359,7 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) if (hsb->nls_disk && !hsb->nls_io) { hsb->nls_io = load_nls_default(); if (!hsb->nls_io) { - printk(KERN_ERR "hfs: unable to load default iocharset\n"); + pr_err("unable to load default iocharset\n"); return 0; } } @@ -400,7 +399,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) res = -EINVAL; if (!parse_options((char *)data, sbi)) { - printk(KERN_ERR "hfs: unable to parse mount options.\n"); + pr_err("unable to parse mount options\n"); goto bail; } @@ -411,7 +410,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) res = hfs_mdb_get(sb); if (res) { if (!silent) - printk(KERN_WARNING "hfs: can't find a HFS filesystem on dev %s.\n", + pr_warn("can't find a HFS filesystem on dev %s\n", hfs_mdb_name(sb)); res = -EINVAL; goto bail; @@ -449,7 +448,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) return 0; bail_no_root: - printk(KERN_ERR "hfs: get root inode failed.\n"); + pr_err("get root inode failed\n"); bail: hfs_mdb_put(sb); return res; diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c index 2043b50..0f47890 100644 --- a/fs/hfsplus/attributes.c +++ b/fs/hfsplus/attributes.c @@ -56,7 +56,7 @@ int hfsplus_attr_build_key(struct super_block *sb, hfsplus_btree_key *key, if (name) { len = strlen(name); if (len > HFSPLUS_ATTR_MAX_STRLEN) { - printk(KERN_ERR "hfs: invalid xattr name's length\n"); + pr_err("invalid xattr name's length\n"); return -EINVAL; } hfsplus_asc2uni(sb, @@ -169,7 +169,7 @@ int hfsplus_find_attr(struct super_block *sb, u32 cnid, hfs_dbg(ATTR_MOD, "find_attr: %s,%d\n", name ? name : NULL, cnid); if (!HFSPLUS_SB(sb)->attr_tree) { - printk(KERN_ERR "hfs: attributes file doesn't exist\n"); + pr_err("attributes file doesn't exist\n"); return -EINVAL; } @@ -232,7 +232,7 @@ int hfsplus_create_attr(struct inode *inode, name ? name : NULL, inode->i_ino); if (!HFSPLUS_SB(sb)->attr_tree) { - printk(KERN_ERR "hfs: attributes file doesn't exist\n"); + pr_err("attributes file doesn't exist\n"); return -EINVAL; } @@ -307,10 +307,10 @@ static int __hfsplus_delete_attr(struct inode *inode, u32 cnid, break; case HFSPLUS_ATTR_FORK_DATA: case HFSPLUS_ATTR_EXTENTS: - printk(KERN_ERR "hfs: only inline data xattr are supported\n"); + pr_err("only inline data xattr are supported\n"); return -EOPNOTSUPP; default: - printk(KERN_ERR "hfs: invalid extended attribute record\n"); + pr_err("invalid extended attribute record\n"); return -ENOENT; } @@ -332,7 +332,7 @@ int hfsplus_delete_attr(struct inode *inode, const char *name) name ? name : NULL, inode->i_ino); if (!HFSPLUS_SB(sb)->attr_tree) { - printk(KERN_ERR "hfs: attributes file doesn't exist\n"); + pr_err("attributes file doesn't exist\n"); return -EINVAL; } @@ -346,7 +346,7 @@ int hfsplus_delete_attr(struct inode *inode, const char *name) if (err) goto out; } else { - printk(KERN_ERR "hfs: invalid extended attribute name\n"); + pr_err("invalid extended attribute name\n"); err = -EINVAL; goto out; } @@ -372,7 +372,7 @@ int hfsplus_delete_all_attrs(struct inode *dir, u32 cnid) hfs_dbg(ATTR_MOD, "delete_all_attrs: %d\n", cnid); if (!HFSPLUS_SB(dir->i_sb)->attr_tree) { - printk(KERN_ERR "hfs: attributes file doesn't exist\n"); + pr_err("attributes file doesn't exist\n"); return -EINVAL; } @@ -384,7 +384,7 @@ int hfsplus_delete_all_attrs(struct inode *dir, u32 cnid) err = hfsplus_find_attr(dir->i_sb, cnid, NULL, &fd); if (err) { if (err != -ENOENT) - printk(KERN_ERR "hfs: xattr search failed.\n"); + pr_err("xattr search failed\n"); goto end_delete_all; } diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c index d27f37f..c1422d9 100644 --- a/fs/hfsplus/bfind.c +++ b/fs/hfsplus/bfind.c @@ -208,7 +208,7 @@ int hfs_brec_find(struct hfs_find_data *fd, search_strategy_t do_key_compare) return res; invalid: - printk(KERN_ERR "hfs: inconsistency in B*Tree (%d,%d,%d,%u,%u)\n", + pr_err("inconsistency in B*Tree (%d,%d,%d,%u,%u)\n", height, bnode->height, bnode->type, nidx, parent); res = -EIO; release: diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index 7da6d46..826e864 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c @@ -238,7 +238,7 @@ out: return 0; kaboom: - printk(KERN_CRIT "hfsplus: unable to mark blocks free: error %ld\n", + pr_crit("hfsplus: unable to mark blocks free: error %ld\n", PTR_ERR(page)); mutex_unlock(&sbi->alloc_mutex); diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index 1ca9304..11c8602 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -386,7 +386,7 @@ struct hfs_bnode *hfs_bnode_findhash(struct hfs_btree *tree, u32 cnid) struct hfs_bnode *node; if (cnid >= tree->node_count) { - printk(KERN_ERR "hfs: request for non-existent node " + pr_err("request for non-existent node " "%d in B*Tree\n", cnid); return NULL; @@ -409,7 +409,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) loff_t off; if (cnid >= tree->node_count) { - printk(KERN_ERR "hfs: request for non-existent node " + pr_err("request for non-existent node " "%d in B*Tree\n", cnid); return NULL; @@ -588,7 +588,7 @@ struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num) node = hfs_bnode_findhash(tree, num); spin_unlock(&tree->hash_lock); if (node) { - printk(KERN_CRIT "new node %u already hashed?\n", num); + pr_crit("new node %u already hashed?\n", num); WARN_ON(1); return node; } diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index e238ba8..6e560d5 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c @@ -45,13 +45,13 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec) if (!recoff) return 0; if (recoff > node->tree->node_size - 2) { - printk(KERN_ERR "hfs: recoff %d too large\n", recoff); + pr_err("recoff %d too large\n", recoff); return 0; } retval = hfs_bnode_read_u16(node, recoff) + 2; if (retval > node->tree->max_key_len + 2) { - printk(KERN_ERR "hfs: keylen %d too large\n", + pr_err("keylen %d too large\n", retval); retval = 0; } diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index c2fa4bf..0c6540c 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -40,8 +40,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) tree->inode = inode; if (!HFSPLUS_I(tree->inode)->first_blocks) { - printk(KERN_ERR - "hfs: invalid btree extent records (0 size).\n"); + pr_err("invalid btree extent records (0 size)\n"); goto free_inode; } @@ -68,12 +67,12 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) switch (id) { case HFSPLUS_EXT_CNID: if (tree->max_key_len != HFSPLUS_EXT_KEYLEN - sizeof(u16)) { - printk(KERN_ERR "hfs: invalid extent max_key_len %d\n", + pr_err("invalid extent max_key_len %d\n", tree->max_key_len); goto fail_page; } if (tree->attributes & HFS_TREE_VARIDXKEYS) { - printk(KERN_ERR "hfs: invalid extent btree flag\n"); + pr_err("invalid extent btree flag\n"); goto fail_page; } @@ -81,12 +80,12 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) break; case HFSPLUS_CAT_CNID: if (tree->max_key_len != HFSPLUS_CAT_KEYLEN - sizeof(u16)) { - printk(KERN_ERR "hfs: invalid catalog max_key_len %d\n", + pr_err("invalid catalog max_key_len %d\n", tree->max_key_len); goto fail_page; } if (!(tree->attributes & HFS_TREE_VARIDXKEYS)) { - printk(KERN_ERR "hfs: invalid catalog btree flag\n"); + pr_err("invalid catalog btree flag\n"); goto fail_page; } @@ -100,19 +99,19 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) break; case HFSPLUS_ATTR_CNID: if (tree->max_key_len != HFSPLUS_ATTR_KEYLEN - sizeof(u16)) { - printk(KERN_ERR "hfs: invalid attributes max_key_len %d\n", + pr_err("invalid attributes max_key_len %d\n", tree->max_key_len); goto fail_page; } tree->keycmp = hfsplus_attr_bin_cmp_key; break; default: - printk(KERN_ERR "hfs: unknown B*Tree requested\n"); + pr_err("unknown B*Tree requested\n"); goto fail_page; } if (!(tree->attributes & HFS_TREE_BIGKEYS)) { - printk(KERN_ERR "hfs: invalid btree flag\n"); + pr_err("invalid btree flag\n"); goto fail_page; } @@ -155,7 +154,7 @@ void hfs_btree_close(struct hfs_btree *tree) while ((node = tree->node_hash[i])) { tree->node_hash[i] = node->next_hash; if (atomic_read(&node->refcnt)) - printk(KERN_CRIT "hfs: node %d:%d " + pr_crit("node %d:%d " "still has %d user(s)!\n", node->tree->cnid, node->this, atomic_read(&node->refcnt)); @@ -345,7 +344,7 @@ void hfs_bmap_free(struct hfs_bnode *node) hfs_bnode_put(node); if (!i) { /* panic */; - printk(KERN_CRIT "hfs: unable to free bnode %u. " + pr_crit("unable to free bnode %u. " "bmap not found!\n", node->this); return; @@ -355,7 +354,7 @@ void hfs_bmap_free(struct hfs_bnode *node) return; if (node->type != HFS_NODE_MAP) { /* panic */; - printk(KERN_CRIT "hfs: invalid bmap found! " + pr_crit("invalid bmap found! " "(%u,%d)\n", node->this, node->type); hfs_bnode_put(node); @@ -370,7 +369,7 @@ void hfs_bmap_free(struct hfs_bnode *node) m = 1 << (~nidx & 7); byte = data[off]; if (!(byte & m)) { - printk(KERN_CRIT "hfs: trying to free free bnode " + pr_crit("trying to free free bnode " "%u(%d)\n", node->this, node->type); kunmap(page); diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 12cea23..968ce41 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -188,12 +188,12 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, type = be16_to_cpu(tmp.type); if (type != HFSPLUS_FOLDER_THREAD && type != HFSPLUS_FILE_THREAD) { - printk(KERN_ERR "hfs: found bad thread record in catalog\n"); + pr_err("found bad thread record in catalog\n"); return -EIO; } if (be16_to_cpu(tmp.thread.nodeName.length) > 255) { - printk(KERN_ERR "hfs: catalog name length corrupted\n"); + pr_err("catalog name length corrupted\n"); return -EIO; } diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 031c24e..a37ac93 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -103,7 +103,7 @@ again: } else if (!dentry->d_fsdata) dentry->d_fsdata = (void *)(unsigned long)cnid; } else { - printk(KERN_ERR "hfs: invalid catalog entry type in lookup\n"); + pr_err("invalid catalog entry type in lookup\n"); err = -EIO; goto fail; } @@ -159,12 +159,12 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); if (be16_to_cpu(entry.type) != HFSPLUS_FOLDER_THREAD) { - printk(KERN_ERR "hfs: bad catalog folder thread\n"); + pr_err("bad catalog folder thread\n"); err = -EIO; goto out; } if (fd.entrylength < HFSPLUS_MIN_THREAD_SZ) { - printk(KERN_ERR "hfs: truncated catalog thread\n"); + pr_err("truncated catalog thread\n"); err = -EIO; goto out; } @@ -183,7 +183,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) for (;;) { if (be32_to_cpu(fd.key->cat.parent) != inode->i_ino) { - printk(KERN_ERR "hfs: walked past end of dir\n"); + pr_err("walked past end of dir\n"); err = -EIO; goto out; } @@ -203,7 +203,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) if (type == HFSPLUS_FOLDER) { if (fd.entrylength < sizeof(struct hfsplus_cat_folder)) { - printk(KERN_ERR "hfs: small dir entry\n"); + pr_err("small dir entry\n"); err = -EIO; goto out; } @@ -216,7 +216,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) break; } else if (type == HFSPLUS_FILE) { if (fd.entrylength < sizeof(struct hfsplus_cat_file)) { - printk(KERN_ERR "hfs: small file entry\n"); + pr_err("small file entry\n"); err = -EIO; goto out; } @@ -224,7 +224,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) be32_to_cpu(entry.file.id), DT_REG)) break; } else { - printk(KERN_ERR "hfs: bad catalog entry type\n"); + pr_err("bad catalog entry type\n"); err = -EIO; goto out; } diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 64a532a..43658c6 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -349,7 +349,7 @@ found: if (count <= block_nr) { err = hfsplus_block_free(sb, start, count); if (err) { - printk(KERN_ERR "hfs: can't free extent\n"); + pr_err("can't free extent\n"); hfs_dbg(EXTENT, " start: %u count: %u\n", start, count); } @@ -360,7 +360,7 @@ found: count -= block_nr; err = hfsplus_block_free(sb, start + count, block_nr); if (err) { - printk(KERN_ERR "hfs: can't free extent\n"); + pr_err("can't free extent\n"); hfs_dbg(EXTENT, " start: %u count: %u\n", start, count); } @@ -433,7 +433,7 @@ int hfsplus_file_extend(struct inode *inode) if (sbi->alloc_file->i_size * 8 < sbi->total_blocks - sbi->free_blocks + 8) { /* extend alloc file */ - printk(KERN_ERR "hfs: extend alloc file! " + pr_err("extend alloc file! " "(%llu,%u,%u)\n", sbi->alloc_file->i_size * 8, sbi->total_blocks, sbi->free_blocks); diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 910ea98e..60b0a33 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -10,6 +10,12 @@ #ifndef _LINUX_HFSPLUS_FS_H #define _LINUX_HFSPLUS_FS_H +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -32,16 +38,16 @@ #endif #define DBG_MASK (0) -#define hfs_dbg(flg, fmt, ...) \ -do { \ - if (DBG_##flg & DBG_MASK) \ - printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ +#define hfs_dbg(flg, fmt, ...) \ +do { \ + if (DBG_##flg & DBG_MASK) \ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ } while (0) -#define hfs_dbg_cont(flg, fmt, ...) \ -do { \ - if (DBG_##flg & DBG_MASK) \ - printk(KERN_CONT fmt, ##__VA_ARGS__); \ +#define hfs_dbg_cont(flg, fmt, ...) \ +do { \ + if (DBG_##flg & DBG_MASK) \ + pr_cont(fmt, ##__VA_ARGS__); \ } while (0) /* Runtime config options */ diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 160ccc9..7faaa96 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -357,7 +357,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, if (!error) error = error2; } else { - printk(KERN_ERR "hfs: sync non-existent attributes tree\n"); + pr_err("sync non-existent attributes tree\n"); } } @@ -573,7 +573,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date); HFSPLUS_I(inode)->create_date = file->create_date; } else { - printk(KERN_ERR "hfs: bad catalog entry used to create inode\n"); + pr_err("bad catalog entry used to create inode\n"); res = -EIO; } return res; diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index ed257c6..968eab5 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -113,67 +113,67 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) switch (token) { case opt_creator: if (match_fourchar(&args[0], &sbi->creator)) { - printk(KERN_ERR "hfs: creator requires a 4 character value\n"); + pr_err("creator requires a 4 character value\n"); return 0; } break; case opt_type: if (match_fourchar(&args[0], &sbi->type)) { - printk(KERN_ERR "hfs: type requires a 4 character value\n"); + pr_err("type requires a 4 character value\n"); return 0; } break; case opt_umask: if (match_octal(&args[0], &tmp)) { - printk(KERN_ERR "hfs: umask requires a value\n"); + pr_err("umask requires a value\n"); return 0; } sbi->umask = (umode_t)tmp; break; case opt_uid: if (match_int(&args[0], &tmp)) { - printk(KERN_ERR "hfs: uid requires an argument\n"); + pr_err("uid requires an argument\n"); return 0; } sbi->uid = make_kuid(current_user_ns(), (uid_t)tmp); if (!uid_valid(sbi->uid)) { - printk(KERN_ERR "hfs: invalid uid specified\n"); + pr_err("invalid uid specified\n"); return 0; } break; case opt_gid: if (match_int(&args[0], &tmp)) { - printk(KERN_ERR "hfs: gid requires an argument\n"); + pr_err("gid requires an argument\n"); return 0; } sbi->gid = make_kgid(current_user_ns(), (gid_t)tmp); if (!gid_valid(sbi->gid)) { - printk(KERN_ERR "hfs: invalid gid specified\n"); + pr_err("invalid gid specified\n"); return 0; } break; case opt_part: if (match_int(&args[0], &sbi->part)) { - printk(KERN_ERR "hfs: part requires an argument\n"); + pr_err("part requires an argument\n"); return 0; } break; case opt_session: if (match_int(&args[0], &sbi->session)) { - printk(KERN_ERR "hfs: session requires an argument\n"); + pr_err("session requires an argument\n"); return 0; } break; case opt_nls: if (sbi->nls) { - printk(KERN_ERR "hfs: unable to change nls mapping\n"); + pr_err("unable to change nls mapping\n"); return 0; } p = match_strdup(&args[0]); if (p) sbi->nls = load_nls(p); if (!sbi->nls) { - printk(KERN_ERR "hfs: unable to load " + pr_err("unable to load " "nls mapping \"%s\"\n", p); kfree(p); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 4886fd3..4c4d142 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -132,7 +132,7 @@ static int hfsplus_system_write_inode(struct inode *inode) if (tree) { int err = hfs_btree_write(tree); if (err) { - printk(KERN_ERR "hfs: b-tree write err: %d, ino %lu\n", + pr_err("b-tree write err: %d, ino %lu\n", err, inode->i_ino); return err; } @@ -251,7 +251,7 @@ static void delayed_sync_fs(struct work_struct *work) err = hfsplus_sync_fs(sbi->alloc_file->i_sb, 1); if (err) - printk(KERN_ERR "hfs: delayed sync fs err %d\n", err); + pr_err("delayed sync fs err %d\n", err); } void hfsplus_mark_mdb_dirty(struct super_block *sb) @@ -333,25 +333,19 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data) return -EINVAL; if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) { - printk(KERN_WARNING "hfs: filesystem was " - "not cleanly unmounted, " - "running fsck.hfsplus is recommended. " - "leaving read-only.\n"); + pr_warn("filesystem was not cleanly unmounted, running fsck.hfsplus is recommended. leaving read-only.\n"); sb->s_flags |= MS_RDONLY; *flags |= MS_RDONLY; } else if (force) { /* nothing */ } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { - printk(KERN_WARNING "hfs: filesystem is marked locked, " - "leaving read-only.\n"); + pr_warn("filesystem is marked locked, leaving read-only.\n"); sb->s_flags |= MS_RDONLY; *flags |= MS_RDONLY; } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { - printk(KERN_WARNING "hfs: filesystem is " - "marked journaled, " - "leaving read-only.\n"); + pr_warn("filesystem is marked journaled, leaving read-only.\n"); sb->s_flags |= MS_RDONLY; *flags |= MS_RDONLY; } @@ -397,7 +391,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) err = -EINVAL; if (!hfsplus_parse_options(data, sbi)) { - printk(KERN_ERR "hfs: unable to parse mount options\n"); + pr_err("unable to parse mount options\n"); goto out_unload_nls; } @@ -405,14 +399,14 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) nls = sbi->nls; sbi->nls = load_nls("utf8"); if (!sbi->nls) { - printk(KERN_ERR "hfs: unable to load nls for utf8\n"); + pr_err("unable to load nls for utf8\n"); goto out_unload_nls; } /* Grab the volume header */ if (hfsplus_read_wrapper(sb)) { if (!silent) - printk(KERN_WARNING "hfs: unable to find HFS+ superblock\n"); + pr_warn("unable to find HFS+ superblock\n"); goto out_unload_nls; } vhdr = sbi->s_vhdr; @@ -421,7 +415,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = HFSPLUS_VOLHEAD_SIG; if (be16_to_cpu(vhdr->version) < HFSPLUS_MIN_VERSION || be16_to_cpu(vhdr->version) > HFSPLUS_CURRENT_VERSION) { - printk(KERN_ERR "hfs: wrong filesystem version\n"); + pr_err("wrong filesystem version\n"); goto out_free_vhdr; } sbi->total_blocks = be32_to_cpu(vhdr->total_blocks); @@ -445,7 +439,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) if ((last_fs_block > (sector_t)(~0ULL) >> (sbi->alloc_blksz_shift - 9)) || (last_fs_page > (pgoff_t)(~0ULL))) { - printk(KERN_ERR "hfs: filesystem size too large.\n"); + pr_err("filesystem size too large\n"); goto out_free_vhdr; } @@ -454,22 +448,16 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) sb->s_maxbytes = MAX_LFS_FILESIZE; if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) { - printk(KERN_WARNING "hfs: Filesystem was " - "not cleanly unmounted, " - "running fsck.hfsplus is recommended. " - "mounting read-only.\n"); + pr_warn("Filesystem was not cleanly unmounted, running fsck.hfsplus is recommended. mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } else if (test_and_clear_bit(HFSPLUS_SB_FORCE, &sbi->flags)) { /* nothing */ } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { - printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); + pr_warn("Filesystem is marked locked, mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } else if ((vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) && !(sb->s_flags & MS_RDONLY)) { - printk(KERN_WARNING "hfs: write access to " - "a journaled filesystem is not supported, " - "use the force option at your own risk, " - "mounting read-only.\n"); + pr_warn("write access to a journaled filesystem is not supported, use the force option at your own risk, mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } @@ -478,18 +466,18 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) /* Load metadata objects (B*Trees) */ sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); if (!sbi->ext_tree) { - printk(KERN_ERR "hfs: failed to load extents file\n"); + pr_err("failed to load extents file\n"); goto out_free_vhdr; } sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); if (!sbi->cat_tree) { - printk(KERN_ERR "hfs: failed to load catalog file\n"); + pr_err("failed to load catalog file\n"); goto out_close_ext_tree; } if (vhdr->attr_file.total_blocks != 0) { sbi->attr_tree = hfs_btree_open(sb, HFSPLUS_ATTR_CNID); if (!sbi->attr_tree) { - printk(KERN_ERR "hfs: failed to load attributes file\n"); + pr_err("failed to load attributes file\n"); goto out_close_cat_tree; } } @@ -497,7 +485,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID); if (IS_ERR(inode)) { - printk(KERN_ERR "hfs: failed to load allocation file\n"); + pr_err("failed to load allocation file\n"); err = PTR_ERR(inode); goto out_close_attr_tree; } @@ -506,7 +494,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) /* Load the root directory */ root = hfsplus_iget(sb, HFSPLUS_ROOT_CNID); if (IS_ERR(root)) { - printk(KERN_ERR "hfs: failed to load root directory\n"); + pr_err("failed to load root directory\n"); err = PTR_ERR(root); goto out_put_alloc_file; } diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 90effcc..b51a607 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -156,7 +156,7 @@ static int hfsplus_get_last_session(struct super_block *sb, *start = (sector_t)te.cdte_addr.lba << 2; return 0; } - printk(KERN_ERR "hfs: invalid session number or type of track\n"); + pr_err("invalid session number or type of track\n"); return -EINVAL; } ms_info.addr_format = CDROM_LBA; @@ -234,8 +234,7 @@ reread: error = -EINVAL; if (sbi->s_backup_vhdr->signature != sbi->s_vhdr->signature) { - printk(KERN_WARNING - "hfs: invalid secondary volume header\n"); + pr_warn("invalid secondary volume header\n"); goto out_free_backup_vhdr; } @@ -259,8 +258,7 @@ reread: blocksize >>= 1; if (sb_set_blocksize(sb, blocksize) != blocksize) { - printk(KERN_ERR "hfs: unable to set blocksize to %u!\n", - blocksize); + pr_err("unable to set blocksize to %u!\n", blocksize); goto out_free_backup_vhdr; } diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index e8a4b08..f663461 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -107,19 +107,19 @@ int __hfsplus_setxattr(struct inode *inode, const char *name, err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &cat_fd); if (err) { - printk(KERN_ERR "hfs: can't init xattr find struct\n"); + pr_err("can't init xattr find struct\n"); return err; } err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &cat_fd); if (err) { - printk(KERN_ERR "hfs: catalog searching failed\n"); + pr_err("catalog searching failed\n"); goto end_setxattr; } if (!strcmp_xattr_finder_info(name)) { if (flags & XATTR_CREATE) { - printk(KERN_ERR "hfs: xattr exists yet\n"); + pr_err("xattr exists yet\n"); err = -EOPNOTSUPP; goto end_setxattr; } @@ -165,7 +165,7 @@ int __hfsplus_setxattr(struct inode *inode, const char *name, if (hfsplus_attr_exists(inode, name)) { if (flags & XATTR_CREATE) { - printk(KERN_ERR "hfs: xattr exists yet\n"); + pr_err("xattr exists yet\n"); err = -EOPNOTSUPP; goto end_setxattr; } @@ -177,7 +177,7 @@ int __hfsplus_setxattr(struct inode *inode, const char *name, goto end_setxattr; } else { if (flags & XATTR_REPLACE) { - printk(KERN_ERR "hfs: cannot replace xattr\n"); + pr_err("cannot replace xattr\n"); err = -EOPNOTSUPP; goto end_setxattr; } @@ -210,7 +210,7 @@ int __hfsplus_setxattr(struct inode *inode, const char *name, cat_entry_flags); hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY); } else { - printk(KERN_ERR "hfs: invalid catalog entry type\n"); + pr_err("invalid catalog entry type\n"); err = -EIO; goto end_setxattr; } @@ -269,7 +269,7 @@ static ssize_t hfsplus_getxattr_finder_info(struct dentry *dentry, if (size >= record_len) { res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); if (res) { - printk(KERN_ERR "hfs: can't init xattr find struct\n"); + pr_err("can't init xattr find struct\n"); return res; } res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); @@ -340,13 +340,13 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, entry = hfsplus_alloc_attr_entry(); if (!entry) { - printk(KERN_ERR "hfs: can't allocate xattr entry\n"); + pr_err("can't allocate xattr entry\n"); return -ENOMEM; } res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->attr_tree, &fd); if (res) { - printk(KERN_ERR "hfs: can't init xattr find struct\n"); + pr_err("can't init xattr find struct\n"); goto failed_getxattr_init; } @@ -355,7 +355,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, if (res == -ENOENT) res = -ENODATA; else - printk(KERN_ERR "hfs: xattr searching failed\n"); + pr_err("xattr searching failed\n"); goto out; } @@ -368,17 +368,17 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, offsetof(struct hfsplus_attr_inline_data, length)); if (record_length > HFSPLUS_MAX_INLINE_DATA_SIZE) { - printk(KERN_ERR "hfs: invalid xattr record size\n"); + pr_err("invalid xattr record size\n"); res = -EIO; goto out; } } else if (record_type == HFSPLUS_ATTR_FORK_DATA || record_type == HFSPLUS_ATTR_EXTENTS) { - printk(KERN_ERR "hfs: only inline data xattr are supported\n"); + pr_err("only inline data xattr are supported\n"); res = -EOPNOTSUPP; goto out; } else { - printk(KERN_ERR "hfs: invalid xattr record\n"); + pr_err("invalid xattr record\n"); res = -EIO; goto out; } @@ -427,7 +427,7 @@ static ssize_t hfsplus_listxattr_finder_info(struct dentry *dentry, res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); if (res) { - printk(KERN_ERR "hfs: can't init xattr find struct\n"); + pr_err("can't init xattr find struct\n"); return res; } @@ -506,7 +506,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->attr_tree, &fd); if (err) { - printk(KERN_ERR "hfs: can't init xattr find struct\n"); + pr_err("can't init xattr find struct\n"); return err; } @@ -525,8 +525,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) for (;;) { key_len = hfs_bnode_read_u16(fd.bnode, fd.keyoffset); if (key_len == 0 || key_len > fd.tree->max_key_len) { - printk(KERN_ERR "hfs: invalid xattr key length: %d\n", - key_len); + pr_err("invalid xattr key length: %d\n", key_len); res = -EIO; goto end_listxattr; } @@ -541,7 +540,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) if (hfsplus_uni2asc(inode->i_sb, (const struct hfsplus_unistr *)&fd.key->attr.key_name, strbuf, &xattr_name_len)) { - printk(KERN_ERR "hfs: unicode conversion failed\n"); + pr_err("unicode conversion failed\n"); res = -EIO; goto end_listxattr; } @@ -598,13 +597,13 @@ int hfsplus_removexattr(struct dentry *dentry, const char *name) err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &cat_fd); if (err) { - printk(KERN_ERR "hfs: can't init xattr find struct\n"); + pr_err("can't init xattr find struct\n"); return err; } err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &cat_fd); if (err) { - printk(KERN_ERR "hfs: catalog searching failed\n"); + pr_err("catalog searching failed\n"); goto end_removexattr; } @@ -643,7 +642,7 @@ int hfsplus_removexattr(struct dentry *dentry, const char *name) flags); hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY); } else { - printk(KERN_ERR "hfs: invalid catalog entry type\n"); + pr_err("invalid catalog entry type\n"); err = -EIO; goto end_removexattr; } -- cgit v0.10.2 From d7a475d0c4a22ce12ae1d099b76327aa1637ce07 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Tue, 30 Apr 2013 15:27:56 -0700 Subject: hfsplus: add error propagation to __hfsplus_ext_write_extent() __hfsplus_ext_write_extent() suppresses errors coming from hfs_brec_find(). The patch implements error code propagation. Signed-off-by: Alexey Khoroshilov Reviewed-by: Vyacheslav Dubeyko Cc: Hin-Tak Leung Cc: Al Viro Cc: Artem Bityutskiy Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 43658c6..fbb212f 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -83,7 +83,7 @@ static u32 hfsplus_ext_lastblock(struct hfsplus_extent *ext) return be32_to_cpu(ext->start_block) + be32_to_cpu(ext->block_count); } -static void __hfsplus_ext_write_extent(struct inode *inode, +static int __hfsplus_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) { struct hfsplus_inode_info *hip = HFSPLUS_I(inode); @@ -98,13 +98,13 @@ static void __hfsplus_ext_write_extent(struct inode *inode, res = hfs_brec_find(fd, hfs_find_rec_by_key); if (hip->extent_state & HFSPLUS_EXT_NEW) { if (res != -ENOENT) - return; + return res; hfs_brec_insert(fd, hip->cached_extents, sizeof(hfsplus_extent_rec)); hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW); } else { if (res) - return; + return res; hfs_bnode_write(fd->bnode, hip->cached_extents, fd->entryoffset, fd->entrylength); hip->extent_state &= ~HFSPLUS_EXT_DIRTY; @@ -117,11 +117,13 @@ static void __hfsplus_ext_write_extent(struct inode *inode, * to explicily mark the inode dirty, too. */ set_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags); + + return 0; } static int hfsplus_ext_write_extent_locked(struct inode *inode) { - int res; + int res = 0; if (HFSPLUS_I(inode)->extent_state & HFSPLUS_EXT_DIRTY) { struct hfs_find_data fd; @@ -129,10 +131,10 @@ static int hfsplus_ext_write_extent_locked(struct inode *inode) res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); if (res) return res; - __hfsplus_ext_write_extent(inode, &fd); + res = __hfsplus_ext_write_extent(inode, &fd); hfs_find_exit(&fd); } - return 0; + return res; } int hfsplus_ext_write_extent(struct inode *inode) @@ -175,8 +177,11 @@ static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, WARN_ON(!mutex_is_locked(&hip->extents_lock)); - if (hip->extent_state & HFSPLUS_EXT_DIRTY) - __hfsplus_ext_write_extent(inode, fd); + if (hip->extent_state & HFSPLUS_EXT_DIRTY) { + res = __hfsplus_ext_write_extent(inode, fd); + if (res) + return res; + } res = __hfsplus_ext_read_extent(fd, hip->cached_extents, inode->i_ino, block, HFSPLUS_IS_RSRC(inode) ? -- cgit v0.10.2 From 865f38a3a3262d4a8a3d381a6899d2a6b7242244 Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Tue, 30 Apr 2013 15:27:58 -0700 Subject: hfsplus: remove duplicated message prefix in hfsplus_block_free() Signed-off-by: Vyacheslav Dubeyko Cc: Christoph Hellwig Cc: Al Viro Cc: Hin-Tak Leung Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index 826e864..d2954451 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c @@ -238,8 +238,7 @@ out: return 0; kaboom: - pr_crit("hfsplus: unable to mark blocks free: error %ld\n", - PTR_ERR(page)); + pr_crit("unable to mark blocks free: error %ld\n", PTR_ERR(page)); mutex_unlock(&sbi->alloc_mutex); return -EIO; -- cgit v0.10.2 From 84c751bd4aebbaae995fe32279d3dba48327bad4 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 30 Apr 2013 15:27:59 -0700 Subject: ptrace: add ability to retrieve signals without removing from a queue (v4) This patch adds a new ptrace request PTRACE_PEEKSIGINFO. This request is used to retrieve information about pending signals starting with the specified sequence number. Siginfo_t structures are copied from the child into the buffer starting at "data". The argument "addr" is a pointer to struct ptrace_peeksiginfo_args. struct ptrace_peeksiginfo_args { u64 off; /* from which siginfo to start */ u32 flags; s32 nr; /* how may siginfos to take */ }; "nr" has type "s32", because ptrace() returns "long", which has 32 bits on i386 and a negative values is used for errors. Currently here is only one flag PTRACE_PEEKSIGINFO_SHARED for dumping signals from process-wide queue. If this flag is not set, signals are read from a per-thread queue. The request PTRACE_PEEKSIGINFO returns a number of dumped signals. If a signal with the specified sequence number doesn't exist, ptrace returns zero. The request returns an error, if no signal has been dumped. Errors: EINVAL - one or more specified flags are not supported or nr is negative EFAULT - buf or addr is outside your accessible address space. A result siginfo contains a kernel part of si_code which usually striped, but it's required for queuing the same siginfo back during restore of pending signals. This functionality is required for checkpointing pending signals. Pedro Alves suggested using it in "gdb" to peek at pending signals. gdb already uses PTRACE_GETSIGINFO to get the siginfo for the signal which was already dequeued. This functionality allows gdb to look at the pending signals which were not reported yet. The prototype of this code was developed by Oleg Nesterov. Signed-off-by: Andrew Vagin Cc: Roland McGrath Cc: Oleg Nesterov Cc: "Paul E. McKenney" Cc: David Howells Cc: Dave Jones Cc: "Michael Kerrisk (man-pages)" Cc: Pavel Emelyanov Cc: Linus Torvalds Cc: Pedro Alves Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index 022ab18..52ebcc8 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -5,6 +5,7 @@ /* has the defines to get at the registers. */ +#include #define PTRACE_TRACEME 0 #define PTRACE_PEEKTEXT 1 @@ -52,6 +53,17 @@ #define PTRACE_INTERRUPT 0x4207 #define PTRACE_LISTEN 0x4208 +#define PTRACE_PEEKSIGINFO 0x4209 + +struct ptrace_peeksiginfo_args { + __u64 off; /* from which siginfo to start */ + __u32 flags; + __s32 nr; /* how may siginfos to take */ +}; + +/* Read signals from a shared (process wide) queue */ +#define PTRACE_PEEKSIGINFO_SHARED (1 << 0) + /* Wait extended result codes for the above trace options. */ #define PTRACE_EVENT_FORK 1 #define PTRACE_EVENT_VFORK 2 diff --git a/kernel/ptrace.c b/kernel/ptrace.c index acbd284..17ae54d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -24,6 +24,7 @@ #include #include #include +#include static int ptrace_trapping_sleep_fn(void *flags) @@ -618,6 +619,81 @@ static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info) return error; } +static int ptrace_peek_siginfo(struct task_struct *child, + unsigned long addr, + unsigned long data) +{ + struct ptrace_peeksiginfo_args arg; + struct sigpending *pending; + struct sigqueue *q; + int ret, i; + + ret = copy_from_user(&arg, (void __user *) addr, + sizeof(struct ptrace_peeksiginfo_args)); + if (ret) + return -EFAULT; + + if (arg.flags & ~PTRACE_PEEKSIGINFO_SHARED) + return -EINVAL; /* unknown flags */ + + if (arg.nr < 0) + return -EINVAL; + + if (arg.flags & PTRACE_PEEKSIGINFO_SHARED) + pending = &child->signal->shared_pending; + else + pending = &child->pending; + + for (i = 0; i < arg.nr; ) { + siginfo_t info; + s32 off = arg.off + i; + + spin_lock_irq(&child->sighand->siglock); + list_for_each_entry(q, &pending->list, list) { + if (!off--) { + copy_siginfo(&info, &q->info); + break; + } + } + spin_unlock_irq(&child->sighand->siglock); + + if (off >= 0) /* beyond the end of the list */ + break; + +#ifdef CONFIG_COMPAT + if (unlikely(is_compat_task())) { + compat_siginfo_t __user *uinfo = compat_ptr(data); + + ret = copy_siginfo_to_user32(uinfo, &info); + ret |= __put_user(info.si_code, &uinfo->si_code); + } else +#endif + { + siginfo_t __user *uinfo = (siginfo_t __user *) data; + + ret = copy_siginfo_to_user(uinfo, &info); + ret |= __put_user(info.si_code, &uinfo->si_code); + } + + if (ret) { + ret = -EFAULT; + break; + } + + data += sizeof(siginfo_t); + i++; + + if (signal_pending(current)) + break; + + cond_resched(); + } + + if (i > 0) + return i; + + return ret; +} #ifdef PTRACE_SINGLESTEP #define is_singlestep(request) ((request) == PTRACE_SINGLESTEP) @@ -748,6 +824,10 @@ int ptrace_request(struct task_struct *child, long request, ret = put_user(child->ptrace_message, datalp); break; + case PTRACE_PEEKSIGINFO: + ret = ptrace_peek_siginfo(child, addr, data); + break; + case PTRACE_GETSIGINFO: ret = ptrace_getsiginfo(child, &siginfo); if (!ret) -- cgit v0.10.2 From 17afab1de42236ee2f6235f4383cc6f3f13f8a10 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 30 Apr 2013 15:28:00 -0700 Subject: selftest: add a test case for PTRACE_PEEKSIGINFO * Dump signals from process-wide and per-thread queues with different sizes of buffers. * Check error paths for buffers with restricted permissions. A part of buffer or a whole buffer is for read-only. * Try to get nonexistent signal. Signed-off-by: Andrew Vagin Cc: Roland McGrath Cc: Oleg Nesterov Cc: "Paul E. McKenney" Cc: David Howells Cc: Dave Jones Cc: "Michael Kerrisk (man-pages)" Cc: Pavel Emelyanov Cc: Linus Torvalds Cc: Pedro Alves Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 5eff5f7..fa6ea69 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -4,6 +4,7 @@ TARGETS += efivarfs TARGETS += kcmp TARGETS += memory-hotplug TARGETS += mqueue +TARGETS += ptrace TARGETS += soft-dirty TARGETS += vm diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile new file mode 100644 index 0000000..47ae2d3 --- /dev/null +++ b/tools/testing/selftests/ptrace/Makefile @@ -0,0 +1,10 @@ +CFLAGS += -iquote../../../../include/uapi -Wall +peeksiginfo: peeksiginfo.c + +all: peeksiginfo + +clean: + rm -f peeksiginfo + +run_tests: all + @./peeksiginfo || echo "peeksiginfo selftests: [FAIL]" diff --git a/tools/testing/selftests/ptrace/peeksiginfo.c b/tools/testing/selftests/ptrace/peeksiginfo.c new file mode 100644 index 0000000..d46558b --- /dev/null +++ b/tools/testing/selftests/ptrace/peeksiginfo.c @@ -0,0 +1,214 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "linux/ptrace.h" + +static int sys_rt_sigqueueinfo(pid_t tgid, int sig, siginfo_t *uinfo) +{ + return syscall(SYS_rt_sigqueueinfo, tgid, sig, uinfo); +} + +static int sys_rt_tgsigqueueinfo(pid_t tgid, pid_t tid, + int sig, siginfo_t *uinfo) +{ + return syscall(SYS_rt_tgsigqueueinfo, tgid, tid, sig, uinfo); +} + +static int sys_ptrace(int request, pid_t pid, void *addr, void *data) +{ + return syscall(SYS_ptrace, request, pid, addr, data); +} + +#define SIGNR 10 +#define TEST_SICODE_PRIV -1 +#define TEST_SICODE_SHARE -2 + +#define err(fmt, ...) \ + fprintf(stderr, \ + "Error (%s:%d): " fmt, \ + __FILE__, __LINE__, ##__VA_ARGS__) + +static int check_error_paths(pid_t child) +{ + struct ptrace_peeksiginfo_args arg; + int ret, exit_code = -1; + void *addr_rw, *addr_ro; + + /* + * Allocate two contiguous pages. The first one is for read-write, + * another is for read-only. + */ + addr_rw = mmap(NULL, 2 * PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (addr_rw == MAP_FAILED) { + err("mmap() failed: %m\n"); + return 1; + } + + addr_ro = mmap(addr_rw + PAGE_SIZE, PAGE_SIZE, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (addr_ro == MAP_FAILED) { + err("mmap() failed: %m\n"); + goto out; + } + + arg.nr = SIGNR; + arg.off = 0; + + /* Unsupported flags */ + arg.flags = ~0; + ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, addr_rw); + if (ret != -1 || errno != EINVAL) { + err("sys_ptrace() returns %d (expected -1)," + " errno %d (expected %d): %m\n", + ret, errno, EINVAL); + goto out; + } + arg.flags = 0; + + /* A part of the buffer is read-only */ + ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, + addr_ro - sizeof(siginfo_t) * 2); + if (ret != 2) { + err("sys_ptrace() returns %d (expected 2): %m\n", ret); + goto out; + } + + /* Read-only buffer */ + ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, addr_ro); + if (ret != -1 && errno != EFAULT) { + err("sys_ptrace() returns %d (expected -1)," + " errno %d (expected %d): %m\n", + ret, errno, EFAULT); + goto out; + } + + exit_code = 0; +out: + munmap(addr_rw, 2 * PAGE_SIZE); + return exit_code; +} + +int check_direct_path(pid_t child, int shared, int nr) +{ + struct ptrace_peeksiginfo_args arg = {.flags = 0, .nr = nr, .off = 0}; + int i, j, ret, exit_code = -1; + siginfo_t siginfo[SIGNR]; + int si_code; + + if (shared == 1) { + arg.flags = PTRACE_PEEKSIGINFO_SHARED; + si_code = TEST_SICODE_SHARE; + } else { + arg.flags = 0; + si_code = TEST_SICODE_PRIV; + } + + for (i = 0; i < SIGNR; ) { + arg.off = i; + ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, siginfo); + if (ret == -1) { + err("ptrace() failed: %m\n"); + goto out; + } + + if (ret == 0) + break; + + for (j = 0; j < ret; j++, i++) { + if (siginfo[j].si_code == si_code && + siginfo[j].si_int == i) + continue; + + err("%d: Wrong siginfo i=%d si_code=%d si_int=%d\n", + shared, i, siginfo[j].si_code, siginfo[j].si_int); + goto out; + } + } + + if (i != SIGNR) { + err("Only %d signals were read\n", i); + goto out; + } + + exit_code = 0; +out: + return exit_code; +} + +int main(int argc, char *argv[]) +{ + siginfo_t siginfo[SIGNR]; + int i, exit_code = 1; + sigset_t blockmask; + pid_t child; + + sigemptyset(&blockmask); + sigaddset(&blockmask, SIGRTMIN); + sigprocmask(SIG_BLOCK, &blockmask, NULL); + + child = fork(); + if (child == -1) { + err("fork() failed: %m"); + return 1; + } else if (child == 0) { + pid_t ppid = getppid(); + while (1) { + if (ppid != getppid()) + break; + sleep(1); + } + return 1; + } + + /* Send signals in process-wide and per-thread queues */ + for (i = 0; i < SIGNR; i++) { + siginfo->si_code = TEST_SICODE_SHARE; + siginfo->si_int = i; + sys_rt_sigqueueinfo(child, SIGRTMIN, siginfo); + + siginfo->si_code = TEST_SICODE_PRIV; + siginfo->si_int = i; + sys_rt_tgsigqueueinfo(child, child, SIGRTMIN, siginfo); + } + + if (sys_ptrace(PTRACE_ATTACH, child, NULL, NULL) == -1) + return 1; + + waitpid(child, NULL, 0); + + /* Dump signals one by one*/ + if (check_direct_path(child, 0, 1)) + goto out; + /* Dump all signals for one call */ + if (check_direct_path(child, 0, SIGNR)) + goto out; + + /* + * Dump signal from the process-wide queue. + * The number of signals is not multible to the buffer size + */ + if (check_direct_path(child, 1, 3)) + goto out; + + if (check_error_paths(child)) + goto out; + + printf("PASS\n"); + exit_code = 0; +out: + if (sys_ptrace(PTRACE_KILL, child, NULL, NULL) == -1) + return 1; + + waitpid(child, NULL, 0); + + return exit_code; +} -- cgit v0.10.2 From 938e4b22e2a7d0f6f3962e601339347b2d8e09f5 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 30 Apr 2013 15:28:02 -0700 Subject: usermodehelper: export call_usermodehelper_exec() and call_usermodehelper_setup() call_usermodehelper_setup() + call_usermodehelper_exec() need to be called instead of call_usermodehelper_fns() when the cleanup function needs to be called even when an ENOMEM error occurs. In this case using call_usermodehelper_fns() the user can't distinguish if the cleanup function was called or not. [akpm@linux-foundation.org: export call_usermodehelper_setup() to modules] Signed-off-by: Lucas De Marchi Reviewed-by: Oleg Nesterov Cc: David Howells Cc: James Morris Cc: Al Viro Cc: Tejun Heo Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 5398d58..7eebcf5 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -71,6 +71,14 @@ call_usermodehelper_fns(char *path, char **argv, char **envp, int wait, int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *), void *data); +extern struct subprocess_info * +call_usermodehelper_setup(char *path, char **argv, char **envp, gfp_t gfp_mask, + int (*init)(struct subprocess_info *info, struct cred *new), + void (*cleanup)(struct subprocess_info *), void *data); + +extern int +call_usermodehelper_exec(struct subprocess_info *info, int wait); + static inline int call_usermodehelper(char *path, char **argv, char **envp, int wait) { diff --git a/kernel/kmod.c b/kernel/kmod.c index 56dd349..e11ea14 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -502,14 +502,28 @@ static void helper_unlock(void) * @argv: arg vector for process * @envp: environment for process * @gfp_mask: gfp mask for memory allocation + * @cleanup: a cleanup function + * @init: an init function + * @data: arbitrary context sensitive data * * Returns either %NULL on allocation failure, or a subprocess_info * structure. This should be passed to call_usermodehelper_exec to * exec the process and free the structure. + * + * The init function is used to customize the helper process prior to + * exec. A non-zero return code causes the process to error out, exit, + * and return the failure to the calling process + * + * The cleanup function is just before ethe subprocess_info is about to + * be freed. This can be used for freeing the argv and envp. The + * Function must be runnable in either a process context or the + * context in which call_usermodehelper_exec is called. */ -static struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, - char **envp, gfp_t gfp_mask) + char **envp, gfp_t gfp_mask, + int (*init)(struct subprocess_info *info, struct cred *new), + void (*cleanup)(struct subprocess_info *info), + void *data) { struct subprocess_info *sub_info; sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask); @@ -520,36 +534,14 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, sub_info->path = path; sub_info->argv = argv; sub_info->envp = envp; + + sub_info->cleanup = cleanup; + sub_info->init = init; + sub_info->data = data; out: return sub_info; } - -/** - * call_usermodehelper_setfns - set a cleanup/init function - * @info: a subprocess_info returned by call_usermodehelper_setup - * @cleanup: a cleanup function - * @init: an init function - * @data: arbitrary context sensitive data - * - * The init function is used to customize the helper process prior to - * exec. A non-zero return code causes the process to error out, exit, - * and return the failure to the calling process - * - * The cleanup function is just before ethe subprocess_info is about to - * be freed. This can be used for freeing the argv and envp. The - * Function must be runnable in either a process context or the - * context in which call_usermodehelper_exec is called. - */ -static -void call_usermodehelper_setfns(struct subprocess_info *info, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *info), - void *data) -{ - info->cleanup = cleanup; - info->init = init; - info->data = data; -} +EXPORT_SYMBOL(call_usermodehelper_setup); /** * call_usermodehelper_exec - start a usermode application @@ -563,7 +555,6 @@ void call_usermodehelper_setfns(struct subprocess_info *info, * asynchronously if wait is not set, and runs as a child of keventd. * (ie. it runs with full root capabilities). */ -static int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) { DECLARE_COMPLETION_ONSTACK(done); @@ -615,6 +606,7 @@ unlock: helper_unlock(); return retval; } +EXPORT_SYMBOL(call_usermodehelper_exec); /* * call_usermodehelper_fns() will not run the caller-provided cleanup function @@ -630,13 +622,12 @@ int call_usermodehelper_fns( struct subprocess_info *info; gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL; - info = call_usermodehelper_setup(path, argv, envp, gfp_mask); + info = call_usermodehelper_setup(path, argv, envp, gfp_mask, + init, cleanup, data); if (info == NULL) return -ENOMEM; - call_usermodehelper_setfns(info, init, cleanup, data); - return call_usermodehelper_exec(info, wait); } EXPORT_SYMBOL(call_usermodehelper_fns); -- cgit v0.10.2 From f634460c90751da21745eec7a220edf76c7d0c76 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 30 Apr 2013 15:28:03 -0700 Subject: kmod: split call to call_usermodehelper_fns() Use call_usermodehelper_setup() + call_usermodehelper_exec() instead of calling call_usermodehelper_fns(). In case the latter returns -ENOMEM the cleanup function may had not been called - in this case we would not free argv and module_name. Signed-off-by: Lucas De Marchi Cc: Oleg Nesterov Cc: David Howells Cc: James Morris Cc: Al Viro Cc: Tejun Heo Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/kmod.c b/kernel/kmod.c index e11ea14..9941a4f 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -77,6 +77,7 @@ static void free_modprobe_argv(struct subprocess_info *info) static int call_modprobe(char *module_name, int wait) { + struct subprocess_info *info; static char *envp[] = { "HOME=/", "TERM=linux", @@ -98,8 +99,15 @@ static int call_modprobe(char *module_name, int wait) argv[3] = module_name; /* check free_modprobe_argv() */ argv[4] = NULL; - return call_usermodehelper_fns(modprobe_path, argv, envp, - wait | UMH_KILLABLE, NULL, free_modprobe_argv, NULL); + info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL, + NULL, free_modprobe_argv, NULL); + if (!info) + goto free_module_name; + + return call_usermodehelper_exec(info, wait | UMH_KILLABLE); + +free_module_name: + kfree(module_name); free_argv: kfree(argv); out: -- cgit v0.10.2 From 93997f6ddb9d574cd58694f13c5b15212927bfa1 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 30 Apr 2013 15:28:05 -0700 Subject: KEYS: split call to call_usermodehelper_fns() Use call_usermodehelper_setup() + call_usermodehelper_exec() instead of calling call_usermodehelper_fns(). In case there's an OOM in this last function the cleanup function may not be called - in this case we would miss a call to key_put(). Signed-off-by: Lucas De Marchi Cc: Oleg Nesterov Acked-by: David Howells Acked-by: James Morris Cc: Al Viro Cc: Tejun Heo Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 4bd6bdb..c411f9b 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -93,9 +93,16 @@ static void umh_keys_cleanup(struct subprocess_info *info) static int call_usermodehelper_keys(char *path, char **argv, char **envp, struct key *session_keyring, int wait) { - return call_usermodehelper_fns(path, argv, envp, wait, - umh_keys_init, umh_keys_cleanup, - key_get(session_keyring)); + struct subprocess_info *info; + + info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL, + umh_keys_init, umh_keys_cleanup, + session_keyring); + if (!info) + return -ENOMEM; + + key_get(session_keyring); + return call_usermodehelper_exec(info, wait); } /* -- cgit v0.10.2 From fb96c475f6b27bd2c7f6fe0720e6972909a203ca Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 30 Apr 2013 15:28:06 -0700 Subject: coredump: remove trailling whitespace Signed-off-by: Lucas De Marchi Cc: Oleg Nesterov Cc: David Howells Cc: James Morris Cc: Al Viro Cc: Tejun Heo Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/coredump.c b/fs/coredump.c index c647965..7dfb3b0 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -522,7 +522,7 @@ void do_coredump(siginfo_t *siginfo) ispipe = format_corename(&cn, &cprm); - if (ispipe) { + if (ispipe) { int dump_count; char **helper_argv; @@ -576,10 +576,10 @@ void do_coredump(siginfo_t *siginfo) NULL, &cprm); argv_free(helper_argv); if (retval) { - printk(KERN_INFO "Core dump to %s pipe failed\n", + printk(KERN_INFO "Core dump to %s pipe failed\n", cn.corename); goto close_fail; - } + } } else { struct inode *inode; -- cgit v0.10.2 From 907ed1328d2a748f3d6028503d3e04c74ea62730 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 30 Apr 2013 15:28:07 -0700 Subject: usermodehelper: split remaining calls to call_usermodehelper_fns() These are the only users of call_usermodehelper_fns(). This function suffers from not being able to determine if the cleanup is called. Even if in this places the cleanup pointer is NULL, convert them to use the separate call_usermodehelper_setup() + call_usermodehelper_exec() functions so we can remove the _fns variant. Signed-off-by: Lucas De Marchi Cc: Oleg Nesterov Cc: David Howells Cc: James Morris Cc: Al Viro Cc: Tejun Heo Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/coredump.c b/fs/coredump.c index 7dfb3b0..d52f6bd 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -525,6 +525,7 @@ void do_coredump(siginfo_t *siginfo) if (ispipe) { int dump_count; char **helper_argv; + struct subprocess_info *sub_info; if (ispipe < 0) { printk(KERN_WARNING "format_corename failed\n"); @@ -571,9 +572,14 @@ void do_coredump(siginfo_t *siginfo) goto fail_dropcount; } - retval = call_usermodehelper_fns(helper_argv[0], helper_argv, - NULL, UMH_WAIT_EXEC, umh_pipe_setup, - NULL, &cprm); + retval = -ENOMEM; + sub_info = call_usermodehelper_setup(helper_argv[0], + helper_argv, NULL, GFP_KERNEL, + umh_pipe_setup, NULL, &cprm); + if (sub_info) + retval = call_usermodehelper_exec(sub_info, + UMH_WAIT_EXEC); + argv_free(helper_argv); if (retval) { printk(KERN_INFO "Core dump to %s pipe failed\n", diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index a32ec1c..3e0878e 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -50,6 +50,7 @@ static int init_linuxrc(struct subprocess_info *info, struct cred *new) static void __init handle_initrd(void) { + struct subprocess_info *info; static char *argv[] = { "linuxrc", NULL, }; extern char *envp_init[]; int error; @@ -70,8 +71,11 @@ static void __init handle_initrd(void) */ current->flags |= PF_FREEZER_SKIP; - call_usermodehelper_fns("/linuxrc", argv, envp_init, UMH_WAIT_PROC, - init_linuxrc, NULL, NULL); + info = call_usermodehelper_setup("/linuxrc", argv, envp_init, + GFP_KERNEL, init_linuxrc, NULL, NULL); + if (!info) + return; + call_usermodehelper_exec(info, UMH_WAIT_PROC); current->flags &= ~PF_FREEZER_SKIP; -- cgit v0.10.2 From 66e5b7e1948cdbdca2b0cc6ddc6d69ee84583fb4 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 30 Apr 2013 15:28:09 -0700 Subject: kmod: remove call_usermodehelper_fns() This function suffers from not being able to determine if the cleanup is called in case it returns -ENOMEM. Nobody is using it anymore, so let's remove it. Signed-off-by: Lucas De Marchi Cc: Oleg Nesterov Cc: David Howells Cc: James Morris Cc: Al Viro Cc: Tejun Heo Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 7eebcf5..0555cc6 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -67,9 +67,7 @@ struct subprocess_info { }; extern int -call_usermodehelper_fns(char *path, char **argv, char **envp, int wait, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *), void *data); +call_usermodehelper(char *path, char **argv, char **envp, int wait); extern struct subprocess_info * call_usermodehelper_setup(char *path, char **argv, char **envp, gfp_t gfp_mask, @@ -79,13 +77,6 @@ call_usermodehelper_setup(char *path, char **argv, char **envp, gfp_t gfp_mask, extern int call_usermodehelper_exec(struct subprocess_info *info, int wait); -static inline int -call_usermodehelper(char *path, char **argv, char **envp, int wait) -{ - return call_usermodehelper_fns(path, argv, envp, wait, - NULL, NULL, NULL); -} - extern struct ctl_table usermodehelper_table[]; enum umh_disable_depth { diff --git a/kernel/kmod.c b/kernel/kmod.c index 9941a4f..1296e72 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -555,8 +555,8 @@ EXPORT_SYMBOL(call_usermodehelper_setup); * call_usermodehelper_exec - start a usermode application * @sub_info: information about the subprocessa * @wait: wait for the application to finish and return status. - * when -1 don't wait at all, but you get no useful error back when - * the program couldn't be exec'ed. This makes it safe to call + * when UMH_NO_WAIT don't wait at all, but you get no useful error back + * when the program couldn't be exec'ed. This makes it safe to call * from interrupt context. * * Runs a user-space application. The application is started @@ -616,29 +616,32 @@ unlock: } EXPORT_SYMBOL(call_usermodehelper_exec); -/* - * call_usermodehelper_fns() will not run the caller-provided cleanup function - * if a memory allocation failure is experienced. So the caller might need to - * check the call_usermodehelper_fns() return value: if it is -ENOMEM, perform - * the necessaary cleanup within the caller. +/** + * call_usermodehelper() - prepare and start a usermode application + * @path: path to usermode executable + * @argv: arg vector for process + * @envp: environment for process + * @wait: wait for the application to finish and return status. + * when UMH_NO_WAIT don't wait at all, but you get no useful error back + * when the program couldn't be exec'ed. This makes it safe to call + * from interrupt context. + * + * This function is the equivalent to use call_usermodehelper_setup() and + * call_usermodehelper_exec(). */ -int call_usermodehelper_fns( - char *path, char **argv, char **envp, int wait, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *), void *data) +int call_usermodehelper(char *path, char **argv, char **envp, int wait) { struct subprocess_info *info; gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL; info = call_usermodehelper_setup(path, argv, envp, gfp_mask, - init, cleanup, data); - + NULL, NULL, NULL); if (info == NULL) return -ENOMEM; return call_usermodehelper_exec(info, wait); } -EXPORT_SYMBOL(call_usermodehelper_fns); +EXPORT_SYMBOL(call_usermodehelper); static int proc_cap_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) -- cgit v0.10.2 From 403bad72b67d8b3f5a0240af5023adfa48132a65 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 30 Apr 2013 15:28:10 -0700 Subject: coredump: only SIGKILL should interrupt the coredumping task There are 2 well known and ancient problems with coredump/signals, and a lot of related bug reports: - do_coredump() clears TIF_SIGPENDING but of course this can't help if, say, SIGCHLD comes after that. In this case the coredump can fail unexpectedly. See for example wait_for_dump_helper()->signal_pending() check but there are other reasons. - At the same time, dumping a huge core on the slow media can take a lot of time/resources and there is no way to kill the coredumping task reliably. In particular this is not oom_kill-friendly. This patch tries to fix the 1st problem, and makes the preparation for the next changes. We add the new SIGNAL_GROUP_COREDUMP flag set by zap_threads() to indicate that this process dumps the core. prepare_signal() checks this flag and nacks any signal except SIGKILL. Note that this check tries to be conservative, in the long term we should probably treat the SIGNAL_GROUP_EXIT case equally but this needs more discussion. See marc.info/?l=linux-kernel&m=120508897917439 Notes: - recalc_sigpending() doesn't check SIGNAL_GROUP_COREDUMP. The patch assumes that dump_write/etc paths should never call it, but we can change it as well. - There is another source of TIF_SIGPENDING, freezer. This will be addressed separately. Signed-off-by: Oleg Nesterov Tested-by: Mandeep Singh Baines Cc: Ingo Molnar Cc: Neil Horman Cc: "Rafael J. Wysocki" Cc: Roland McGrath Cc: Tejun Heo Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/coredump.c b/fs/coredump.c index d52f6bd..f91cfd8 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -280,8 +280,8 @@ static int zap_process(struct task_struct *start, int exit_code) return nr; } -static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, - struct core_state *core_state, int exit_code) +static int zap_threads(struct task_struct *tsk, struct mm_struct *mm, + struct core_state *core_state, int exit_code) { struct task_struct *g, *p; unsigned long flags; @@ -291,6 +291,9 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, if (!signal_group_exit(tsk->signal)) { mm->core_state = core_state; nr = zap_process(tsk, exit_code); + /* ignore all signals except SIGKILL, see prepare_signal() */ + tsk->signal->flags |= SIGNAL_GROUP_COREDUMP; + clear_tsk_thread_flag(tsk, TIF_SIGPENDING); } spin_unlock_irq(&tsk->sighand->siglock); if (unlikely(nr < 0)) @@ -514,12 +517,6 @@ void do_coredump(siginfo_t *siginfo) old_cred = override_creds(cred); - /* - * Clear any false indication of pending signals that might - * be seen by the filesystem code called to write the core file. - */ - clear_thread_flag(TIF_SIGPENDING); - ispipe = format_corename(&cn, &cprm); if (ispipe) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 54ddcb8..a22baf8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -626,6 +626,7 @@ struct signal_struct { #define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ #define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ #define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ +#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */ /* * Pending notifications to parent. */ diff --git a/kernel/signal.c b/kernel/signal.c index 27ece01..cede589 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -854,12 +854,14 @@ static void ptrace_trap_notify(struct task_struct *t) * Returns true if the signal should be actually delivered, otherwise * it should be dropped. */ -static int prepare_signal(int sig, struct task_struct *p, bool force) +static bool prepare_signal(int sig, struct task_struct *p, bool force) { struct signal_struct *signal = p->signal; struct task_struct *t; - if (unlikely(signal->flags & SIGNAL_GROUP_EXIT)) { + if (signal->flags & (SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP)) { + if (signal->flags & SIGNAL_GROUP_COREDUMP) + return sig == SIGKILL; /* * The process is in the middle of dying, nothing to do. */ -- cgit v0.10.2 From 6cd8f0acae3420afce37bf51a9ff8c2c20342af5 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 30 Apr 2013 15:28:12 -0700 Subject: coredump: ensure that SIGKILL always kills the dumping thread prepare_signal() blesses SIGKILL sent to the dumping process but this signal can be "lost" anyway. The problems is, complete_signal() sees SIGNAL_GROUP_EXIT and skips the "kill them all" logic. And even if the dumping process is single-threaded (so the target is always "correct"), the group-wide SIGKILL is not recorded in task->pending and thus __fatal_signal_pending() won't be true. A multi-threaded case has even more problems. And even ignoring all technical details, SIGNAL_GROUP_EXIT doesn't look right to me. This coredumping process is not exiting yet, it can do a lot of work dumping the core. With this patch the dumping process doesn't have SIGNAL_GROUP_EXIT, we set signal->group_exit_task instead. This makes signal_group_exit() true and thus this should equally close the races with exit/exec/stop but allows to kill the dumping thread reliably. Notes: - It is not clear what should we do with ->group_exit_code if the dumper was killed, see the next change. - we need more (hopefully straightforward) changes to ensure that SIGKILL actually interrupts the coredump. Basically we need to check __fatal_signal_pending() in dump_write() and dump_seek(). Signed-off-by: Oleg Nesterov Tested-by: Mandeep Singh Baines Cc: Ingo Molnar Cc: Neil Horman Cc: "Rafael J. Wysocki" Cc: Roland McGrath Cc: Tejun Heo Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/coredump.c b/fs/coredump.c index f91cfd8..6fea590 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -263,7 +263,6 @@ static int zap_process(struct task_struct *start, int exit_code) struct task_struct *t; int nr = 0; - start->signal->flags = SIGNAL_GROUP_EXIT; start->signal->group_exit_code = exit_code; start->signal->group_stop_count = 0; @@ -291,8 +290,9 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm, if (!signal_group_exit(tsk->signal)) { mm->core_state = core_state; nr = zap_process(tsk, exit_code); + tsk->signal->group_exit_task = tsk; /* ignore all signals except SIGKILL, see prepare_signal() */ - tsk->signal->flags |= SIGNAL_GROUP_COREDUMP; + tsk->signal->flags = SIGNAL_GROUP_COREDUMP; clear_tsk_thread_flag(tsk, TIF_SIGPENDING); } spin_unlock_irq(&tsk->sighand->siglock); @@ -343,6 +343,7 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm, if (unlikely(p->mm == mm)) { lock_task_sighand(p, &flags); nr += zap_process(p, exit_code); + p->signal->flags = SIGNAL_GROUP_EXIT; unlock_task_sighand(p, &flags); } break; @@ -394,6 +395,11 @@ static void coredump_finish(struct mm_struct *mm) struct core_thread *curr, *next; struct task_struct *task; + spin_lock_irq(¤t->sighand->siglock); + current->signal->group_exit_task = NULL; + current->signal->flags = SIGNAL_GROUP_EXIT; + spin_unlock_irq(¤t->sighand->siglock); + next = mm->core_state->dumper.next; while ((curr = next) != NULL) { next = curr->next; -- cgit v0.10.2 From acdedd99b0f3bff9b4bb2103a6b1268c03d1f963 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 30 Apr 2013 15:28:13 -0700 Subject: coredump: sanitize the setting of signal->group_exit_code Now that the coredumping process can be SIGKILL'ed, the setting of ->group_exit_code in do_coredump() can race with complete_signal() and SIGKILL or 0x80 can be "lost", or wait(status) can report status == SIGKILL | 0x80. But the main problem is that it is not clear to me what should we do if binfmt->core_dump() succeeds but SIGKILL was sent, that is why this patch comes as a separate change. This patch adds 0x80 if ->core_dump() succeeds and the process was not killed. But perhaps we can (should?) re-set ->group_exit_code changed by SIGKILL back to "siginfo->si_signo |= 0x80" in case when core_dumped == T. Signed-off-by: Oleg Nesterov Tested-by: Mandeep Singh Baines Cc: Ingo Molnar Cc: Neil Horman Cc: "Rafael J. Wysocki" Cc: Roland McGrath Cc: Tejun Heo Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/coredump.c b/fs/coredump.c index 6fea590..acc4448 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -390,12 +390,14 @@ static int coredump_wait(int exit_code, struct core_state *core_state) return core_waiters; } -static void coredump_finish(struct mm_struct *mm) +static void coredump_finish(struct mm_struct *mm, bool core_dumped) { struct core_thread *curr, *next; struct task_struct *task; spin_lock_irq(¤t->sighand->siglock); + if (core_dumped && !__fatal_signal_pending(current)) + current->signal->group_exit_code |= 0x80; current->signal->group_exit_task = NULL; current->signal->flags = SIGNAL_GROUP_EXIT; spin_unlock_irq(¤t->sighand->siglock); @@ -480,6 +482,7 @@ void do_coredump(siginfo_t *siginfo) int ispipe; struct files_struct *displaced; bool need_nonrelative = false; + bool core_dumped = false; static atomic_t core_dump_count = ATOMIC_INIT(0); struct coredump_params cprm = { .siginfo = siginfo, @@ -638,9 +641,7 @@ void do_coredump(siginfo_t *siginfo) goto close_fail; if (displaced) put_files_struct(displaced); - retval = binfmt->core_dump(&cprm); - if (retval) - current->signal->group_exit_code |= 0x80; + core_dumped = binfmt->core_dump(&cprm); if (ispipe && core_pipe_limit) wait_for_dump_helpers(cprm.file); @@ -653,7 +654,7 @@ fail_dropcount: fail_unlock: kfree(cn.corename); fail_corename: - coredump_finish(mm); + coredump_finish(mm, core_dumped); revert_creds(old_cred); fail_creds: put_cred(cred); -- cgit v0.10.2 From 528f827ee0bb508af5c9466562f474675540473e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 30 Apr 2013 15:28:15 -0700 Subject: coredump: introduce dump_interrupted() By discussion with Mandeep. Change dump_write(), dump_seek() and do_coredump() to check signal_pending() and abort if it is true. dump_seek() does this only before f_op->llseek(), otherwise it relies on dump_write(). We need this change to ensure that the coredump won't delay suspend, and to ensure it reacts to SIGKILL "quickly enough", a core dump can take a lot of time. In particular this can help oom-killer. We add the new trivial helper, dump_interrupted() to add the comments and to simplify the potential freezer changes. Perhaps it will have more callers. Ideally it should do try_to_freeze() but then we need the unpleasant changes in dump_write() and wait_for_dump_helpers(). It is not trivial to change dump_write() to restart if f_op->write() fails because of freezing(). We need to handle the short writes, we need to clear TIF_SIGPENDING (and we can't rely on recalc_sigpending() unless we change it to check PF_DUMPCORE). And if the buggy f_op->write() sets TIF_SIGPENDING we can not distinguish this case from the race with freeze_task() + __thaw_task(). So we simply accept the fact that the freezer can truncate a core-dump but at least you can reliably suspend. Hopefully we can tolerate this unlikely case and the necessary complications doesn't worth a trouble. But if we decide to make the coredumping freezable later we can do this on top of this change. Signed-off-by: Oleg Nesterov Acked-by: Mandeep Singh Baines Cc: Neil Horman Cc: "Rafael J. Wysocki" Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/coredump.c b/fs/coredump.c index acc4448..aa8ac69 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -418,6 +418,17 @@ static void coredump_finish(struct mm_struct *mm, bool core_dumped) mm->core_state = NULL; } +static bool dump_interrupted(void) +{ + /* + * SIGKILL or freezing() interrupt the coredumping. Perhaps we + * can do try_to_freeze() and check __fatal_signal_pending(), + * but then we need to teach dump_write() to restart and clear + * TIF_SIGPENDING. + */ + return signal_pending(current); +} + static void wait_for_dump_helpers(struct file *file) { struct pipe_inode_info *pipe; @@ -641,7 +652,7 @@ void do_coredump(siginfo_t *siginfo) goto close_fail; if (displaced) put_files_struct(displaced); - core_dumped = binfmt->core_dump(&cprm); + core_dumped = !dump_interrupted() && binfmt->core_dump(&cprm); if (ispipe && core_pipe_limit) wait_for_dump_helpers(cprm.file); @@ -669,7 +680,9 @@ fail: */ int dump_write(struct file *file, const void *addr, int nr) { - return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; + return !dump_interrupted() && + access_ok(VERIFY_READ, addr, nr) && + file->f_op->write(file, addr, nr, &file->f_pos) == nr; } EXPORT_SYMBOL(dump_write); @@ -678,7 +691,8 @@ int dump_seek(struct file *file, loff_t off) int ret = 1; if (file->f_op->llseek && file->f_op->llseek != no_llseek) { - if (file->f_op->llseek(file, off, SEEK_CUR) < 0) + if (dump_interrupted() || + file->f_op->llseek(file, off, SEEK_CUR) < 0) return 0; } else { char *buf = (char *)get_zeroed_page(GFP_KERNEL); -- cgit v0.10.2 From 079148b919d0c58b796f9ae98bdb53028dbcd5e7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 30 Apr 2013 15:28:16 -0700 Subject: coredump: factor out the setting of PF_DUMPCORE Cleanup. Every linux_binfmt->core_dump() sets PF_DUMPCORE, move this into zap_threads() called by do_coredump(). Signed-off-by: Oleg Nesterov Acked-by: Mandeep Singh Baines Cc: Neil Horman Cc: "Rafael J. Wysocki" Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 03abf9b..81e94d9 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -162,7 +162,6 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, fs = get_fs(); set_fs(KERNEL_DS); has_dumped = 1; - current->flags |= PF_DUMPCORE; strncpy(dump.u_comm, current->comm, sizeof(current->comm)); dump.u_ar0 = offsetof(struct user32, regs); dump.signal = signr; diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index bbc8f88..02fe378 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -62,7 +62,6 @@ static int aout_core_dump(struct coredump_params *cprm) fs = get_fs(); set_fs(KERNEL_DS); has_dumped = 1; - current->flags |= PF_DUMPCORE; strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm)); dump.u_ar0 = offsetof(struct user, regs); dump.signal = cprm->siginfo->si_signo; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index e32344b..34a9771 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2092,8 +2092,7 @@ static int elf_core_dump(struct coredump_params *cprm) goto cleanup; has_dumped = 1; - current->flags |= PF_DUMPCORE; - + fs = get_fs(); set_fs(KERNEL_DS); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 9c13e02..c1cc06a 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1687,8 +1687,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) fill_elf_fdpic_header(elf, e_phnum); has_dumped = 1; - current->flags |= PF_DUMPCORE; - /* * Set up the notes in similar form to SVR4 core dumps made * with info from their /proc. diff --git a/fs/coredump.c b/fs/coredump.c index aa8ac69..7300e31 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -299,6 +299,7 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm, if (unlikely(nr < 0)) return nr; + tsk->flags = PF_DUMPCORE; if (atomic_read(&mm->mm_users) == nr + 1) goto done; /* -- cgit v0.10.2 From dc7ee2aac830e5423f41de87d50441f138f648da Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 30 Apr 2013 15:28:17 -0700 Subject: coredump: change wait_for_dump_helpers() to use wait_event_interruptible() wait_for_dump_helpers() calls wake_up/kill_fasync from inside the wait_event-like loop. This is not needed and in fact this is not strictly correct, we can/should do this only once after we change pipe->writers. We could even check if it becomes zero. Change this code to use use wait_event_interruptible(), this can also help to make this wait freezable. With this patch we check pipe->readers without pipe_lock(), this is fine. Once we see pipe->readers == 1 we know that the handler decremented the counter, this is all we need. Signed-off-by: Oleg Nesterov Acked-by: Mandeep Singh Baines Cc: Neil Horman Cc: "Rafael J. Wysocki" Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/coredump.c b/fs/coredump.c index 7300e31..ec306cc 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -439,17 +439,20 @@ static void wait_for_dump_helpers(struct file *file) pipe_lock(pipe); pipe->readers++; pipe->writers--; + wake_up_interruptible_sync(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + pipe_unlock(pipe); - while ((pipe->readers > 1) && (!signal_pending(current))) { - wake_up_interruptible_sync(&pipe->wait); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); - pipe_wait(pipe); - } + /* + * We actually want wait_event_freezable() but then we need + * to clear TIF_SIGPENDING and improve dump_interrupted(). + */ + wait_event_interruptible(pipe->wait, pipe->readers == 1); + pipe_lock(pipe); pipe->readers--; pipe->writers++; pipe_unlock(pipe); - } /* -- cgit v0.10.2 From 830e0fc967a7ee5013d5d1cf6a3cea71a8868466 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 30 Apr 2013 15:28:18 -0700 Subject: fs, proc: truncate /proc/pid/comm writes to first TASK_COMM_LEN bytes Currently, a write to a procfs file will return the number of bytes successfully written. If the actual string is longer than this, the remainder of the string will not be be written and userspace will complete the operation by issuing additional write()s. Hence $ echo -n "abcdefghijklmnopqrs" > /proc/self/comm results in $ cat /proc/$$/comm pqrs since the final four bytes were written with a second write() since TASK_COMM_LEN == 16. This is obviously an undesired result and not equivalent to prctl(PR_SET_NAME). The implementation should not need to know the definition of TASK_COMM_LEN. This patch truncates the string to the first TASK_COMM_LEN bytes and returns the bytes written as the length of the string written so the second write() is suppressed. $ cat /proc/$$/comm abcdefghijklmno Signed-off-by: David Rientjes Acked-by: John Stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/base.c b/fs/proc/base.c index a193086..3861bce 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1348,11 +1348,10 @@ static ssize_t comm_write(struct file *file, const char __user *buf, struct inode *inode = file_inode(file); struct task_struct *p; char buffer[TASK_COMM_LEN]; + const size_t maxlen = sizeof(buffer) - 1; memset(buffer, 0, sizeof(buffer)); - if (count > sizeof(buffer) - 1) - count = sizeof(buffer) - 1; - if (copy_from_user(buffer, buf, count)) + if (copy_from_user(buffer, buf, count > maxlen ? maxlen : count)) return -EFAULT; p = get_proc_task(inode); -- cgit v0.10.2 From 12eaaf309a798973d215f7f21aa5a67a760ed7c8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 30 Apr 2013 15:28:19 -0700 Subject: set_task_comm: kill the pointless memset() + wmb() set_task_comm() does memset() + wmb() before strlcpy(). This buys nothing and to add to the confusion, the comment is wrong. - We do not need memset() to be "safe from non-terminating string reads", the final char is always zero and we never change it. - wmb() is paired with nothing, it cannot prevent from printing the mixture of the old/new data unless the reader takes the lock. Signed-off-by: Oleg Nesterov Cc: Andi Kleen Cc: John Stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/exec.c b/fs/exec.c index 87e731f..260f89f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1027,17 +1027,7 @@ EXPORT_SYMBOL_GPL(get_task_comm); void set_task_comm(struct task_struct *tsk, char *buf) { task_lock(tsk); - trace_task_rename(tsk, buf); - - /* - * Threads may access current->comm without holding - * the task lock, so write the string carefully. - * Readers without a lock may see incomplete new - * names but are safe from non-terminating string reads. - */ - memset(tsk->comm, 0, TASK_COMM_LEN); - wmb(); strlcpy(tsk->comm, buf, sizeof(tsk->comm)); task_unlock(tsk); perf_event_comm(tsk); -- cgit v0.10.2 From e56fb2874015370e3b7f8d85051f6dce26051df9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 30 Apr 2013 15:28:20 -0700 Subject: exec: do not abuse ->cred_guard_mutex in threadgroup_lock() threadgroup_lock() takes signal->cred_guard_mutex to ensure that thread_group_leader() is stable. This doesn't look nice, the scope of this lock in do_execve() is huge. And as Dave pointed out this can lead to deadlock, we have the following dependencies: do_execve: cred_guard_mutex -> i_mutex cgroup_mount: i_mutex -> cgroup_mutex attach_task_by_pid: cgroup_mutex -> cred_guard_mutex Change de_thread() to take threadgroup_change_begin() around the switch-the-leader code and change threadgroup_lock() to avoid ->cred_guard_mutex. Note that de_thread() can't sleep with ->group_rwsem held, this can obviously deadlock with the exiting leader if the writer is active, so it does threadgroup_change_end() before schedule(). Reported-by: Dave Jones Acked-by: Tejun Heo Acked-by: Li Zefan Signed-off-by: Oleg Nesterov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/exec.c b/fs/exec.c index 260f89f..963f510 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -898,11 +898,13 @@ static int de_thread(struct task_struct *tsk) sig->notify_count = -1; /* for exit_notify() */ for (;;) { + threadgroup_change_begin(tsk); write_lock_irq(&tasklist_lock); if (likely(leader->exit_state)) break; __set_current_state(TASK_KILLABLE); write_unlock_irq(&tasklist_lock); + threadgroup_change_end(tsk); schedule(); if (unlikely(__fatal_signal_pending(tsk))) goto killed; @@ -960,6 +962,7 @@ static int de_thread(struct task_struct *tsk) if (unlikely(leader->ptrace)) __wake_up_parent(leader, leader->parent); write_unlock_irq(&tasklist_lock); + threadgroup_change_end(tsk); release_task(leader); } diff --git a/include/linux/sched.h b/include/linux/sched.h index a22baf8..6f95004 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2249,27 +2249,18 @@ static inline void threadgroup_change_end(struct task_struct *tsk) * * Lock the threadgroup @tsk belongs to. No new task is allowed to enter * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or - * perform exec. This is useful for cases where the threadgroup needs to - * stay stable across blockable operations. + * change ->group_leader/pid. This is useful for cases where the threadgroup + * needs to stay stable across blockable operations. * * fork and exit paths explicitly call threadgroup_change_{begin|end}() for * synchronization. While held, no new task will be added to threadgroup * and no existing live task will have its PF_EXITING set. * - * During exec, a task goes and puts its thread group through unusual - * changes. After de-threading, exclusive access is assumed to resources - * which are usually shared by tasks in the same group - e.g. sighand may - * be replaced with a new one. Also, the exec'ing task takes over group - * leader role including its pid. Exclude these changes while locked by - * grabbing cred_guard_mutex which is used to synchronize exec path. + * de_thread() does threadgroup_change_{begin|end}() when a non-leader + * sub-thread becomes a new leader. */ static inline void threadgroup_lock(struct task_struct *tsk) { - /* - * exec uses exit for de-threading nesting group_rwsem inside - * cred_guard_mutex. Grab cred_guard_mutex first. - */ - mutex_lock(&tsk->signal->cred_guard_mutex); down_write(&tsk->signal->group_rwsem); } @@ -2282,7 +2273,6 @@ static inline void threadgroup_lock(struct task_struct *tsk) static inline void threadgroup_unlock(struct task_struct *tsk) { up_write(&tsk->signal->group_rwsem); - mutex_unlock(&tsk->signal->cred_guard_mutex); } #else static inline void threadgroup_change_begin(struct task_struct *tsk) {} -- cgit v0.10.2 From 310faaa9b2d075b8fd477550c3d85ccddec76f35 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Tue, 30 Apr 2013 15:28:21 -0700 Subject: kexec: fix wrong types of some local variables The types of the following local variables: - ubytes/mbytes in kimage_load_crash_segment()/kimage_load_normal_segment() - r in vmcoreinfo_append_str() are wrong, so fix them. Signed-off-by: Zhang Yanfei Cc: "Eric W. Biederman" Cc: Simon Horman Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/kexec.c b/kernel/kexec.c index b574920..b2c1306 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -786,7 +786,7 @@ static int kimage_load_normal_segment(struct kimage *image, struct kexec_segment *segment) { unsigned long maddr; - unsigned long ubytes, mbytes; + size_t ubytes, mbytes; int result; unsigned char __user *buf; @@ -850,7 +850,7 @@ static int kimage_load_crash_segment(struct kimage *image, * We do things a page at a time for the sake of kmap. */ unsigned long maddr; - unsigned long ubytes, mbytes; + size_t ubytes, mbytes; int result; unsigned char __user *buf; @@ -1540,7 +1540,7 @@ void vmcoreinfo_append_str(const char *fmt, ...) { va_list args; char buf[0x50]; - int r; + size_t r; va_start(args, fmt); r = vsnprintf(buf, sizeof(buf), fmt, args); -- cgit v0.10.2 From 31c3a3fe0777642816045a8dd1b666f1718851d7 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Tue, 30 Apr 2013 15:28:23 -0700 Subject: kexec: Use min() and min_t() to simplify logic Simplify the logic of variable assignments. [akpm@linux-foundation.org: replace min_t with min, remove unneeded casts] Signed-off-by: Zhang Yanfei Cc: "Eric W. Biederman" Reviewed-by: Simon Horman Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/kexec.c b/kernel/kexec.c index b2c1306..59f7b55 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -819,13 +819,9 @@ static int kimage_load_normal_segment(struct kimage *image, /* Start with a clear page */ clear_page(ptr); ptr += maddr & ~PAGE_MASK; - mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); - if (mchunk > mbytes) - mchunk = mbytes; - - uchunk = mchunk; - if (uchunk > ubytes) - uchunk = ubytes; + mchunk = min_t(size_t, mbytes, + PAGE_SIZE - (maddr & ~PAGE_MASK)); + uchunk = min(ubytes, mchunk); result = copy_from_user(ptr, buf, uchunk); kunmap(page); @@ -871,13 +867,10 @@ static int kimage_load_crash_segment(struct kimage *image, } ptr = kmap(page); ptr += maddr & ~PAGE_MASK; - mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); - if (mchunk > mbytes) - mchunk = mbytes; - - uchunk = mchunk; - if (uchunk > ubytes) { - uchunk = ubytes; + mchunk = min_t(size_t, mbytes, + PAGE_SIZE - (maddr & ~PAGE_MASK)); + uchunk = min(ubytes, mchunk); + if (mchunk > uchunk) { /* Zero the trailing part of the page */ memset(ptr + uchunk, 0, mchunk - uchunk); } @@ -1546,8 +1539,7 @@ void vmcoreinfo_append_str(const char *fmt, ...) r = vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); - if (r + vmcoreinfo_size > vmcoreinfo_max_size) - r = vmcoreinfo_max_size - vmcoreinfo_size; + r = min(r, vmcoreinfo_max_size - vmcoreinfo_size); memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); -- cgit v0.10.2 From 4130f0efbfe5adb360328b777afa8e45f7e467f7 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 30 Apr 2013 15:28:24 -0700 Subject: rbtree_test: add extra rbtree integrity check Account for the rbtree having 2**bh(v)-1 internal nodes. While this can be seen as a consequence of other checks, Michel states that it nicely sums up what the other properties are for. Signed-off-by: Davidlohr Bueso Reviewed-by: Michel Lespinasse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index af38aed..9951503 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -117,8 +117,7 @@ static int black_path_count(struct rb_node *rb) static void check(int nr_nodes) { struct rb_node *rb; - int count = 0; - int blacks = 0; + int count = 0, blacks = 0; u32 prev_key = 0; for (rb = rb_first(&root); rb; rb = rb_next(rb)) { @@ -134,7 +133,9 @@ static void check(int nr_nodes) prev_key = node->key; count++; } + WARN_ON_ONCE(count != nr_nodes); + WARN_ON_ONCE(count < (1 << black_path_count(rb_last(&root))) - 1); } static void check_augmented(int nr_nodes) -- cgit v0.10.2 From c75aaa8ed03eb1312ed2990f1a716b2b9cc0df42 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 30 Apr 2013 15:28:25 -0700 Subject: rbtree_test: add __init/__exit annotations Signed-off-by: Davidlohr Bueso Reviewed-by: Michel Lespinasse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 9951503..122f02f 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -149,7 +149,7 @@ static void check_augmented(int nr_nodes) } } -static int rbtree_test_init(void) +static int __init rbtree_test_init(void) { int i, j; cycles_t time1, time2, time; @@ -222,7 +222,7 @@ static int rbtree_test_init(void) return -EAGAIN; /* Fail will directly unload the module */ } -static void rbtree_test_exit(void) +static void __exit rbtree_test_exit(void) { printk(KERN_ALERT "test exit\n"); } -- cgit v0.10.2 From 8db049b3d666b3676ff4a976e03c14de302bf9fa Mon Sep 17 00:00:00 2001 From: "Raphael S. Carvalho" Date: Tue, 30 Apr 2013 15:28:26 -0700 Subject: kernel/pid.c: improve flow of a loop inside alloc_pidmap. find_next_offset() searches for an available "cleaned bit" in the respective pid bitmap (page), so returns the offset if found, otherwise it returns a value equals to BITS_PER_PAGE. For example, suppose find_next_offset didn't find any available bit, so there's no purpose to call mk_pid (Wasteful Cpu Cycles). Therefore, I found it could be better to call mk_pid after the checking (offset < BITS_PER_PAGE) returned sucessfully! Another point: If (offset < BITS_PER_PAGE) results in a "failure", then mk_pid would be called again afterwards. [akpm@linux-foundation.org: simplify code] Signed-off-by: Raphael S. Carvalho Cc: "Eric W. Biederman" Cc: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/pid.c b/kernel/pid.c index 047dc62..8147bdf 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -183,15 +183,19 @@ static int alloc_pidmap(struct pid_namespace *pid_ns) break; } if (likely(atomic_read(&map->nr_free))) { - do { + for ( ; ; ) { if (!test_and_set_bit(offset, map->page)) { atomic_dec(&map->nr_free); set_last_pid(pid_ns, last, pid); return pid; } offset = find_next_offset(map, offset); + if (offset >= BITS_PER_PAGE) + break; pid = mk_pid(pid_ns, map, offset); - } while (offset < BITS_PER_PAGE && pid < pid_max); + if (pid >= pid_max) + break; + } } if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) { ++map; -- cgit v0.10.2 From 5cc5445164c16d32bab2912fac28356ab07aa8b4 Mon Sep 17 00:00:00 2001 From: "Raphael S.Carvalho" Date: Tue, 30 Apr 2013 15:28:27 -0700 Subject: pid_namespace.c/.h: simplify defines Move BITS_PER_PAGE from pid_namespace.c to pid_namespace.h, since we can simplify the define PID_MAP_ENTRIES by using the BITS_PER_PAGE. [akpm@linux-foundation.org: kernel/pid.c:54:1: warning: "BITS_PER_PAGE" redefined] Signed-off-by: Raphael S.Carvalho Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 215e5e3..8ac3283 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -13,7 +13,9 @@ struct pidmap { void *page; }; -#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8) +#define BITS_PER_PAGE (PAGE_SIZE * 8) +#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) +#define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE) struct bsd_acct_struct; diff --git a/kernel/pid.c b/kernel/pid.c index 8147bdf..6283d64 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -51,9 +51,6 @@ int pid_max = PID_MAX_DEFAULT; int pid_max_min = RESERVED_PIDS + 1; int pid_max_max = PID_MAX_LIMIT; -#define BITS_PER_PAGE (PAGE_SIZE*8) -#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) - static inline int mk_pid(struct pid_namespace *pid_ns, struct pidmap *map, int off) { diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index bea15bd..69473c4 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -19,8 +19,6 @@ #include #include -#define BITS_PER_PAGE (PAGE_SIZE*8) - struct pid_cache { int nr_ids; char name[16]; -- cgit v0.10.2 From 078be02b80359a541928c899c2631f39628f56df Mon Sep 17 00:00:00 2001 From: Michal Belczyk Date: Tue, 30 Apr 2013 15:28:28 -0700 Subject: nbd: increase default and max request sizes Raise the default max request size for nbd to 128KB (from 127KB) to get it 4KB aligned. This patch also allows the max request size to be increased (via /sys/block/nbd/queue/max_sectors_kb) to 32MB. The patch makes nbd network traffic more efficient by: - reducing request fragmentation (4KB alignment) - reducing the number of requests (fewer round trips, less network overhead) Especially in high latency networks, larger request size can make a dramatic Signed-off-by: Paul Clements Signed-off-by: Michal Belczyk Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 7fecc78..037288e 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -856,6 +856,8 @@ static int __init nbd_init(void) disk->queue->limits.discard_granularity = 512; disk->queue->limits.max_discard_sectors = UINT_MAX; disk->queue->limits.discard_zeroes_data = 0; + blk_queue_max_hw_sectors(disk->queue, 65536); + disk->queue->limits.max_sectors = 256; } if (register_blkdev(NBD_MAJOR, "nbd")) { -- cgit v0.10.2 From 60abc786dd3b5d6917d63fd789c9fed772f65039 Mon Sep 17 00:00:00 2001 From: Mihnea Dobrescu-Balaur Date: Tue, 30 Apr 2013 15:28:29 -0700 Subject: aoe: replace kmalloc and then memcpy with kmemdup Signed-off-by: Mihnea Dobrescu-Balaur Cc: Ed Cashin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c index 42e67ad..ab41be6 100644 --- a/drivers/block/aoe/aoechr.c +++ b/drivers/block/aoe/aoechr.c @@ -139,13 +139,12 @@ bail: spin_unlock_irqrestore(&emsgs_lock, flags); return; } - mp = kmalloc(n, GFP_ATOMIC); + mp = kmemdup(msg, n, GFP_ATOMIC); if (mp == NULL) { printk(KERN_ERR "aoe: allocation failure, len=%ld\n", n); goto bail; } - memcpy(mp, msg, n); em->msg = mp; em->flags |= EMFL_VALID; em->len = n; -- cgit v0.10.2 From 1a0f39997caf3e1edc7e5b7da168b91c32760ccb Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 30 Apr 2013 15:28:30 -0700 Subject: pps: hide more configuration symbols behind CONFIG_PPS Make CONFIG_PPS_DEBUG and CONFIG_NTP_PPS be hidden if CONFIG_PPS is not selected, so that we are not prompted for these configuration options if CONFIG_PPS is not set. Signed-off-by: Florian Fainelli Cc: Rodolfo Giometti Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/pps/Kconfig b/drivers/pps/Kconfig index 982d16b..7512e98 100644 --- a/drivers/pps/Kconfig +++ b/drivers/pps/Kconfig @@ -20,10 +20,10 @@ config PPS To compile this driver as a module, choose M here: the module will be called pps_core.ko. +if PPS config PPS_DEBUG bool "PPS debugging messages" - depends on PPS help Say Y here if you want the PPS support to produce a bunch of debug messages to the system log. Select this if you are having a @@ -31,13 +31,15 @@ config PPS_DEBUG config NTP_PPS bool "PPS kernel consumer support" - depends on PPS && !NO_HZ + depends on !NO_HZ help This option adds support for direct in-kernel time synchronization using an external PPS signal. It doesn't work on tickless systems at the moment. +endif + source drivers/pps/clients/Kconfig source drivers/pps/generators/Kconfig -- cgit v0.10.2 From 97439d0f84942d3031545e80852dde09457136c6 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Tue, 30 Apr 2013 15:28:31 -0700 Subject: pps: pps_kc_hardpps_lock can be static drivers/pps/kc.c:37:1: sparse: symbol 'pps_kc_hardpps_lock' was not declared. Should it be static? drivers/pps/kc.c:39:19: sparse: symbol 'pps_kc_hardpps_dev' was not declared. Should it be static? drivers/pps/kc.c:40:5: sparse: symbol 'pps_kc_hardpps_mode' was not declared. Should it be static? Signed-off-by: Fengguang Wu Cc: Rodolfo Giometti Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/pps/kc.c b/drivers/pps/kc.c index 079e930..e219db1 100644 --- a/drivers/pps/kc.c +++ b/drivers/pps/kc.c @@ -34,10 +34,10 @@ */ /* state variables to bind kernel consumer */ -DEFINE_SPINLOCK(pps_kc_hardpps_lock); +static DEFINE_SPINLOCK(pps_kc_hardpps_lock); /* PPS API (RFC 2783): current source and mode for kernel consumer */ -struct pps_device *pps_kc_hardpps_dev; /* unique pointer to device */ -int pps_kc_hardpps_mode; /* mode bits for kernel consumer */ +static struct pps_device *pps_kc_hardpps_dev; /* unique pointer to device */ +static int pps_kc_hardpps_mode; /* mode bits for kernel consumer */ /* pps_kc_bind - control PPS kernel consumer binding * @pps: the PPS source -- cgit v0.10.2 From c74f66ce102e2cfb61c185f7451538e29de640b9 Mon Sep 17 00:00:00 2001 From: liguang Date: Tue, 30 Apr 2013 15:28:32 -0700 Subject: semaphore: use unlikely() for down's timeout Signed-off-by: liguang Cc: Jiri Kosina Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/semaphore.c b/kernel/semaphore.c index 4567fc0..9c7017d 100644 --- a/kernel/semaphore.c +++ b/kernel/semaphore.c @@ -214,7 +214,7 @@ static inline int __sched __down_common(struct semaphore *sem, long state, for (;;) { if (signal_pending_state(state, task)) goto interrupted; - if (timeout <= 0) + if (unlikely(timeout <= 0)) goto timed_out; __set_task_state(task, state); raw_spin_unlock_irq(&sem->lock); -- cgit v0.10.2 From 06a6ea3702143e0af38270ac07d102a68810d564 Mon Sep 17 00:00:00 2001 From: liguang Date: Tue, 30 Apr 2013 15:28:33 -0700 Subject: semaphore: use `bool' type for semaphore_waiter's up Signed-off-by: liguang Cc: Jiri Kosina Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/semaphore.c b/kernel/semaphore.c index 9c7017d..6815171 100644 --- a/kernel/semaphore.c +++ b/kernel/semaphore.c @@ -193,7 +193,7 @@ EXPORT_SYMBOL(up); struct semaphore_waiter { struct list_head list; struct task_struct *task; - int up; + bool up; }; /* @@ -209,7 +209,7 @@ static inline int __sched __down_common(struct semaphore *sem, long state, list_add_tail(&waiter.list, &sem->wait_list); waiter.task = task; - waiter.up = 0; + waiter.up = false; for (;;) { if (signal_pending_state(state, task)) @@ -258,6 +258,6 @@ static noinline void __sched __up(struct semaphore *sem) struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, struct semaphore_waiter, list); list_del(&waiter->list); - waiter->up = 1; + waiter->up = true; wake_up_process(waiter->task); } -- cgit v0.10.2 From 8d46fa11790e012d991495148f2d16cdbf4ee864 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Tue, 30 Apr 2013 15:28:34 -0700 Subject: drivers/memstick/host/r592.c: make r592_pm_ops static r592_pm_ops is not exported. Also, CONFIG_PM_SLEEP is used to remove unnecessary ifdefs. Signed-off-by: Jingoo Han Cc: Maxim Levitsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c index a7c5b31..9718661 100644 --- a/drivers/memstick/host/r592.c +++ b/drivers/memstick/host/r592.c @@ -847,7 +847,7 @@ static void r592_remove(struct pci_dev *pdev) dev->dummy_dma_page_physical_address); } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int r592_suspend(struct device *core_dev) { struct pci_dev *pdev = to_pci_dev(core_dev); @@ -870,10 +870,10 @@ static int r592_resume(struct device *core_dev) r592_update_card_detect(dev); return 0; } - -SIMPLE_DEV_PM_OPS(r592_pm_ops, r592_suspend, r592_resume); #endif +static SIMPLE_DEV_PM_OPS(r592_pm_ops, r592_suspend, r592_resume); + MODULE_DEVICE_TABLE(pci, r592_pci_id_tbl); static struct pci_driver r852_pci_driver = { @@ -881,9 +881,7 @@ static struct pci_driver r852_pci_driver = { .id_table = r592_pci_id_tbl, .probe = r592_probe, .remove = r592_remove, -#ifdef CONFIG_PM .driver.pm = &r592_pm_ops, -#endif }; static __init int r592_module_init(void) -- cgit v0.10.2 From 0ece1bbf4c280a4e886709ebf21d7df35a638a82 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 30 Apr 2013 15:28:35 -0700 Subject: drivers/w1/slaves/w1_bq27000.c: fix the error handling in w1_bq27000_add_slave() Use platform_device_put() instead of platform_device_unregister() if platform_device_add() fails, and also add the return value check of platform_device_add_data(). Signed-off-by: Wei Yongjun Cc: Evgeniy Polyakov Cc: Greg KH Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/w1/slaves/w1_bq27000.c b/drivers/w1/slaves/w1_bq27000.c index 773dca5..afbefed 100644 --- a/drivers/w1/slaves/w1_bq27000.c +++ b/drivers/w1/slaves/w1_bq27000.c @@ -57,6 +57,8 @@ static int w1_bq27000_add_slave(struct w1_slave *sl) ret = platform_device_add_data(pdev, &bq27000_battery_info, sizeof(bq27000_battery_info)); + if (ret) + goto pdev_add_failed; pdev->dev.parent = &sl->dev; ret = platform_device_add(pdev); @@ -68,7 +70,7 @@ static int w1_bq27000_add_slave(struct w1_slave *sl) goto success; pdev_add_failed: - platform_device_unregister(pdev); + platform_device_put(pdev); success: return ret; } -- cgit v0.10.2 From c5cfedf234fd5ba457c404bf42b6e6aea23b1e69 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 30 Apr 2013 15:28:36 -0700 Subject: drivers/w1/slaves/w1_ds2780.c: fix the error handling in w1_ds2780_add_slave() Use platform_device_put() instead of platform_device_unregister() if platform_device_add() fail, and platform_device_del() should be used in the error handling case after platform_device_add() success. Signed-off-by: Wei Yongjun Cc: Evgeniy Polyakov Cc: Greg KH Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/w1/slaves/w1_ds2780.c b/drivers/w1/slaves/w1_ds2780.c index 7b09307..98ed9c4 100644 --- a/drivers/w1/slaves/w1_ds2780.c +++ b/drivers/w1/slaves/w1_ds2780.c @@ -141,8 +141,9 @@ static int w1_ds2780_add_slave(struct w1_slave *sl) return 0; bin_attr_failed: + platform_device_del(pdev); pdev_add_failed: - platform_device_unregister(pdev); + platform_device_put(pdev); pdev_alloc_failed: ida_simple_remove(&bat_ida, id); noid: -- cgit v0.10.2 From c28d6f2ddb10fd2b359083e89ae583c6c062e35f Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 30 Apr 2013 15:28:37 -0700 Subject: drivers/w1/slaves/w1_ds2781.c: fix the error handling in w1_ds2781_add_slave() Use platform_device_put() instead of platform_device_unregister() if platform_device_add() fail, and platform_device_del() should be used in the error handling case after platform_device_add() success. Signed-off-by: Wei Yongjun Cc: Evgeniy Polyakov Cc: Greg KH Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/w1/slaves/w1_ds2781.c b/drivers/w1/slaves/w1_ds2781.c index 877daf7..5140d7b 100644 --- a/drivers/w1/slaves/w1_ds2781.c +++ b/drivers/w1/slaves/w1_ds2781.c @@ -139,8 +139,9 @@ static int w1_ds2781_add_slave(struct w1_slave *sl) return 0; bin_attr_failed: + platform_device_del(pdev); pdev_add_failed: - platform_device_unregister(pdev); + platform_device_put(pdev); pdev_alloc_failed: ida_simple_remove(&bat_ida, id); noid: -- cgit v0.10.2 From 4d10e0f2dd42b950448c13715f88edfce82a0975 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 30 Apr 2013 15:28:38 -0700 Subject: drivers/w1/slaves/w1_ds2760.c: fix the error handling in w1_ds2760_add_slave() Use platform_device_put() instead of platform_device_unregister() if platform_device_add() fail, and platform_device_del() should be used in the error handling case after platform_device_add() success. Signed-off-by: Wei Yongjun Cc: Evgeniy Polyakov Cc: Greg KH Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/w1/slaves/w1_ds2760.c b/drivers/w1/slaves/w1_ds2760.c index aa7bd5f..e86a69d 100644 --- a/drivers/w1/slaves/w1_ds2760.c +++ b/drivers/w1/slaves/w1_ds2760.c @@ -148,8 +148,9 @@ static int w1_ds2760_add_slave(struct w1_slave *sl) goto success; bin_attr_failed: + platform_device_del(pdev); pdev_add_failed: - platform_device_unregister(pdev); + platform_device_put(pdev); pdev_alloc_failed: ida_simple_remove(&bat_ida, id); noid: -- cgit v0.10.2 From 8359f689e23a607ec6758737551045997848ac75 Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Tue, 30 Apr 2013 15:28:39 -0700 Subject: kernel/relay.c: remove unused function argument actor Currently argument `actor' is never used in the relay reading path, so remove it. Signed-off-by: zhangwei(Jovi) Cc: Jens Axboe Cc: Al Viro Cc: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/relay.c b/kernel/relay.c index 01ab081..d21006c 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1099,8 +1099,7 @@ static size_t relay_file_read_end_pos(struct rchan_buf *buf, static int subbuf_read_actor(size_t read_start, struct rchan_buf *buf, size_t avail, - read_descriptor_t *desc, - read_actor_t actor) + read_descriptor_t *desc) { void *from; int ret = 0; @@ -1121,15 +1120,13 @@ static int subbuf_read_actor(size_t read_start, typedef int (*subbuf_actor_t) (size_t read_start, struct rchan_buf *buf, size_t avail, - read_descriptor_t *desc, - read_actor_t actor); + read_descriptor_t *desc); /* * relay_file_read_subbufs - read count bytes, bridging subbuf boundaries */ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos, subbuf_actor_t subbuf_actor, - read_actor_t actor, read_descriptor_t *desc) { struct rchan_buf *buf = filp->private_data; @@ -1150,7 +1147,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos, break; avail = min(desc->count, avail); - ret = subbuf_actor(read_start, buf, avail, desc, actor); + ret = subbuf_actor(read_start, buf, avail, desc); if (desc->error < 0) break; @@ -1174,8 +1171,7 @@ static ssize_t relay_file_read(struct file *filp, desc.count = count; desc.arg.buf = buffer; desc.error = 0; - return relay_file_read_subbufs(filp, ppos, subbuf_read_actor, - NULL, &desc); + return relay_file_read_subbufs(filp, ppos, subbuf_read_actor, &desc); } static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed) -- cgit v0.10.2 From 536b39ecf1b52ab71c2c126db0137611b9e1a4d4 Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Tue, 30 Apr 2013 15:28:40 -0700 Subject: kernel/relay.c: move FIX_SIZE macro into relay.c It's better to place FIX_SIZE macro in relay.c, instead of relay.h Signed-off-by: zhangwei(Jovi) Cc: Jens Axboe Cc: Al Viro Cc: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/relay.h b/include/linux/relay.h index 91cacc3..d7c8359 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -20,9 +20,6 @@ #include #include -/* Needs a _much_ better name... */ -#define FIX_SIZE(x) ((((x) - 1) & PAGE_MASK) + PAGE_SIZE) - /* * Tracks changes to rchan/rchan_buf structs */ diff --git a/kernel/relay.c b/kernel/relay.c index d21006c..4c2959b 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -550,6 +550,9 @@ static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb, return NOTIFY_OK; } +/* Needs a _much_ better name... */ +#define FIX_SIZE(x) ((((x) - 1) & PAGE_MASK) + PAGE_SIZE) + /** * relay_open - create a new relay channel * @base_filename: base name of files to create, %NULL for buffering only -- cgit v0.10.2 From a05342cbd62b7f6dacb8d71683d799f5e5e694b8 Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Tue, 30 Apr 2013 15:28:41 -0700 Subject: relay: use macro PAGE_ALIGN instead of FIX_SIZE Macro FIX_SIZE is same as PAGE_ALIGN at present, so use PAGE_ALIGN instead. Thanks Andrew found this. Signed-off-by: zhangwei(Jovi) Cc: Jens Axboe Cc: Al Viro Cc: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/relay.c b/kernel/relay.c index 4c2959b..eef0d11 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -550,9 +550,6 @@ static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb, return NOTIFY_OK; } -/* Needs a _much_ better name... */ -#define FIX_SIZE(x) ((((x) - 1) & PAGE_MASK) + PAGE_SIZE) - /** * relay_open - create a new relay channel * @base_filename: base name of files to create, %NULL for buffering only @@ -591,7 +588,7 @@ struct rchan *relay_open(const char *base_filename, chan->version = RELAYFS_CHANNEL_VERSION; chan->n_subbufs = n_subbufs; chan->subbuf_size = subbuf_size; - chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs); + chan->alloc_size = PAGE_ALIGN(subbuf_size * n_subbufs); chan->parent = parent; chan->private_data = private_data; if (base_filename) { -- cgit v0.10.2 From 446f24d1199e8a546ba7c97da3fbb9a505a94795 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 30 Apr 2013 15:28:42 -0700 Subject: Kconfig: consolidate CONFIG_DEBUG_STRICT_USER_COPY_CHECKS The help text for this config is duplicated across the x86, parisc, and s390 Kconfig.debug files. Arnd Bergman noted that the help text was slightly misleading and should be fixed to state that enabling this option isn't a problem when using pre 4.4 gcc. To simplify the rewording, consolidate the text into lib/Kconfig.debug and modify it there to be more explicit about when you should say N to this config. Also, make the text a bit more generic by stating that this option enables compile time checks so we can cover architectures which emit warnings vs. ones which emit errors. The details of how an architecture decided to implement the checks isn't as important as the concept of compile time checking of copy_from_user() calls. While we're doing this, remove all the copy_from_user_overflow() code that's duplicated many times and place it into lib/ so that any architecture supporting this option can get the function for free. Signed-off-by: Stephen Boyd Acked-by: Arnd Bergmann Acked-by: Ingo Molnar Acked-by: H. Peter Anvin Cc: Arjan van de Ven Acked-by: Helge Deller Cc: Heiko Carstens Cc: Stephen Rothwell Cc: Chris Metcalf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 0339181..433e75a 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -1,5 +1,6 @@ config PARISC def_bool y + select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select HAVE_IDE select HAVE_OPROFILE select HAVE_FUNCTION_TRACER if 64BIT diff --git a/arch/parisc/Kconfig.debug b/arch/parisc/Kconfig.debug index 7305ac8..bc989e5 100644 --- a/arch/parisc/Kconfig.debug +++ b/arch/parisc/Kconfig.debug @@ -12,18 +12,4 @@ config DEBUG_RODATA portion of the kernel code won't be covered by a TLB anymore. If in doubt, say "N". -config DEBUG_STRICT_USER_COPY_CHECKS - bool "Strict copy size checks" - depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING - ---help--- - Enabling this option turns a certain set of sanity checks for user - copy operations into compile time failures. - - The copy_from_user() etc checks are there to help test if there - are sufficient security checks on the length argument of - the copy operation, by having gcc prove that the argument is - within bounds. - - If unsure, or if you run an older (pre 4.4) gcc, say N. - endmenu diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index bda6ba6..ce640af 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -91,6 +91,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_BH select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_SAVE_PAGE_KEYS if HIBERNATION select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index fc32a2d..c56878e 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -17,20 +17,6 @@ config STRICT_DEVMEM If you are unsure, say Y. -config DEBUG_STRICT_USER_COPY_CHECKS - def_bool n - prompt "Strict user copy size checks" - ---help--- - Enabling this option turns a certain set of sanity checks for user - copy operations into compile time warnings. - - The copy_from_user() etc checks are there to help test if there - are sufficient security checks on the length argument of - the copy operation, by having gcc prove that the argument is - within bounds. - - If unsure, or if you run an older (pre 4.4) gcc, say N. - config S390_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 6ab0d0b..20b0e97 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -3,7 +3,6 @@ # lib-y += delay.o string.o uaccess_std.o uaccess_pt.o -obj-y += usercopy.o obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o mem32.o obj-$(CONFIG_64BIT) += mem64.o lib-$(CONFIG_64BIT) += uaccess_mvcos.o diff --git a/arch/s390/lib/usercopy.c b/arch/s390/lib/usercopy.c deleted file mode 100644 index 14b363f..0000000 --- a/arch/s390/lib/usercopy.c +++ /dev/null @@ -1,8 +0,0 @@ -#include -#include - -void copy_from_user_overflow(void) -{ - WARN(1, "Buffer overflow detected!\n"); -} -EXPORT_SYMBOL(copy_from_user_overflow); diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 8410065f2..dbe119b 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -45,4 +45,3 @@ obj-y += iomap.o obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o obj-y += ksyms.o obj-$(CONFIG_SPARC64) += PeeCeeI.o -obj-y += usercopy.o diff --git a/arch/sparc/lib/usercopy.c b/arch/sparc/lib/usercopy.c deleted file mode 100644 index 5c4284c..0000000 --- a/arch/sparc/lib/usercopy.c +++ /dev/null @@ -1,9 +0,0 @@ -#include -#include -#include - -void copy_from_user_overflow(void) -{ - WARN(1, "Buffer overflow detected!\n"); -} -EXPORT_SYMBOL(copy_from_user_overflow); diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 25877ae..0f712f4 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -19,6 +19,7 @@ config TILE select HAVE_SYSCALL_WRAPPERS if TILEGX select VIRT_TO_BUS select SYS_HYPERVISOR + select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_CLOCKEVENTS select MODULES_USE_ELF_RELA @@ -114,13 +115,6 @@ config STRICT_DEVMEM config SMP def_bool y -# Allow checking for compile-time determined overflow errors in -# copy_from_user(). There are still unprovable places in the -# generic code as of 2.6.34, so this option is not really compatible -# with -Werror, which is more useful in general. -config DEBUG_COPY_FROM_USER - def_bool n - config HVC_TILE depends on TTY select HVC_DRIVER diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h index 9ab078a..8a082bc 100644 --- a/arch/tile/include/asm/uaccess.h +++ b/arch/tile/include/asm/uaccess.h @@ -395,7 +395,12 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) return n; } -#ifdef CONFIG_DEBUG_COPY_FROM_USER +#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS +/* + * There are still unprovable places in the generic code as of 2.6.34, so this + * option is not really compatible with -Werror, which is more useful in + * general. + */ extern void copy_from_user_overflow(void) __compiletime_warning("copy_from_user() size is not provably correct"); diff --git a/arch/tile/lib/uaccess.c b/arch/tile/lib/uaccess.c index f8d398c..030abe3 100644 --- a/arch/tile/lib/uaccess.c +++ b/arch/tile/lib/uaccess.c @@ -22,11 +22,3 @@ int __range_ok(unsigned long addr, unsigned long size) is_arch_mappable_range(addr, size)); } EXPORT_SYMBOL(__range_ok); - -#ifdef CONFIG_DEBUG_COPY_FROM_USER -void copy_from_user_overflow(void) -{ - WARN(1, "Buffer overflow detected!\n"); -} -EXPORT_SYMBOL(copy_from_user_overflow); -#endif diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 05b057d..5db2117 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -20,6 +20,7 @@ config X86_64 ### Arch settings config X86 def_bool y + select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select HAVE_AOUT if X86_32 select HAVE_UNSTABLE_SCHED_CLOCK select ARCH_SUPPORTS_NUMA_BALANCING diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 16f7383..c198b7e 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -292,20 +292,6 @@ config OPTIMIZE_INLINING If unsure, say N. -config DEBUG_STRICT_USER_COPY_CHECKS - bool "Strict copy size checks" - depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING - ---help--- - Enabling this option turns a certain set of sanity checks for user - copy operations into compile time failures. - - The copy_from_user() etc checks are there to help test if there - are sufficient security checks on the length argument of - the copy operation, by having gcc prove that the argument is - within bounds. - - If unsure, or if you run an older (pre 4.4) gcc, say N. - config DEBUG_NMI_SELFTEST bool "NMI Selftest" depends on DEBUG_KERNEL && X86_LOCAL_APIC diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index f0312d7..3eb18ac 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -689,9 +689,3 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) return n; } EXPORT_SYMBOL(_copy_from_user); - -void copy_from_user_overflow(void) -{ - WARN(1, "Buffer overflow detected!\n"); -} -EXPORT_SYMBOL(copy_from_user_overflow); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 77ebaa3..770a422 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1292,6 +1292,24 @@ config LATENCYTOP Enable this option if you want to use the LatencyTOP tool to find out which userspace is blocking on what kernel operations. +config ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS + bool + +config DEBUG_STRICT_USER_COPY_CHECKS + bool "Strict user copy size checks" + depends on ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS + depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING + help + Enabling this option turns a certain set of sanity checks for user + copy operations into compile time failures. + + The copy_from_user() etc checks are there to help test if there + are sufficient security checks on the length argument of + the copy operation, by having gcc prove that the argument is + within bounds. + + If unsure, say N. + source mm/Kconfig.debug source kernel/trace/Kconfig diff --git a/lib/Makefile b/lib/Makefile index 23c9a0f..e9c52e1 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -15,6 +15,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o +obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/usercopy.c b/lib/usercopy.c new file mode 100644 index 0000000..4f5b1dd --- /dev/null +++ b/lib/usercopy.c @@ -0,0 +1,9 @@ +#include +#include +#include + +void copy_from_user_overflow(void) +{ + WARN(1, "Buffer overflow detected!\n"); +} +EXPORT_SYMBOL(copy_from_user_overflow); -- cgit v0.10.2 From 966f3096b18bf13385799ef745e114a8f292ed69 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 30 Apr 2013 15:28:44 -0700 Subject: kconfig menu: move Virtualization drivers near other virtualization options Make virtualization drivers be logically grouped together (physically near each other) in the kconfig menu by moving "Virtualization drivers" to be near "Virtio drivers", Microsort Hyper-V, and Xen driver support. This is just a user-friendly, visual search change. Signed-off-by: Randy Dunlap Cc: Alexander Graf Cc: Stuart Yoder Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/Kconfig b/drivers/Kconfig index 78a956e..8d96238 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -120,6 +120,8 @@ source "drivers/vfio/Kconfig" source "drivers/vlynq/Kconfig" +source "drivers/virt/Kconfig" + source "drivers/virtio/Kconfig" source "drivers/hv/Kconfig" @@ -144,8 +146,6 @@ source "drivers/remoteproc/Kconfig" source "drivers/rpmsg/Kconfig" -source "drivers/virt/Kconfig" - source "drivers/devfreq/Kconfig" source "drivers/extcon/Kconfig" -- cgit v0.10.2 From 657a52095fa3e8560d41047851f4e73a410f3ed2 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 30 Apr 2013 15:28:45 -0700 Subject: init/Kconfig: re-order CONFIG_EXPERT options to fix menuconfig display The kconfig language requires that dependent options all follow the menuconfig symbol in order to be collapsed below it. Recently some hidden options were added below the EXPERT menuconfig, but did not depend on EXPERT (because hidden options can't). This broke the display. So re-order all these options, and while we're here stick the PCI quirks under the EXPERT menu (since it isn't sitting with any related options). Before this commit, we get: [*] Configure standard kernel features (expert users) ---> [ ] Sysctl syscall support [*] Load all symbols for debugging/ksymoops ... [ ] Embedded system Now we get the older (and correct) behavior: [*] Configure standard kernel features (expert users) ---> [ ] Embedded system And if you go into the expert menu you get the expert options: [ ] Sysctl syscall support [*] Load all symbols for debugging/ksymoops ... Signed-off-by: Mike Frysinger Acked-by: Randy Dunlap Cc: zhangwei(Jovi) Cc: Michal Marek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/init/Kconfig b/init/Kconfig index 4367e13..a76d131 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1221,6 +1221,35 @@ config SYSCTL config ANON_INODES bool +config HAVE_UID16 + bool + +config SYSCTL_EXCEPTION_TRACE + bool + help + Enable support for /proc/sys/debug/exception-trace. + +config SYSCTL_ARCH_UNALIGN_NO_WARN + bool + help + Enable support for /proc/sys/kernel/ignore-unaligned-usertrap + Allows arch to define/use @no_unaligned_warning to possibly warn + about unaligned access emulation going on under the hood. + +config SYSCTL_ARCH_UNALIGN_ALLOW + bool + help + Enable support for /proc/sys/kernel/unaligned-trap + Allows arches to define/use @unaligned_enabled to runtime toggle + the unaligned access emulation. + see arch/parisc/kernel/unaligned.c for reference + +config HOTPLUG + def_bool y + +config HAVE_PCSPKR_PLATFORM + bool + menuconfig EXPERT bool "Configure standard kernel features (expert users)" # Unhide debug options, to make the on-by-default options visible @@ -1231,9 +1260,6 @@ menuconfig EXPERT environments which can tolerate a "non-standard" kernel. Only use this if you really know what you are doing. -config HAVE_UID16 - bool - config UID16 bool "Enable 16-bit UID system calls" if EXPERT depends on HAVE_UID16 @@ -1258,26 +1284,6 @@ config SYSCTL_SYSCALL If unsure say N here. -config SYSCTL_EXCEPTION_TRACE - bool - help - Enable support for /proc/sys/debug/exception-trace. - -config SYSCTL_ARCH_UNALIGN_NO_WARN - bool - help - Enable support for /proc/sys/kernel/ignore-unaligned-usertrap - Allows arch to define/use @no_unaligned_warning to possibly warn - about unaligned access emulation going on under the hood. - -config SYSCTL_ARCH_UNALIGN_ALLOW - bool - help - Enable support for /proc/sys/kernel/unaligned-trap - Allows arches to define/use @unaligned_enabled to runtime toggle - the unaligned access emulation. - see arch/parisc/kernel/unaligned.c for reference - config KALLSYMS bool "Load all symbols for debugging/ksymoops" if EXPERT default y @@ -1303,9 +1309,6 @@ config KALLSYMS_ALL Say N unless you really need all symbols. -config HOTPLUG - def_bool y - config PRINTK default y bool "Enable support for printk" if EXPERT @@ -1344,9 +1347,6 @@ config PCSPKR_PLATFORM This option allows to disable the internal PC-Speaker support, saving some memory. -config HAVE_PCSPKR_PLATFORM - bool - config BASE_FULL default y bool "Enable full-sized data structures for core" if EXPERT @@ -1418,8 +1418,17 @@ config AIO default y help This option enables POSIX asynchronous I/O which may by used - by some high performance threaded applications. Disabling - this option saves about 7k. + by some high performance threaded applications. Disabling + this option saves about 7k. + +config PCI_QUIRKS + default y + bool "Enable PCI quirk workarounds" if EXPERT + depends on PCI + help + This enables workarounds for various PCI chipset + bugs/quirks. Disable this only if your target machine is + unaffected by PCI quirks. config EMBEDDED bool "Embedded system" @@ -1494,15 +1503,6 @@ config VM_EVENT_COUNTERS on EXPERT systems. /proc/vmstat will only show page counts if VM event counters are disabled. -config PCI_QUIRKS - default y - bool "Enable PCI quirk workarounds" if EXPERT - depends on PCI - help - This enables workarounds for various PCI chipset - bugs/quirks. Disable this only if your target machine is - unaffected by PCI quirks. - config SLUB_DEBUG default y bool "Enable SLUB debugging support" if EXPERT -- cgit v0.10.2 From bcdedcc1afd6ac91e15cb90aedaf8432f62fed13 Mon Sep 17 00:00:00 2001 From: Wengmeiling Date: Tue, 30 Apr 2013 15:28:46 -0700 Subject: menuconfig: print more info for symbol without prompts When we search a config symbol, if it has no prompt the position of this symbol in the Kconfig file and it's dependencies are not printed. This can be inconvenient, especially when it's set to n and we want to find out why. the following is an example: before: Symbol: GENERIC_SMP_IDLE_THREAD [=y] Type : boolean Selected by: X86 [=y] after: Symbol: GENERIC_SMP_IDLE_THREAD [=y] Type : boolean Defined at arch/Kconfig:213 Selected by: X86 [=y] Signed-off-by: Weng Meiling Signed-off-by: Libo Chen Cc: Michal Marek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index f3bffa3..826da66 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -515,13 +515,6 @@ static void get_prompt_str(struct gstr *r, struct property *prop, struct jump_key *jump; str_printf(r, _("Prompt: %s\n"), _(prop->text)); - str_printf(r, _(" Defined at %s:%d\n"), prop->menu->file->name, - prop->menu->lineno); - if (!expr_is_yes(prop->visible.expr)) { - str_append(r, _(" Depends on: ")); - expr_gstr_print(prop->visible.expr, r); - str_append(r, "\n"); - } menu = prop->menu->parent; for (i = 0; menu != &rootmenu && i < 8; menu = menu->parent) { bool accessible = menu_is_visible(menu); @@ -572,6 +565,18 @@ static void get_prompt_str(struct gstr *r, struct property *prop, } /* + * get peoperty of type P_SYMBOL + */ +static struct property *get_symbol_prop(struct symbol *sym) +{ + struct property *prop = NULL; + + for_all_properties(sym, prop, P_SYMBOL) + break; + return prop; +} + +/* * head is optional and may be NULL */ void get_symbol_str(struct gstr *r, struct symbol *sym, @@ -595,6 +600,14 @@ void get_symbol_str(struct gstr *r, struct symbol *sym, } for_all_prompts(sym, prop) get_prompt_str(r, prop, head); + prop = get_symbol_prop(sym); + str_printf(r, _(" Defined at %s:%d\n"), prop->menu->file->name, + prop->menu->lineno); + if (!expr_is_yes(prop->visible.expr)) { + str_append(r, _(" Depends on: ")); + expr_gstr_print(prop->visible.expr, r); + str_append(r, "\n"); + } hit = false; for_all_properties(sym, prop, P_SELECT) { if (!hit) { -- cgit v0.10.2 From 22145aa1f65c2bb4a008bc29054349a4a6c4f565 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Apr 2013 15:28:47 -0700 Subject: UAPI: remove empty Kbuild files Remove empty Kbuild files as they cause problems with the patch program which removes files that become empty. Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/Kbuild b/include/Kbuild index 1dfd33e..bab1145 100644 --- a/include/Kbuild +++ b/include/Kbuild @@ -1,5 +1,2 @@ # Top-level Makefile calls into asm-$(ARCH) # List only non-arch directories below - -header-y += video/ -header-y += scsi/ diff --git a/include/scsi/Kbuild b/include/scsi/Kbuild deleted file mode 100644 index 562ff9d..0000000 --- a/include/scsi/Kbuild +++ /dev/null @@ -1 +0,0 @@ -header-y += fc/ -- cgit v0.10.2 From 52b3694157e3aa6df871e283115652ec6f2d31e0 Mon Sep 17 00:00:00 2001 From: Amnon Shiloh Date: Tue, 30 Apr 2013 15:28:48 -0700 Subject: kernel/sys.c: make prctl(PR_SET_MM) generally available The purpose of this patch is to allow privileged processes to set their own per-memory memory-region fields: start_code, end_code, start_data, end_data, start_brk, brk, start_stack, arg_start, arg_end, env_start, env_end. This functionality is needed by any application or package that needs to reconstruct Linux processes, that is, to start them in any way other than by means of an "execve()" from an executable file. This includes: 1. Restoring processes from a checkpoint-file (by all potential user-level checkpointing packages, not only CRIU's). 2. Restarting processes on another node after process migration. 3. Starting duplicated copies of a running process (for reliability and high-availablity). 4. Starting a process from an executable format that is not supported by Linux, thus requiring a "manual execve" by a user-level utility. 5. Similarly, starting a process from a networked and/or crypted executable that, for confidentiality, licensing or other reasons, may not be written to the local file-systems. The code that does that was already included in the Linux kernel by the CRIU group, in the form of "prctl(PR_SET_MM)", but prior to this was enclosed within their private "#ifdef CONFIG_CHECKPOINT_RESTORE", which is normally disabled. The patch removes those ifdefs. Signed-off-by: Amnon Shiloh Cc: Cyrill Gorcunov Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/sys.c b/kernel/sys.c index e30eba4..afd0f7e 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1857,7 +1857,6 @@ SYSCALL_DEFINE1(umask, int, mask) return mask; } -#ifdef CONFIG_CHECKPOINT_RESTORE static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) { struct fd exe; @@ -2051,17 +2050,12 @@ out: return error; } +#ifdef CONFIG_CHECKPOINT_RESTORE static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) { return put_user(me->clear_child_tid, tid_addr); } - -#else /* CONFIG_CHECKPOINT_RESTORE */ -static int prctl_set_mm(int opt, unsigned long addr, - unsigned long arg4, unsigned long arg5) -{ - return -EINVAL; -} +#else static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) { return -EINVAL; -- cgit v0.10.2 From e12a95f40ac78f223deab743a5228f9eb3df8ed1 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 30 Apr 2013 15:28:49 -0700 Subject: notifier-error-inject: fix module names in Kconfig The Kconfig help text for MEMORY_NOTIFIER_ERROR_INJECT and OF_RECONFIG_NOTIFIER_ERROR_INJECT has mismatched module names. Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 770a422..566cf2b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1192,7 +1192,7 @@ config MEMORY_NOTIFIER_ERROR_INJECT bash: echo: write error: Cannot allocate memory To compile this code as a module, choose M here: the module will - be called pSeries-reconfig-notifier-error-inject. + be called memory-notifier-error-inject. If unsure, say N. @@ -1209,7 +1209,7 @@ config OF_RECONFIG_NOTIFIER_ERROR_INJECT notified, write the error code to "actions//error". To compile this code as a module, choose M here: the module will - be called memory-notifier-error-inject. + be called of-reconfig-notifier-error-inject. If unsure, say N. -- cgit v0.10.2 From 6f9982bdde99d4e6f248613ca780c63de26ba086 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 30 Apr 2013 15:28:50 -0700 Subject: lib/decompress.c: fix initconst Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/decompress.c b/lib/decompress.c index 31a8042..f8fdeda 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -38,7 +38,7 @@ struct compress_format { decompress_fn decompressor; }; -static const struct compress_format compressed_formats[] __initdata = { +static const struct compress_format compressed_formats[] __initconst = { { {037, 0213}, "gzip", gunzip }, { {037, 0236}, "gzip", gunzip }, { {0x42, 0x5a}, "bzip2", bunzip2 }, -- cgit v0.10.2 From f345650964936395307d9d6bdee0168cf7d926e7 Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Tue, 30 Apr 2013 15:28:51 -0700 Subject: kgdb/sysrq: fix inconstistent help message of sysrq key Currently help message of /proc/sysrq-trigger highlight its upper-case characters, like below: SysRq : HELP : loglevel(0-9) reBoot Crash terminate-all-tasks(E) memory-full-oom-kill(F) kill-all-tasks(I) ... this would confuse user trigger sysrq by upper-case character, which is inconsistent with the real lower-case character registed key. This inconsistent help message will also lead more confused when 26 upper-case letters put into use in future. This patch fix kgdb sysrq key: "debug(g)" Signed-off-by: zhangwei(Jovi) Cc: Jason Wessel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index c26278f..0506d44 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -775,7 +775,7 @@ static void sysrq_handle_dbg(int key) static struct sysrq_key_op sysrq_dbg_op = { .handler = sysrq_handle_dbg, - .help_msg = "debug(G)", + .help_msg = "debug(g)", .action_msg = "DEBUG", }; #endif -- cgit v0.10.2 From 28ad585e352ff892bd7b17ffdc2355dc51a17be5 Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Tue, 30 Apr 2013 15:28:52 -0700 Subject: power/sysrq: fix inconstistent help message of sysrq key Currently help message of /proc/sysrq-trigger highlight its upper-case characters, like below: SysRq : HELP : loglevel(0-9) reBoot Crash terminate-all-tasks(E) memory-full-oom-kill(F) kill-all-tasks(I) ... this would confuse user trigger sysrq by upper-case character, which is inconsistent with the real lower-case character registed key. This inconsistent help message will also lead more confused when 26 upper-case letters put into use in future. This patch fix power off sysrq key: "poweroff(o)" Signed-off-by: zhangwei(Jovi) Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c index 68197a4..7ef6866 100644 --- a/kernel/power/poweroff.c +++ b/kernel/power/poweroff.c @@ -32,7 +32,7 @@ static void handle_poweroff(int key) static struct sysrq_key_op sysrq_poweroff_op = { .handler = handle_poweroff, - .help_msg = "powerOff", + .help_msg = "poweroff(o)", .action_msg = "Power Off", .enable_mask = SYSRQ_ENABLE_BOOT, }; -- cgit v0.10.2 From 9387a393ef28b8c45dbede4fca9cd7cc56ca30ca Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Tue, 30 Apr 2013 15:28:53 -0700 Subject: ARM/etm/sysrq: fix inconstistent help message of sysrq key Currently help message of /proc/sysrq-trigger highlights its upper-case characters, like below: SysRq : HELP : loglevel(0-9) reBoot Crash terminate-all-tasks(E) memory-full-oom-kill(F) kill-all-tasks(I) ... this would confuse user trigger sysrq by upper-case character, which is inconsistent with the real lower-case character registed key. This inconsistent help message will also lead more confused when 26 upper-case letters put into use in future. This patch fix arm etm sysrq key: "etm-buffer-dump(v)" (This patch also add "-" to separate each sysrq key help word, instead of spaces) Signed-off-by: zhangwei(Jovi) Acked-by: Randy Dunlap Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/arm/kernel/etm.c b/arch/arm/kernel/etm.c index 9b6de8c..8ff0ecd 100644 --- a/arch/arm/kernel/etm.c +++ b/arch/arm/kernel/etm.c @@ -254,7 +254,7 @@ static void sysrq_etm_dump(int key) static struct sysrq_key_op sysrq_etm_op = { .handler = sysrq_etm_dump, - .help_msg = "ETM buffer dump", + .help_msg = "etm-buffer-dump(v)", .action_msg = "etm", }; -- cgit v0.10.2 From 90a102e59ab9c94071fe1993134daff462d17a3f Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Tue, 30 Apr 2013 15:28:54 -0700 Subject: powerpc/xmon/sysrq: fix inconstistent help message of sysrq key Currently help message of /proc/sysrq-trigger highlight its upper-case characters, like below: SysRq : HELP : loglevel(0-9) reBoot Crash terminate-all-tasks(E) memory-full-oom-kill(F) kill-all-tasks(I) ... this would confuse user trigger sysrq by upper-case character, which is inconsistent with the real lower-case character registed key. This inconsistent help message will also lead more confused when 26 upper-case letters put into use in future. This patch fix powerpc xmon sysrq key: "xmon(x)" Signed-off-by: zhangwei(Jovi) Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 13f85de..3e34cd2 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2947,7 +2947,7 @@ static void sysrq_handle_xmon(int key) static struct sysrq_key_op sysrq_xmon_op = { .handler = sysrq_handle_xmon, - .help_msg = "Xmon", + .help_msg = "xmon(x)", .action_msg = "Entering xmon", }; -- cgit v0.10.2 From 5452c7f8c7e12c191b7e2d2a58476ab41e0387aa Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Tue, 30 Apr 2013 15:28:55 -0700 Subject: sparc/sysrq: fix inconstistent help message of sysrq key Currently help message of /proc/sysrq-trigger highlight its upper-case characters, like below: SysRq : HELP : loglevel(0-9) reBoot Crash terminate-all-tasks(E) memory-full-oom-kill(F) kill-all-tasks(I) ... this would confuse user trigger sysrq by upper-case character, which is inconsistent with the real lower-case character registed key. This inconsistent help message will also lead more confused when 26 upper-case letters put into use in future. This patch fix spare sysrq key: "global-regs(y)" Signed-off-by: zhangwei(Jovi) Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index c6dc1ba..baebab2 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -294,7 +294,7 @@ static void sysrq_handle_globreg(int key) static struct sysrq_key_op sparc_globalreg_op = { .handler = sysrq_handle_globreg, - .help_msg = "global-regs(Y)", + .help_msg = "global-regs(y)", .action_msg = "Show Global CPU Regs", }; @@ -364,7 +364,7 @@ static void sysrq_handle_globpmu(int key) static struct sysrq_key_op sparc_globalpmu_op = { .handler = sysrq_handle_globpmu, - .help_msg = "global-pmu(X)", + .help_msg = "global-pmu(x)", .action_msg = "Show Global PMU Regs", }; -- cgit v0.10.2 From 7b6b99a02a8acfa8e71a990198c2ad0b9cc5cd71 Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Tue, 30 Apr 2013 15:28:56 -0700 Subject: ethernet/emac/sysrq: fix inconstistent help message of sysrq key Currently help message of /proc/sysrq-trigger highlight its upper-case characters, like below: SysRq : HELP : loglevel(0-9) reBoot Crash terminate-all-tasks(E) memory-full-oom-kill(F) kill-all-tasks(I) ... this would confuse user trigger sysrq by upper-case character, which is inconsistent with the real lower-case character registed key. This inconsistent help message will also lead more confused when 26 upper-case letters put into use in future. This patch ethernet emac sysrq key: "emac(c)" Signed-off-by: zhangwei(Jovi) Cc: Josh Boyer Cc: Thadeu Lima de Souza Cascardo Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/net/ethernet/ibm/emac/debug.c b/drivers/net/ethernet/ibm/emac/debug.c index b16b482..a559f32 100644 --- a/drivers/net/ethernet/ibm/emac/debug.c +++ b/drivers/net/ethernet/ibm/emac/debug.c @@ -245,7 +245,7 @@ static void emac_sysrq_handler(int key) static struct sysrq_key_op emac_sysrq_op = { .handler = emac_sysrq_handler, - .help_msg = "emaC", + .help_msg = "emac(c)", .action_msg = "Show EMAC(s) status", }; -- cgit v0.10.2 From e2a8b0a779787314eca1061308a8182e6c5bfabd Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Tue, 30 Apr 2013 15:28:57 -0700 Subject: Documentation/sysrq: fix inconstistent help message of sysrq key Currently help message of /proc/sysrq-trigger highlight its upper-case characters, like below: SysRq : HELP : loglevel(0-9) reBoot Crash terminate-all-tasks(E) memory-full-oom-kill(F) kill-all-tasks(I) ... this would confuse user trigger sysrq by upper-case character, which is inconsistent with the real lower-case character registed key. This inconsistent help message will also lead more confused when 26 upper-case letters put into use in future. This patch fix sysrq documentation. Signed-off-by: zhangwei(Jovi) Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt index 2a4cdda..8cb4d78 100644 --- a/Documentation/sysrq.txt +++ b/Documentation/sysrq.txt @@ -129,9 +129,9 @@ On all - write a character to /proc/sysrq-trigger. e.g.: * Okay, so what can I use them for? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Well, un'R'aw is very handy when your X server or a svgalib program crashes. +Well, unraw(r) is very handy when your X server or a svgalib program crashes. -sa'K' (Secure Access Key) is useful when you want to be sure there is no +sak(k) (Secure Access Key) is useful when you want to be sure there is no trojan program running at console which could grab your password when you would try to login. It will kill all programs on given console, thus letting you make sure that the login prompt you see is actually @@ -143,20 +143,20 @@ IMPORTANT: such. :IMPORTANT useful when you want to exit a program that will not let you switch consoles. (For example, X or a svgalib program.) -re'B'oot is good when you're unable to shut down. But you should also 'S'ync -and 'U'mount first. +reboot(b) is good when you're unable to shut down. But you should also +sync(s) and umount(u) first. -'C'rash can be used to manually trigger a crashdump when the system is hung. +crash(c) can be used to manually trigger a crashdump when the system is hung. Note that this just triggers a crash if there is no dump mechanism available. -'S'ync is great when your system is locked up, it allows you to sync your +sync(s) is great when your system is locked up, it allows you to sync your disks and will certainly lessen the chance of data loss and fscking. Note that the sync hasn't taken place until you see the "OK" and "Done" appear on the screen. (If the kernel is really in strife, you may not ever get the OK or Done message...) -'U'mount is basically useful in the same ways as 'S'ync. I generally 'S'ync, -'U'mount, then re'B'oot when my system locks. It's saved me many a fsck. +umount(u) is basically useful in the same ways as sync(s). I generally sync(s), +umount(u), then reboot(b) when my system locks. It's saved me many a fsck. Again, the unmount (remount read-only) hasn't taken place until you see the "OK" and "Done" message appear on the screen. @@ -165,11 +165,11 @@ kernel messages you do not want to see. Selecting '0' will prevent all but the most urgent kernel messages from reaching your console. (They will still be logged if syslogd/klogd are alive, though.) -t'E'rm and k'I'll are useful if you have some sort of runaway process you +term(e) and kill(i) are useful if you have some sort of runaway process you are unable to kill any other way, especially if it's spawning other processes. -"'J'ust thaw it" is useful if your system becomes unresponsive due to a frozen +"just thaw it(j)" is useful if your system becomes unresponsive due to a frozen (probably root) filesystem via the FIFREEZE ioctl. * Sometimes SysRq seems to get 'stuck' after using it, what can I do? -- cgit v0.10.2