From c846ef7deba2d4f75138cf6a4b137b7e0e7659af Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 3 Jul 2013 15:00:41 -0700 Subject: include/linux/smp.h:on_each_cpu(): switch back to a macro Commit f21afc25f9ed ("smp.h: Use local_irq_{save,restore}() in !SMP version of on_each_cpu()") converted on_each_cpu() to a C function. This required inclusion of irqflags.h, which broke ia64 and mn10300 (at least) due to header ordering hell. Switch on_each_cpu() back to a macro to fix this. Reported-by: Geert Uytterhoeven Acked-by: Geert Uytterhoeven Cc: David Daney Cc: Ralf Baechle Cc: Stephen Rothwell Cc: [3.10.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/smp.h b/include/linux/smp.h index c848876..c181399 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -11,7 +11,6 @@ #include #include #include -#include extern void cpu_idle(void); @@ -140,17 +139,14 @@ static inline int up_smp_call_function(smp_call_func_t func, void *info) } #define smp_call_function(func, info, wait) \ (up_smp_call_function(func, info)) - -static inline int on_each_cpu(smp_call_func_t func, void *info, int wait) -{ - unsigned long flags; - - local_irq_save(flags); - func(info); - local_irq_restore(flags); - return 0; -} - +#define on_each_cpu(func, info, wait) \ + ({ \ + unsigned long __flags; \ + local_irq_save(__flags); \ + func(info); \ + local_irq_restore(__flags); \ + 0; \ + }) /* * Note we still need to test the mask even for UP * because we actually can get an empty mask from -- cgit v0.10.2 From fe74650166dd6905b0cf66594eb79222dc9d7109 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Wed, 3 Jul 2013 15:00:42 -0700 Subject: arch: c6x: mm: include "asm/uaccess.h" to pass compiling Need include "asm/uaccess.h" to pass compiling. The related error (with allmodconfig): arch/c6x/mm/init.c: In function `paging_init': arch/c6x/mm/init.c:46:2: error: implicit declaration of function `set_fs' [-Werror=implicit-function-declaration] arch/c6x/mm/init.c:46:9: error: `KERNEL_DS' undeclared (first use in this function) arch/c6x/mm/init.c:46:9: note: each undeclared identifier is reported only once for each function it appears in Signed-off-by: Chen Gang Cc: [3.10.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c index a9fcd89..b74ccb5 100644 --- a/arch/c6x/mm/init.c +++ b/arch/c6x/mm/init.c @@ -18,6 +18,7 @@ #include #include +#include /* * ZERO_PAGE is a special page that is used for zero-initialized -- cgit v0.10.2 From da331ba8e9c5de72a27e50f71105395bba6eebe0 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 3 Jul 2013 15:00:43 -0700 Subject: drivers/dma/pl330.c: fix locking in pl330_free_chan_resources() tasklet_kill() may sleep so call it before taking pch->lock. Fixes following lockup: BUG: scheduling while atomic: cat/2383/0x00000002 Modules linked in: unwind_backtrace+0x0/0xfc __schedule_bug+0x4c/0x58 __schedule+0x690/0x6e0 sys_sched_yield+0x70/0x78 tasklet_kill+0x34/0x8c pl330_free_chan_resources+0x24/0x88 dma_chan_put+0x4c/0x50 [...] BUG: spinlock lockup suspected on CPU#0, swapper/0/0 lock: 0xe52aa04c, .magic: dead4ead, .owner: cat/2383, .owner_cpu: 1 unwind_backtrace+0x0/0xfc do_raw_spin_lock+0x194/0x204 _raw_spin_lock_irqsave+0x20/0x28 pl330_tasklet+0x2c/0x5a8 tasklet_action+0xfc/0x114 __do_softirq+0xe4/0x19c irq_exit+0x98/0x9c handle_IPI+0x124/0x16c gic_handle_irq+0x64/0x68 __irq_svc+0x40/0x70 cpuidle_wrap_enter+0x4c/0xa0 cpuidle_enter_state+0x18/0x68 cpuidle_idle_call+0xac/0xe0 cpu_idle+0xac/0xf0 Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Kyungmin Park Acked-by: Jassi Brar Cc: Vinod Koul Cc: Tomasz Figa Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index a17553f..7ec82f0 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2485,10 +2485,10 @@ static void pl330_free_chan_resources(struct dma_chan *chan) struct dma_pl330_chan *pch = to_pchan(chan); unsigned long flags; - spin_lock_irqsave(&pch->lock, flags); - tasklet_kill(&pch->task); + spin_lock_irqsave(&pch->lock, flags); + pl330_release_channel(pch->pl330_chid); pch->pl330_chid = NULL; -- cgit v0.10.2 From 3b1f9f53b18b2007803de17ab79cea736d5baf81 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 3 Jul 2013 15:00:44 -0700 Subject: sound/soc/codecs/si476x.c: don't use 0bNNN spacr64 gcc-3.4.5 (at least) spits this back. Cc: Andrey Smirnov Cc: Mark Brown Cc: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/sound/soc/codecs/si476x.c b/sound/soc/codecs/si476x.c index 721587c..73e205c 100644 --- a/sound/soc/codecs/si476x.c +++ b/sound/soc/codecs/si476x.c @@ -38,9 +38,9 @@ enum si476x_digital_io_output_format { SI476X_DIGITAL_IO_SAMPLE_SIZE_SHIFT = 8, }; -#define SI476X_DIGITAL_IO_OUTPUT_WIDTH_MASK ((0b111 << SI476X_DIGITAL_IO_SLOT_SIZE_SHIFT) | \ - (0b111 << SI476X_DIGITAL_IO_SAMPLE_SIZE_SHIFT)) -#define SI476X_DIGITAL_IO_OUTPUT_FORMAT_MASK (0b1111110) +#define SI476X_DIGITAL_IO_OUTPUT_WIDTH_MASK ((0x7 << SI476X_DIGITAL_IO_SLOT_SIZE_SHIFT) | \ + (0x7 << SI476X_DIGITAL_IO_SAMPLE_SIZE_SHIFT)) +#define SI476X_DIGITAL_IO_OUTPUT_FORMAT_MASK (0x7e) enum si476x_daudio_formats { SI476X_DAUDIO_MODE_I2S = (0x0 << 1), -- cgit v0.10.2 From 7121064b214bbc97007ec4d0e70aaeffef1b55f7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Jul 2013 15:00:45 -0700 Subject: configfs: use capped length for ->store_attribute() The difference between "count" and "len" is that "len" is capped at 4095. Changing it like this makes it match how sysfs_write_file() is implemented. This is a static analysis patch. I haven't found any store_attribute() functions where this change makes a difference. Signed-off-by: Dan Carpenter Acked-by: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 2b6cb23..1d1c41f 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -203,7 +203,7 @@ configfs_write_file(struct file *file, const char __user *buf, size_t count, lof mutex_lock(&buffer->mutex); len = fill_write_buffer(buffer, buf, count); if (len > 0) - len = flush_write_buffer(file->f_path.dentry, buffer, count); + len = flush_write_buffer(file->f_path.dentry, buffer, len); if (len > 0) *ppos += len; mutex_unlock(&buffer->mutex); -- cgit v0.10.2 From 82d627cf1f41c6ca550bb404dfbc4bae2c954611 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Wed, 3 Jul 2013 15:00:46 -0700 Subject: fs/ocfs2/dlm/dlmrecovery.c: remove duplicate declarations Below 3 functions have already been declared in dlmcommon.h, so we have no need to declare them again in dlmrecovery.c: dlm_complete_recovery_thread dlm_launch_recovery_thread dlm_kick_recovery_thread Signed-off-by: Joseph Qi Cc: Joel Becker Cc: Mark Fasheh Acked-by: Sunil Mushran Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index e68588e..3e18641 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -55,9 +55,6 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node); static int dlm_recovery_thread(void *data); -void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); -int dlm_launch_recovery_thread(struct dlm_ctxt *dlm); -void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); static int dlm_do_recovery(struct dlm_ctxt *dlm); static int dlm_pick_recovery_master(struct dlm_ctxt *dlm); -- cgit v0.10.2 From 22ab9014bfdaf97449759aef946871aabe78bb40 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Wed, 3 Jul 2013 15:00:47 -0700 Subject: fs/ocfs2/dlm/dlmrecovery.c:dlm_request_all_locks(): ret should be int instead of enum In dlm_request_all_locks, ret is type enum. But o2net_send_message returns a type int value. Then it will never run into the following error branch. So we should change the ret type from enum to int. Signed-off-by: Joseph Qi Cc: Joel Becker Cc: Mark Fasheh Acked-by: Sunil Mushran Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 3e18641..53e5c02 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -786,7 +786,7 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, u8 dead_node) { struct dlm_lock_request lr; - enum dlm_status ret; + int ret; mlog(0, "\n"); @@ -799,7 +799,6 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, lr.dead_node = dead_node; // send message - ret = DLM_NOLOCKMGR; ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key, &lr, sizeof(lr), request_from, NULL); -- cgit v0.10.2 From 13eb98874c837b4ca0cb2db6a4fe3551e51b7632 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Wed, 3 Jul 2013 15:00:48 -0700 Subject: ocfs2: should not use le32_add_cpu to set ocfs2_dinode i_flags If we use le32_add_cpu to set ocfs2_dinode i_flags, it may lead to the corresponding flag corrupted. So we should change it to bitwise and/or operation. Signed-off-by: Joseph Qi Cc: Joel Becker Cc: Mark Fasheh Cc: shencanquan Reviewed-by: Jie Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b4a5cdf..75964ad 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -522,7 +522,7 @@ static int __ocfs2_mknod_locked(struct inode *dir, fe->i_last_eb_blk = 0; strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE); - le32_add_cpu(&fe->i_flags, OCFS2_VALID_FL); + fe->i_flags |= cpu_to_le32(OCFS2_VALID_FL); fe->i_atime = fe->i_ctime = fe->i_mtime = cpu_to_le64(CURRENT_TIME.tv_sec); fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec = @@ -2044,7 +2044,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, goto leave; } - le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL); + fe->i_flags |= cpu_to_le32(OCFS2_ORPHANED_FL); OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR; /* Record which orphan dir our inode now resides @@ -2434,7 +2434,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, } di = (struct ocfs2_dinode *)di_bh->b_data; - le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL); + di->i_flags &= ~cpu_to_le32(OCFS2_ORPHANED_FL); di->i_orphaned_slot = 0; set_nlink(inode, 1); ocfs2_set_links_count(di, inode->i_nlink); -- cgit v0.10.2 From 40c7f2eaf59230135fd91a398924bdbf873378ec Mon Sep 17 00:00:00 2001 From: Xue jiufei Date: Wed, 3 Jul 2013 15:00:50 -0700 Subject: ocfs2: add missing dlm_put() in dlm_begin_reco_handler() dlm_begin_reco_handler() returns without putting dlm when dlm recovery state is DLM_RECO_STATE_FINALIZE. Signed-off-by: joyce Reviewed-by: Jie Liu Acked-by: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 53e5c02..773bd32 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2692,6 +2692,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data, dlm->name, br->node_idx, br->dead_node, dlm->reco.dead_node, dlm->reco.new_master); spin_unlock(&dlm->spinlock); + dlm_put(dlm); return -EAGAIN; } spin_unlock(&dlm->spinlock); -- cgit v0.10.2 From 8fa9d17f93ee8eb79d595c98cef0944f6ee5de75 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Wed, 3 Jul 2013 15:00:51 -0700 Subject: ocfs2: remove unecessary variable needs_checkpoint Code cleanup: needs_checkpoint is assigned to but never used. Delete the variable. Signed-off-by: Goldwyn Rodrigues Cc: Jeff Liu Acked-by: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index a3385b6..0a99273 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -200,7 +200,6 @@ void ocfs2_complete_quota_recovery(struct ocfs2_super *osb); static inline void ocfs2_start_checkpoint(struct ocfs2_super *osb) { - atomic_set(&osb->needs_checkpoint, 1); wake_up(&osb->checkpoint_event); } diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index d355e6e..3a90347 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -347,7 +347,6 @@ struct ocfs2_super struct task_struct *recovery_thread_task; int disable_recovery; wait_queue_head_t checkpoint_event; - atomic_t needs_checkpoint; struct ocfs2_journal *journal; unsigned long osb_commit_interval; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 01b8516..854d809 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -286,10 +286,9 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) spin_unlock(&osb->osb_lock); out += snprintf(buf + out, len - out, - "%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit", + "%10s => Pid: %d Interval: %lu\n", "Commit", (osb->commit_task ? task_pid_nr(osb->commit_task) : -1), - osb->osb_commit_interval, - atomic_read(&osb->needs_checkpoint)); + osb->osb_commit_interval); out += snprintf(buf + out, len - out, "%10s => State: %d TxnId: %lu NumTxns: %d\n", @@ -2154,7 +2153,6 @@ static int ocfs2_initialize_super(struct super_block *sb, } init_waitqueue_head(&osb->checkpoint_event); - atomic_set(&osb->needs_checkpoint, 0); osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; -- cgit v0.10.2 From 33add0e3a09a62c7796ea9838243c1cd933d8543 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Wed, 3 Jul 2013 15:00:52 -0700 Subject: ocfs2: fix mutex_unlock and possible memory leak in ocfs2_remove_btree_range In ocfs2_remove_btree_range, when calling ocfs2_lock_refcount_tree and ocfs2_prepare_refcount_change_for_del failed, it goes to out and then tries to call mutex_unlock without mutex_lock before. And when calling ocfs2_reserve_blocks_for_rec_trunc failed, it should free ref_tree before return. Signed-off-by: Joseph Qi Reviewed-by: Jie Liu Cc: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index b8a9d87..17e6bdd 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5655,7 +5655,7 @@ int ocfs2_remove_btree_range(struct inode *inode, &ref_tree, NULL); if (ret) { mlog_errno(ret); - goto out; + goto bail; } ret = ocfs2_prepare_refcount_change_for_del(inode, @@ -5666,7 +5666,7 @@ int ocfs2_remove_btree_range(struct inode *inode, &extra_blocks); if (ret < 0) { mlog_errno(ret); - goto out; + goto bail; } } @@ -5674,7 +5674,7 @@ int ocfs2_remove_btree_range(struct inode *inode, extra_blocks); if (ret) { mlog_errno(ret); - return ret; + goto bail; } mutex_lock(&tl_inode->i_mutex); @@ -5734,7 +5734,7 @@ out_commit: ocfs2_commit_trans(osb, handle); out: mutex_unlock(&tl_inode->i_mutex); - +bail: if (meta_ac) ocfs2_free_alloc_context(meta_ac); -- cgit v0.10.2 From 40bd62eb7fb8447798732e809a676ebaf2a7f910 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Wed, 3 Jul 2013 15:00:53 -0700 Subject: fs/ocfs2/journal.h: add bits_wanted while calculating credits in ocfs2_calc_extend_credits While adding extends to a file, the credits are calculated incorrectly and if the requested clusters is more than one (or more because we used a conservative limit) then we run out of journal credits and we hit an assert in journalling code. The function parameter bits_wanted variable was not used at all. Signed-off-by: Goldwyn Rodrigues Reviewed-by: Jie Liu Cc: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 0a99273..96f9ac2 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -537,7 +537,7 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb, extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth); return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks + - ocfs2_quota_trans_credits(sb); + ocfs2_quota_trans_credits(sb) + bits_wanted; } static inline int ocfs2_calc_symlink_credits(struct super_block *sb) -- cgit v0.10.2 From d3e3b41b3dffff50c66651d60146b155d6ce0484 Mon Sep 17 00:00:00 2001 From: Younger Liu Date: Wed, 3 Jul 2013 15:00:54 -0700 Subject: fs/ocfs2/cluster/tcp.c: free sc->sc_page in sc_kref_release() There is a memory leak in sc_kref_release(). When free struct o2net_sock_container (sc), we should release sc->sc_page. Signed-off-by: Younger Liu Reviewed-by: Jie Liu Cc: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index aa88bd8..bb9a48b 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -406,6 +406,9 @@ static void sc_kref_release(struct kref *kref) sc->sc_node = NULL; o2net_debug_del_sc(sc); + + if (sc->sc_page) + __free_page(sc->sc_page); kfree(sc); } -- cgit v0.10.2 From b30f14c490d0e3ff1b7c0952ccd343408557484e Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Wed, 3 Jul 2013 15:00:55 -0700 Subject: ocfs2: xattr: remove useless free space checking Free space checking will be done in ocfs2_xattr_ibody_init(). So remove here. [akpm@linux-foundation.org: remove unused local] Signed-off-by: Junxiao Bi Reviewed-by: Jie Liu Acked-by: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 2e3ea30..0deabcd 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -2751,7 +2751,6 @@ static int ocfs2_xattr_ibody_set(struct inode *inode, { int ret; struct ocfs2_inode_info *oi = OCFS2_I(inode); - struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; struct ocfs2_xa_loc loc; if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) @@ -2759,13 +2758,6 @@ static int ocfs2_xattr_ibody_set(struct inode *inode, down_write(&oi->ip_alloc_sem); if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { - if (!ocfs2_xattr_has_space_inline(inode, di)) { - ret = -ENOSPC; - goto out; - } - } - - if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); if (ret) { if (ret != -ENOSPC) -- cgit v0.10.2 From 096b2ef83c87d7a7e0d2838f7f1391c74fdad0e6 Mon Sep 17 00:00:00 2001 From: Xue jiufei Date: Wed, 3 Jul 2013 15:00:56 -0700 Subject: ocfs2: dlmlock_master() should return DLM_NORMAL after adding lock to blocked list dlmlock_master() returns DLM_RECOVERING/DLM_MIGRATING/ DLM_FORWAR after adding lock to blocked list if lockres has the state DLM_LOCK_RES_RECOVERING/DLM_LOCK_RES_MIGRATING/ DLM_LOCK_RES_IN_PROGRESS. so it will retry in dlmlock(). And this may cause dlm_thread fall into an infinite loop Thread1 dlm_thread calls dlm_lock->dlmlock_master, if lockresA is in state DLM_LOCK_RES_RECOVERING, calls __dlm_wait_on_lockres() and waits until others threads clear this state; If cannot grant this lock, adding lock to blocked list, and return DLM_RECOVERING; Grant this lock and move it to grant list; After a while, retry and calls list_add_tail(), adding lock to blocked list again. Granted and blocked list of this lockres will become the following conditions: lock_res->granted.next = dlm_lock->list_head; lock_res->blocked.next = dlm_lock->list_head; dlm_lock->list_head.next = dlm_lock_resource->blocked; When dlm_thread traverses the granted list, it will fall into an endless loop, checking dlm_lock.list_head, dlm_lock->list_head.next (i.e.lock_res->blocked), lock_res->blocked.next(i.e.dlm_lock.list_head again) ..... Signed-off-by: joyce Reviewed-by: jensen Cc: Jeff Liu Acked-by: Sunil Mushran Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 975810b..47e67c2 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -178,6 +178,7 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm, lock->ml.node); } } else { + status = DLM_NORMAL; dlm_lock_get(lock); list_add_tail(&lock->list, &res->blocked); kick_thread = 1; -- cgit v0.10.2 From ea45466aec98b68faaf354901c42e281e58af50a Mon Sep 17 00:00:00 2001 From: Younger Liu Date: Wed, 3 Jul 2013 15:00:58 -0700 Subject: ocfs2: need rollback when journal_access failed in ocfs2_orphan_add() While adding a file into orphan dir in ocfs2_orphan_add(), it calls __ocfs2_add_entry() before ocfs2_journal_access_di(). If ocfs2_journal_access_di() failed, the file is added into orphan dir, and orphan dir dinode updated, but file dinode has not been updated. Accordingly, the data is not consistent between file dinode and orphan dir. So, need to call ocfs2_journal_access_di() before __ocfs2_add_entry(), and if ocfs2_journal_access_di() failed, orphan_fe and orphan_dir_inode->i_nlink need rollback. This bug was added by 3939fda4 ("Ocfs2: Journaling i_flags and i_orphaned_slot when adding inode to orphan dir."). Signed-off-by: Younger Liu Acked-by: Jeff Liu Cc: Sunil Mushran Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 75964ad..30842f5 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -2012,6 +2012,21 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, goto leave; } + /* + * We're going to journal the change of i_flags and i_orphaned_slot. + * It's safe anyway, though some callers may duplicate the journaling. + * Journaling within the func just make the logic look more + * straightforward. + */ + status = ocfs2_journal_access_di(handle, + INODE_CACHE(inode), + fe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto leave; + } + /* we're a cluster, and nlink can change on disk from * underneath us... */ orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; @@ -2026,22 +2041,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, orphan_dir_bh, lookup); if (status < 0) { mlog_errno(status); - goto leave; - } - - /* - * We're going to journal the change of i_flags and i_orphaned_slot. - * It's safe anyway, though some callers may duplicate the journaling. - * Journaling within the func just make the logic look more - * straightforward. - */ - status = ocfs2_journal_access_di(handle, - INODE_CACHE(inode), - fe_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (status < 0) { - mlog_errno(status); - goto leave; + goto rollback; } fe->i_flags |= cpu_to_le32(OCFS2_ORPHANED_FL); @@ -2057,11 +2057,16 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, trace_ocfs2_orphan_add_end((unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); +rollback: + if (status < 0) { + if (S_ISDIR(inode->i_mode)) + ocfs2_add_links_count(orphan_fe, -1); + set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe)); + } + leave: brelse(orphan_dir_bh); - if (status) - mlog_errno(status); return status; } -- cgit v0.10.2 From 493098413bdb45f223ff0552e2f734849491dbbe Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Wed, 3 Jul 2013 15:00:59 -0700 Subject: ocfs2: rework transaction rollback in ocfs2_relink_block_group() In ocfs2_relink_block_group(), we roll back all those changes if notify intent to modify buffers for metadata update failed even if the relevant buffer has not yet been modified/got dirty at that point, that are not quite right because of: - None buffer has been modified/dirty if failed to call ocfs2_journal_access_gd() against the previous block group buffer - Only the previous block group buffer has got dirty if failed to call ocfs2_journal_access_gd() against the block group buffer - There is no need to roll back the change for file entry buffer at all Those problems will not cause anything wrong but unnecessary. This patch fix them and kill the useless bg_ptr variable as well. Signed-off-by: Jie Liu Cc: Younger Liu Cc: Sunil Mushran Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index b7e74b5..101d16d 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1422,7 +1422,7 @@ static int ocfs2_relink_block_group(handle_t *handle, int status; /* there is a really tiny chance the journal calls could fail, * but we wouldn't want inconsistent blocks in *any* case. */ - u64 fe_ptr, bg_ptr, prev_bg_ptr; + u64 bg_ptr, prev_bg_ptr; struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; @@ -1437,7 +1437,6 @@ static int ocfs2_relink_block_group(handle_t *handle, (unsigned long long)le64_to_cpu(bg->bg_blkno), (unsigned long long)le64_to_cpu(prev_bg->bg_blkno)); - fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno); bg_ptr = le64_to_cpu(bg->bg_next_group); prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); @@ -1446,7 +1445,7 @@ static int ocfs2_relink_block_group(handle_t *handle, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); - goto out_rollback; + goto out; } prev_bg->bg_next_group = bg->bg_next_group; @@ -1456,7 +1455,7 @@ static int ocfs2_relink_block_group(handle_t *handle, bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); - goto out_rollback; + goto out_rollback_prev_bg; } bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; @@ -1466,21 +1465,21 @@ static int ocfs2_relink_block_group(handle_t *handle, fe_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); - goto out_rollback; + goto out_rollback_bg; } fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; ocfs2_journal_dirty(handle, fe_bh); -out_rollback: - if (status < 0) { - fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr); - bg->bg_next_group = cpu_to_le64(bg_ptr); - prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); - } +out: + return status; - if (status) - mlog_errno(status); +out_rollback_bg: + bg->bg_next_group = cpu_to_le64(bg_ptr); +out_rollback_prev_bg: + prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); + + mlog_errno(status); return status; } -- cgit v0.10.2 From 25e2892101ba541dce8593c698d70ccc278bc1fd Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 3 Jul 2013 15:01:00 -0700 Subject: ocfs2: remove duplicated mlog_errno() in ocfs2_relink_block_group Cc: Jie Liu Cc: Joel Becker Cc: Mark Fasheh Cc: Sunil Mushran Cc: Younger Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 101d16d..5397c07 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1443,44 +1443,38 @@ static int ocfs2_relink_block_group(handle_t *handle, status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), prev_bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); - if (status < 0) { - mlog_errno(status); + if (status < 0) goto out; - } prev_bg->bg_next_group = bg->bg_next_group; ocfs2_journal_dirty(handle, prev_bg_bh); status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); - if (status < 0) { - mlog_errno(status); + if (status < 0) goto out_rollback_prev_bg; - } bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; ocfs2_journal_dirty(handle, bg_bh); status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), fe_bh, OCFS2_JOURNAL_ACCESS_WRITE); - if (status < 0) { - mlog_errno(status); + if (status < 0) goto out_rollback_bg; - } fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; ocfs2_journal_dirty(handle, fe_bh); out: + if (status < 0) + mlog_errno(status); return status; out_rollback_bg: bg->bg_next_group = cpu_to_le64(bg_ptr); out_rollback_prev_bg: prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); - - mlog_errno(status); - return status; + goto out; } static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, -- cgit v0.10.2 From b5a8bb717e29b549584cc27ac8e109a803c4f8c4 Mon Sep 17 00:00:00 2001 From: Younger Liu Date: Wed, 3 Jul 2013 15:01:01 -0700 Subject: ocfs2: fix readonly issue in ocfs2_unlink() While deleting a file with ocfs2_unlink(), there is a bug in this function. This bug will result in filesystem read-only. After calling ocfs2_orphan_add(), the file which will be deleted is added into orphan dir. If ocfs2_delete_entry() fails, the file still exists in the parent dir. And this scenario introduces a conflict of metadata. If a file is added into orphan dir, when we put inode of the file with iput(), the inode i_flags is setted (~OCFS2_VALID_FL) in ocfs2_remove_inode(), and then write back to disk. But as previously mentioned, the file still exists in the parent dir. On other nodes, the file can be still accessed. When first read the file with ocfs2_read_blocks() from disk, It will check and avalidate inode using ocfs2_validate_inode_block(). So File system will be readonly because the inode is invalid. In other words, the inode i_flags has been set (~OCFS2_VALID_FL). [akpm@linux-foundation.org: cleanups] [jeff.liu@oracle.com: s/inode_is_unlinkable/ocfs2_inode_is_unlinkable/] Signed-off-by: Younger Liu Signed-off-by: Jensen Cc: Jie Liu Cc: Joel Becker Cc: Mark Fasheh Cc: Sunil Mushran Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 30842f5..be3f867 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -773,7 +773,7 @@ static int ocfs2_remote_dentry_delete(struct dentry *dentry) return ret; } -static inline int inode_is_unlinkable(struct inode *inode) +static inline int ocfs2_inode_is_unlinkable(struct inode *inode) { if (S_ISDIR(inode->i_mode)) { if (inode->i_nlink == 2) @@ -791,6 +791,7 @@ static int ocfs2_unlink(struct inode *dir, { int status; int child_locked = 0; + bool is_unlinkable = false; struct inode *inode = dentry->d_inode; struct inode *orphan_dir = NULL; struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); @@ -865,7 +866,7 @@ static int ocfs2_unlink(struct inode *dir, goto leave; } - if (inode_is_unlinkable(inode)) { + if (ocfs2_inode_is_unlinkable(inode)) { status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, OCFS2_I(inode)->ip_blkno, orphan_name, &orphan_insert); @@ -873,6 +874,7 @@ static int ocfs2_unlink(struct inode *dir, mlog_errno(status); goto leave; } + is_unlinkable = true; } handle = ocfs2_start_trans(osb, ocfs2_unlink_credits(osb->sb)); @@ -892,15 +894,6 @@ static int ocfs2_unlink(struct inode *dir, fe = (struct ocfs2_dinode *) fe_bh->b_data; - if (inode_is_unlinkable(inode)) { - status = ocfs2_orphan_add(osb, handle, inode, fe_bh, orphan_name, - &orphan_insert, orphan_dir); - if (status < 0) { - mlog_errno(status); - goto leave; - } - } - /* delete the name from the parent dir */ status = ocfs2_delete_entry(handle, dir, &lookup); if (status < 0) { @@ -923,6 +916,14 @@ static int ocfs2_unlink(struct inode *dir, mlog_errno(status); if (S_ISDIR(inode->i_mode)) inc_nlink(dir); + goto leave; + } + + if (is_unlinkable) { + status = ocfs2_orphan_add(osb, handle, inode, fe_bh, + orphan_name, &orphan_insert, orphan_dir); + if (status < 0) + mlog_errno(status); } leave: -- cgit v0.10.2 From ef962df057aaafd714f5c22ba3de1be459571fdf Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Wed, 3 Jul 2013 15:01:03 -0700 Subject: ocfs2: xattr: fix inlined xattr reflink Inlined xattr shared free space of inode block with inlined data or data extent record, so the size of the later two should be adjusted when inlined xattr is enabled. See ocfs2_xattr_ibody_init(). But this isn't done well when reflink. For inode with inlined data, its max inlined data size is adjusted in ocfs2_duplicate_inline_data(), no problem. But for inode with data extent record, its record count isn't adjusted. Fix it, or data extent record and inlined xattr may overwrite each other, then cause data corruption or xattr failure. One panic caused by this bug in our test environment is the following: kernel BUG at fs/ocfs2/xattr.c:1435! invalid opcode: 0000 [#1] SMP Pid: 10871, comm: multi_reflink_t Not tainted 2.6.39-300.17.1.el5uek #1 RIP: ocfs2_xa_offset_pointer+0x17/0x20 [ocfs2] RSP: e02b:ffff88007a587948 EFLAGS: 00010283 RAX: 0000000000000000 RBX: 0000000000000010 RCX: 00000000000051e4 RDX: ffff880057092060 RSI: 0000000000000f80 RDI: ffff88007a587a68 RBP: ffff88007a587948 R08: 00000000000062f4 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000010 R13: ffff88007a587a68 R14: 0000000000000001 R15: ffff88007a587c68 FS: 00007fccff7f06e0(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000 CS: e033 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000000015cf000 CR3: 000000007aa76000 CR4: 0000000000000660 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process multi_reflink_t Call Trace: ocfs2_xa_reuse_entry+0x60/0x280 [ocfs2] ocfs2_xa_prepare_entry+0x17e/0x2a0 [ocfs2] ocfs2_xa_set+0xcc/0x250 [ocfs2] ocfs2_xattr_ibody_set+0x98/0x230 [ocfs2] __ocfs2_xattr_set_handle+0x4f/0x700 [ocfs2] ocfs2_xattr_set+0x6c6/0x890 [ocfs2] ocfs2_xattr_user_set+0x46/0x50 [ocfs2] generic_setxattr+0x70/0x90 __vfs_setxattr_noperm+0x80/0x1a0 vfs_setxattr+0xa9/0xb0 setxattr+0xc3/0x120 sys_fsetxattr+0xa8/0xd0 system_call_fastpath+0x16/0x1b Signed-off-by: Junxiao Bi Reviewed-by: Jie Liu Acked-by: Joel Becker Cc: Mark Fasheh Cc: Sunil Mushran Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 0deabcd..317ef0a 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -6491,6 +6491,16 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) } new_oi = OCFS2_I(args->new_inode); + /* + * Adjust extent record count to reserve space for extended attribute. + * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). + */ + if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && + !(ocfs2_inode_is_fast_symlink(args->new_inode))) { + struct ocfs2_extent_list *el = &new_di->id2.i_list; + le16_add_cpu(&el->l_count, -(inline_size / + sizeof(struct ocfs2_extent_rec))); + } spin_lock(&new_oi->ip_lock); new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); -- cgit v0.10.2 From e873fdb5259ac62cf099621590645470c12a1d21 Mon Sep 17 00:00:00 2001 From: Noboru Iwamatsu Date: Wed, 3 Jul 2013 15:01:04 -0700 Subject: ocfs2: submit disk heartbeat bio using WRITE_SYNC Under heavy I/O load, writing the disk heartbeat can be forced to wait for minutes, and this causes the node to be fenced. This patch tries to use WRITE_SYNC in submitting the heartbeat bio, so that writing the heartbeat will have a priority over other requests. Signed-off-by: Noboru Iwamatsu Acked-by: Tao Ma Acked-by: Sunil Mushran Cc: Srinivas Eeeda Reviewed-by: Jie Liu Tested-by: Gurudas Pai Cc: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 42252bf..f89b46b 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -500,7 +500,7 @@ static int o2hb_issue_node_write(struct o2hb_region *reg, } atomic_inc(&write_wc->wc_num_reqs); - submit_bio(WRITE, bio); + submit_bio(WRITE_SYNC, bio); status = 0; bail: -- cgit v0.10.2 From 70f651edb75b68e3c039d137a56be84f53211558 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Wed, 3 Jul 2013 15:01:06 -0700 Subject: ocfs2: consolidate o2hb_global_hearbeat_mode_set() naming convention s/o2hb_global_hearbeat_mode_set/o2hb_global_heartbeat_mode_set/ to make the signature of those routines in a consistent manner with others for heartbeating. Signed-off-by: Jie Liu Acked-by: Sunil Mushran Cc: Gurudas Pai Cc: Joel Becker Cc: Mark Fasheh Cc: Noboru Iwamatsu Cc: Srinivas Eeeda Cc: Tao Ma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index f89b46b..956d79d 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -176,7 +176,7 @@ static void o2hb_dead_threshold_set(unsigned int threshold) } } -static int o2hb_global_hearbeat_mode_set(unsigned int hb_mode) +static int o2hb_global_heartbeat_mode_set(unsigned int hb_mode) { int ret = -1; @@ -2271,7 +2271,7 @@ ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group, if (strnicmp(page, o2hb_heartbeat_mode_desc[i], len)) continue; - ret = o2hb_global_hearbeat_mode_set(i); + ret = o2hb_global_heartbeat_mode_set(i); if (!ret) printk(KERN_NOTICE "o2hb: Heartbeat mode set to %s\n", o2hb_heartbeat_mode_desc[i]); @@ -2304,7 +2304,7 @@ static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { NULL, }; -static struct configfs_item_operations o2hb_hearbeat_group_item_ops = { +static struct configfs_item_operations o2hb_heartbeat_group_item_ops = { .show_attribute = o2hb_heartbeat_group_show, .store_attribute = o2hb_heartbeat_group_store, }; @@ -2316,7 +2316,7 @@ static struct configfs_group_operations o2hb_heartbeat_group_group_ops = { static struct config_item_type o2hb_heartbeat_group_type = { .ct_group_ops = &o2hb_heartbeat_group_group_ops, - .ct_item_ops = &o2hb_hearbeat_group_item_ops, + .ct_item_ops = &o2hb_heartbeat_group_item_ops, .ct_attrs = o2hb_heartbeat_group_attrs, .ct_owner = THIS_MODULE, }; -- cgit v0.10.2 From b4d8ed4f8e527751c7ece6cef73f6212808e6130 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Wed, 3 Jul 2013 15:01:07 -0700 Subject: ocfs2: fix a comments typo at o2quo_hb_still_up() Fix a comment typo in o2quo_hb_still_up() Signed-off-by: Jie Liu Cc: Gurudas Pai Cc: Joel Becker Cc: Mark Fasheh Cc: Noboru Iwamatsu Cc: Srinivas Eeeda Cc: Sunil Mushran Cc: Tao Ma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index c19897d..1ec141e 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c @@ -264,7 +264,7 @@ void o2quo_hb_still_up(u8 node) /* This is analogous to hb_up. as a node's connection comes up we delay the * quorum decision until we see it heartbeating. the hold will be droped in * hb_up or hb_down. it might be perpetuated by con_err until hb_down. if - * it's already heartbeating we we might be dropping a hold that conn_up got. + * it's already heartbeating we might be dropping a hold that conn_up got. * */ void o2quo_conn_up(u8 node) { -- cgit v0.10.2 From 44e89cb8e279abdf83581a10e592c2451bae7824 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Wed, 3 Jul 2013 15:01:08 -0700 Subject: ocfs2: adjust switch_case syntax at o2net_state_change() Adjust switch..case syntax at o2net_state_change to meet the kernel coding standard. s/printk/pr_info/. [akpm@linux-foundation.org: revert pr_foo() change] Signed-off-by: Jie Liu Acked-by: Joel Becker Cc: Gurudas Pai Cc: Mark Fasheh Cc: Noboru Iwamatsu Cc: Srinivas Eeeda Cc: Sunil Mushran Cc: Tao Ma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index bb9a48b..d644dc6 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -633,19 +633,19 @@ static void o2net_state_change(struct sock *sk) state_change = sc->sc_state_change; switch(sk->sk_state) { - /* ignore connecting sockets as they make progress */ - case TCP_SYN_SENT: - case TCP_SYN_RECV: - break; - case TCP_ESTABLISHED: - o2net_sc_queue_work(sc, &sc->sc_connect_work); - break; - default: - printk(KERN_INFO "o2net: Connection to " SC_NODEF_FMT - " shutdown, state %d\n", - SC_NODEF_ARGS(sc), sk->sk_state); - o2net_sc_queue_work(sc, &sc->sc_shutdown_work); - break; + /* ignore connecting sockets as they make progress */ + case TCP_SYN_SENT: + case TCP_SYN_RECV: + break; + case TCP_ESTABLISHED: + o2net_sc_queue_work(sc, &sc->sc_connect_work); + break; + default: + printk(KERN_INFO "o2net: Connection to " SC_NODEF_FMT + " shutdown, state %d\n", + SC_NODEF_ARGS(sc), sk->sk_state); + o2net_sc_queue_work(sc, &sc->sc_shutdown_work); + break; } out: read_unlock(&sk->sk_callback_lock); -- cgit v0.10.2 From 4a184b4ff424e544359f081087723fc36efe603e Mon Sep 17 00:00:00 2001 From: Xue jiufei Date: Wed, 3 Jul 2013 15:01:10 -0700 Subject: ocfs2: fix NULL pointer dereference when traversing o2hb_all_regions There may exist NULL pointer dereference in config_item_name() when one volume (say Volume A) unmounts while another (say Volume B) mounting. Volume A Volume B already Mounted. Unmounting, call o2hb_heartbeat_group_drop_item() -> config_item_put(item) set reg(A)->item.ci_name to NULL in function config_item_cleanup(). begin mounting, call o2hb_region_pin() and tranverse all regions. When reading reg(A)->item.ci_name, it causes NULL pointer dereference. call o2hb_region_release() and del reg(A) from list. So we should skip accessing regions that is going to release when tranverse o2hb_all_regions. Signed-off-by: Yiwen Jiang Signed-off-by: joyce Acked-by: Joel Becker Cc: Mark Fasheh Cc: Sunil Mushran Cc: Jie Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 956d79d..5c1c864 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -2389,6 +2389,9 @@ static int o2hb_region_pin(const char *region_uuid) assert_spin_locked(&o2hb_live_lock); list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { + if (reg->hr_item_dropped) + continue; + uuid = config_item_name(®->hr_item); /* local heartbeat */ @@ -2439,6 +2442,9 @@ static void o2hb_region_unpin(const char *region_uuid) assert_spin_locked(&o2hb_live_lock); list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { + if (reg->hr_item_dropped) + continue; + uuid = config_item_name(®->hr_item); if (region_uuid) { if (strcmp(region_uuid, uuid)) @@ -2654,6 +2660,9 @@ int o2hb_get_all_regions(char *region_uuids, u8 max_regions) p = region_uuids; list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { + if (reg->hr_item_dropped) + continue; + mlog(0, "Region: %s\n", config_item_name(®->hr_item)); if (numregs < max_regions) { memcpy(p, config_item_name(®->hr_item), -- cgit v0.10.2 From 31bd8fbb41b1fdf61f80e3e506574b43fad5e478 Mon Sep 17 00:00:00 2001 From: Libo Chen Date: Wed, 3 Jul 2013 15:01:11 -0700 Subject: drivers/cdrom/gdrom.c: fix device number leak Without this patch, gdrom_major will leak when gd.cd_info alloc fails. Signed-off-by: Libo Chen Cc: Jens Axboe Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 4afcb65..5980cb9 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -830,9 +830,9 @@ probe_fail_cdrom_register: del_gendisk(gd.disk); probe_fail_no_disk: kfree(gd.cd_info); +probe_fail_no_mem: unregister_blkdev(gdrom_major, GDROM_DEV_NAME); gdrom_major = 0; -probe_fail_no_mem: pr_warning("Probe failed - error is 0x%X\n", err); return err; } -- cgit v0.10.2 From 8b0d77f13192678019f07cbc6b3338d6d91f1cf4 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 3 Jul 2013 15:01:12 -0700 Subject: block/compat_ioctl.c: do not leak info to user-space There is a hole in struct hd_geometry, so we have to zero the struct on stack before copying it to user-space. Signed-off-by: Cong Wang Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 7c668c8..7e5d474 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -59,6 +59,7 @@ static int compat_hdio_getgeo(struct gendisk *disk, struct block_device *bdev, if (!disk->fops->getgeo) return -ENOTTY; + memset(&geo, 0, sizeof(geo)); /* * We need to set the startsect first, the driver may * want to override it. -- cgit v0.10.2 From 542db01579fbb7ea7d1f7bb9ddcef1559df660b2 Mon Sep 17 00:00:00 2001 From: Jonathan Salwan Date: Wed, 3 Jul 2013 15:01:13 -0700 Subject: drivers/cdrom/cdrom.c: use kzalloc() for failing hardware In drivers/cdrom/cdrom.c mmc_ioctl_cdrom_read_data() allocates a memory area with kmalloc in line 2885. 2885 cgc->buffer = kmalloc(blocksize, GFP_KERNEL); 2886 if (cgc->buffer == NULL) 2887 return -ENOMEM; In line 2908 we can find the copy_to_user function: 2908 if (!ret && copy_to_user(arg, cgc->buffer, blocksize)) The cgc->buffer is never cleaned and initialized before this function. If ret = 0 with the previous basic block, it's possible to display some memory bytes in kernel space from userspace. When we read a block from the disk it normally fills the ->buffer but if the drive is malfunctioning there is a chance that it would only be partially filled. The result is an leak information to userspace. Signed-off-by: Dan Carpenter Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index d620b44..8a3aff7 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2882,7 +2882,7 @@ static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi, if (lba < 0) return -EINVAL; - cgc->buffer = kmalloc(blocksize, GFP_KERNEL); + cgc->buffer = kzalloc(blocksize, GFP_KERNEL); if (cgc->buffer == NULL) return -ENOMEM; -- cgit v0.10.2 From ffc8b30866879ed9ba62bd0a86fecdbd51cd3d19 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Jul 2013 15:01:14 -0700 Subject: block: do not pass disk names as format strings Disk names may contain arbitrary strings, so they must not be interpreted as format strings. It seems that only md allows arbitrary strings to be used for disk names, but this could allow for a local memory corruption from uid 0 into ring 0. CVE-2013-2851 Signed-off-by: Kees Cook Cc: Jens Axboe Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/block/genhd.c b/block/genhd.c index e9094b3..dadf42b 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -512,7 +512,7 @@ static void register_disk(struct gendisk *disk) ddev->parent = disk->driverfs_dev; - dev_set_name(ddev, disk->disk_name); + dev_set_name(ddev, "%s", disk->disk_name); /* delay uevents, until we scanned partition table */ dev_set_uevent_suppress(ddev, 1); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 037288e..46b35f7 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -714,7 +714,8 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, else blk_queue_flush(nbd->disk->queue, 0); - thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name); + thread = kthread_create(nbd_thread, nbd, "%s", + nbd->disk->disk_name); if (IS_ERR(thread)) { mutex_lock(&nbd->tx_lock); return PTR_ERR(thread); diff --git a/drivers/scsi/osd/osd_uld.c b/drivers/scsi/osd/osd_uld.c index 0fab6b5..9d86947 100644 --- a/drivers/scsi/osd/osd_uld.c +++ b/drivers/scsi/osd/osd_uld.c @@ -485,7 +485,7 @@ static int osd_probe(struct device *dev) oud->class_dev.class = &osd_uld_class; oud->class_dev.parent = dev; oud->class_dev.release = __remove; - error = dev_set_name(&oud->class_dev, disk->disk_name); + error = dev_set_name(&oud->class_dev, "%s", disk->disk_name); if (error) { OSD_ERR("dev_set_name failed => %d\n", error); goto err_put_cdev; -- cgit v0.10.2 From 1c8fca1d92e14859159a82b8a380d220139b7344 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Jul 2013 15:01:15 -0700 Subject: crypto: sanitize argument for format string The template lookup interface does not provide a way to use format strings, so make sure that the interface cannot be abused accidentally. Signed-off-by: Kees Cook Cc: Herbert Xu Cc: "David S. Miller" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/crypto/algapi.c b/crypto/algapi.c index 6149a6e..7a1ae87 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -495,7 +495,8 @@ static struct crypto_template *__crypto_lookup_template(const char *name) struct crypto_template *crypto_lookup_template(const char *name) { - return try_then_request_module(__crypto_lookup_template(name), name); + return try_then_request_module(__crypto_lookup_template(name), "%s", + name); } EXPORT_SYMBOL_GPL(crypto_lookup_template); -- cgit v0.10.2 From 040fa02077de01c7e08fa75be6125e4ca5636011 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 3 Jul 2013 15:01:16 -0700 Subject: clear_refs: sanitize accepted commands declaration This is the implementation of the soft-dirty bit concept that should help keep track of changes in user memory, which in turn is very-very required by the checkpoint-restore project (http://criu.org). To create a dump of an application(s) we save all the information about it to files, and the biggest part of such dump is the contents of tasks' memory. However, there are usage scenarios where it's not required to get _all_ the task memory while creating a dump. For example, when doing periodical dumps, it's only required to take full memory dump only at the first step and then take incremental changes of memory. Another example is live migration. We copy all the memory to the destination node without stopping all tasks, then stop them, check for what pages has changed, dump it and the rest of the state, then copy it to the destination node. This decreases freeze time significantly. That said, some help from kernel to watch how processes modify the contents of their memory is required. The proposal is to track changes with the help of new soft-dirty bit this way: 1. First do "echo 4 > /proc/$pid/clear_refs". At that point kernel clears the soft dirty _and_ the writable bits from all ptes of process $pid. From now on every write to any page will result in #pf and the subsequent call to pte_mkdirty/pmd_mkdirty, which in turn will set the soft dirty flag. 2. Then read the /proc/$pid/pagemap2 and check the soft-dirty bit reported there (the 55'th one). If set, the respective pte was written to since last call to clear refs. The soft-dirty bit is the _PAGE_BIT_HIDDEN one. Although it's used by kmemcheck, the latter one marks kernel pages with it, while the former bit is put on user pages so they do not conflict to each other. This patch: A new clear-refs type will be added in the next patch, so prepare code for that. [akpm@linux-foundation.org: don't assume that sizeof(enum clear_refs_types) == sizeof(int)] Signed-off-by: Pavel Emelyanov Cc: Matt Mackall Cc: Xiao Guangrong Cc: Glauber Costa Cc: Marcelo Tosatti Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 3e636d8..dad0809 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -688,6 +688,13 @@ const struct file_operations proc_tid_smaps_operations = { .release = seq_release_private, }; +enum clear_refs_types { + CLEAR_REFS_ALL = 1, + CLEAR_REFS_ANON, + CLEAR_REFS_MAPPED, + CLEAR_REFS_LAST, +}; + static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { @@ -719,10 +726,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, return 0; } -#define CLEAR_REFS_ALL 1 -#define CLEAR_REFS_ANON 2 -#define CLEAR_REFS_MAPPED 3 - static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -730,7 +733,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, char buffer[PROC_NUMBUF]; struct mm_struct *mm; struct vm_area_struct *vma; - int type; + enum clear_refs_types type; + int itype; int rv; memset(buffer, 0, sizeof(buffer)); @@ -738,10 +742,11 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; - rv = kstrtoint(strstrip(buffer), 10, &type); + rv = kstrtoint(strstrip(buffer), 10, &itype); if (rv < 0) return rv; - if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) + type = (enum clear_refs_types)itype; + if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST) return -EINVAL; task = get_proc_task(file_inode(file)); if (!task) -- cgit v0.10.2 From af9de7eb180fa9b74c2cdc256349304a58c63c02 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 3 Jul 2013 15:01:18 -0700 Subject: clear_refs: introduce private struct for mm_walk In the next patch the clear-refs-type will be required in clear_refs_pte_range funciton, so prepare the walk->private to carry this info. Signed-off-by: Pavel Emelyanov Cc: Matt Mackall Cc: Xiao Guangrong Cc: Glauber Costa Cc: Marcelo Tosatti Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index dad0809..ef6f6c6 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -695,10 +695,15 @@ enum clear_refs_types { CLEAR_REFS_LAST, }; +struct clear_refs_private { + struct vm_area_struct *vma; +}; + static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { - struct vm_area_struct *vma = walk->private; + struct clear_refs_private *cp = walk->private; + struct vm_area_struct *vma = cp->vma; pte_t *pte, ptent; spinlock_t *ptl; struct page *page; @@ -753,13 +758,16 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, return -ESRCH; mm = get_task_mm(task); if (mm) { + struct clear_refs_private cp = { + }; struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range, .mm = mm, + .private = &cp, }; down_read(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) { - clear_refs_walk.private = vma; + cp.vma = vma; if (is_vm_hugetlb_page(vma)) continue; /* -- cgit v0.10.2 From 2b0a9f017548f05e42fbf7e67c4a626c1ebd5e12 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 3 Jul 2013 15:01:19 -0700 Subject: pagemap: introduce pagemap_entry_t without pmshift bits These bits are always constant (== PAGE_SHIFT) and just occupy space in the entry. Moreover, in next patch we will need to report one more bit in the pagemap, but all bits are already busy on it. That said, describe the pagemap entry that has 6 more free zero bits. Signed-off-by: Pavel Emelyanov Cc: Matt Mackall Cc: Xiao Guangrong Cc: Glauber Costa Cc: Marcelo Tosatti Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ef6f6c6..39d6412 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -807,6 +807,7 @@ typedef struct { struct pagemapread { int pos, len; pagemap_entry_t *buffer; + bool v2; }; #define PAGEMAP_WALK_SIZE (PMD_SIZE) @@ -820,14 +821,16 @@ struct pagemapread { #define PM_PSHIFT_BITS 6 #define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) #define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) -#define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) +#define __PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) #define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) +/* in "new" pagemap pshift bits are occupied with more status bits */ +#define PM_STATUS2(v2, x) (__PM_PSHIFT(v2 ? x : PAGE_SHIFT)) #define PM_PRESENT PM_STATUS(4LL) #define PM_SWAP PM_STATUS(2LL) #define PM_FILE PM_STATUS(1LL) -#define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) +#define PM_NOT_PRESENT(v2) PM_STATUS2(v2, 0) #define PM_END_OF_BUFFER 1 static inline pagemap_entry_t make_pme(u64 val) @@ -850,7 +853,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, struct pagemapread *pm = walk->private; unsigned long addr; int err = 0; - pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); + pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); for (addr = start; addr < end; addr += PAGE_SIZE) { err = add_to_pagemap(addr, &pme, pm); @@ -860,7 +863,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, return err; } -static void pte_to_pagemap_entry(pagemap_entry_t *pme, +static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, struct vm_area_struct *vma, unsigned long addr, pte_t pte) { u64 frame, flags; @@ -879,18 +882,18 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, if (is_migration_entry(entry)) page = migration_entry_to_page(entry); } else { - *pme = make_pme(PM_NOT_PRESENT); + *pme = make_pme(PM_NOT_PRESENT(pm->v2)); return; } if (page && !PageAnon(page)) flags |= PM_FILE; - *pme = make_pme(PM_PFRAME(frame) | PM_PSHIFT(PAGE_SHIFT) | flags); + *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, 0) | flags); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, +static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, pmd_t pmd, int offset) { /* @@ -900,12 +903,12 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, */ if (pmd_present(pmd)) *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) - | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); + | PM_STATUS2(pm->v2, 0) | PM_PRESENT); else - *pme = make_pme(PM_NOT_PRESENT); + *pme = make_pme(PM_NOT_PRESENT(pm->v2)); } #else -static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, +static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, pmd_t pmd, int offset) { } @@ -918,7 +921,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct pagemapread *pm = walk->private; pte_t *pte; int err = 0; - pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); + pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); /* find the first VMA at or above 'addr' */ vma = find_vma(walk->mm, addr); @@ -928,7 +931,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, offset = (addr & ~PAGEMAP_WALK_MASK) >> PAGE_SHIFT; - thp_pmd_to_pagemap_entry(&pme, *pmd, offset); + thp_pmd_to_pagemap_entry(&pme, pm, *pmd, offset); err = add_to_pagemap(addr, &pme, pm); if (err) break; @@ -945,7 +948,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, * and need a new, higher one */ if (vma && (addr >= vma->vm_end)) { vma = find_vma(walk->mm, addr); - pme = make_pme(PM_NOT_PRESENT); + pme = make_pme(PM_NOT_PRESENT(pm->v2)); } /* check that 'vma' actually covers this address, @@ -953,7 +956,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (vma && (vma->vm_start <= addr) && !is_vm_hugetlb_page(vma)) { pte = pte_offset_map(pmd, addr); - pte_to_pagemap_entry(&pme, vma, addr, *pte); + pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); /* unmap before userspace copy */ pte_unmap(pte); } @@ -968,14 +971,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, } #ifdef CONFIG_HUGETLB_PAGE -static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, +static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, pte_t pte, int offset) { if (pte_present(pte)) *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) - | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); + | PM_STATUS2(pm->v2, 0) | PM_PRESENT); else - *pme = make_pme(PM_NOT_PRESENT); + *pme = make_pme(PM_NOT_PRESENT(pm->v2)); } /* This function walks within one hugetlb entry in the single call */ @@ -989,7 +992,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, for (; addr != end; addr += PAGE_SIZE) { int offset = (addr & ~hmask) >> PAGE_SHIFT; - huge_pte_to_pagemap_entry(&pme, *pte, offset); + huge_pte_to_pagemap_entry(&pme, pm, *pte, offset); err = add_to_pagemap(addr, &pme, pm); if (err) return err; @@ -1051,6 +1054,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (!count) goto out_task; + pm.v2 = false; pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); ret = -ENOMEM; -- cgit v0.10.2 From 0f8975ec4db2c8b5bd111b211292ca9be0feb6b8 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 3 Jul 2013 15:01:20 -0700 Subject: mm: soft-dirty bits for user memory changes tracking The soft-dirty is a bit on a PTE which helps to track which pages a task writes to. In order to do this tracking one should 1. Clear soft-dirty bits from PTEs ("echo 4 > /proc/PID/clear_refs) 2. Wait some time. 3. Read soft-dirty bits (55'th in /proc/PID/pagemap2 entries) To do this tracking, the writable bit is cleared from PTEs when the soft-dirty bit is. Thus, after this, when the task tries to modify a page at some virtual address the #PF occurs and the kernel sets the soft-dirty bit on the respective PTE. Note, that although all the task's address space is marked as r/o after the soft-dirty bits clear, the #PF-s that occur after that are processed fast. This is so, since the pages are still mapped to physical memory, and thus all the kernel does is finds this fact out and puts back writable, dirty and soft-dirty bits on the PTE. Another thing to note, is that when mremap moves PTEs they are marked with soft-dirty as well, since from the user perspective mremap modifies the virtual memory at mremap's new address. Signed-off-by: Pavel Emelyanov Cc: Matt Mackall Cc: Xiao Guangrong Cc: Glauber Costa Cc: Marcelo Tosatti Cc: KOSAKI Motohiro Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index fd8d0d5..fcc22c9 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -473,7 +473,8 @@ This file is only present if the CONFIG_MMU kernel configuration option is enabled. The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG -bits on both physical and virtual pages associated with a process. +bits on both physical and virtual pages associated with a process, and the +soft-dirty bit on pte (see Documentation/vm/soft-dirty.txt for details). To clear the bits for all the pages associated with the process > echo 1 > /proc/PID/clear_refs @@ -482,6 +483,10 @@ To clear the bits for the anonymous pages associated with the process To clear the bits for the file mapped pages associated with the process > echo 3 > /proc/PID/clear_refs + +To clear the soft-dirty bit + > echo 4 > /proc/PID/clear_refs + Any other value written to /proc/PID/clear_refs will have no effect. The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags diff --git a/Documentation/vm/soft-dirty.txt b/Documentation/vm/soft-dirty.txt new file mode 100644 index 0000000..9a12a59 --- /dev/null +++ b/Documentation/vm/soft-dirty.txt @@ -0,0 +1,36 @@ + SOFT-DIRTY PTEs + + The soft-dirty is a bit on a PTE which helps to track which pages a task +writes to. In order to do this tracking one should + + 1. Clear soft-dirty bits from the task's PTEs. + + This is done by writing "4" into the /proc/PID/clear_refs file of the + task in question. + + 2. Wait some time. + + 3. Read soft-dirty bits from the PTEs. + + This is done by reading from the /proc/PID/pagemap. The bit 55 of the + 64-bit qword is the soft-dirty one. If set, the respective PTE was + written to since step 1. + + + Internally, to do this tracking, the writable bit is cleared from PTEs +when the soft-dirty bit is cleared. So, after this, when the task tries to +modify a page at some virtual address the #PF occurs and the kernel sets +the soft-dirty bit on the respective PTE. + + Note, that although all the task's address space is marked as r/o after the +soft-dirty bits clear, the #PF-s that occur after that are processed fast. +This is so, since the pages are still mapped to physical memory, and thus all +the kernel does is finds this fact out and puts both writable and soft-dirty +bits on the PTE. + + + This feature is actively used by the checkpoint-restore project. You +can find more details about it on http://criu.org + + +-- Pavel Emelyanov, Apr 9, 2013 diff --git a/arch/Kconfig b/arch/Kconfig index a4429bc..8d2ae24 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -365,6 +365,9 @@ config HAVE_IRQ_TIME_ACCOUNTING config HAVE_ARCH_TRANSPARENT_HUGEPAGE bool +config HAVE_ARCH_SOFT_DIRTY + bool + config HAVE_MOD_ARCH_SPECIFIC bool help diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b094816..10764a3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -102,6 +102,7 @@ config X86 select HAVE_ARCH_SECCOMP_FILTER select BUILDTIME_EXTABLE_SORT select GENERIC_CMOS_UPDATE + select HAVE_ARCH_SOFT_DIRTY select CLOCKSOURCE_WATCHDOG select GENERIC_CLOCKEVENTS select ARCH_CLOCKSOURCE_DATA if X86_64 diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5b0818b..7dc305a 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -207,7 +207,7 @@ static inline pte_t pte_mkexec(pte_t pte) static inline pte_t pte_mkdirty(pte_t pte) { - return pte_set_flags(pte, _PAGE_DIRTY); + return pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); } static inline pte_t pte_mkyoung(pte_t pte) @@ -271,7 +271,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd) static inline pmd_t pmd_mkdirty(pmd_t pmd) { - return pmd_set_flags(pmd, _PAGE_DIRTY); + return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); } static inline pmd_t pmd_mkhuge(pmd_t pmd) @@ -294,6 +294,26 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd) return pmd_clear_flags(pmd, _PAGE_PRESENT); } +static inline int pte_soft_dirty(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SOFT_DIRTY; +} + +static inline int pmd_soft_dirty(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_SOFT_DIRTY; +} + +static inline pte_t pte_mksoft_dirty(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SOFT_DIRTY); +} + +static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); +} + /* * Mask out unsupported bits in a present pgprot. Non-present pgprots * can use those bits for other purposes, so leave them be. diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index e642300..c98ac63 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -55,6 +55,18 @@ #define _PAGE_HIDDEN (_AT(pteval_t, 0)) #endif +/* + * The same hidden bit is used by kmemcheck, but since kmemcheck + * works on kernel pages while soft-dirty engine on user space, + * they do not conflict with each other. + */ + +#ifdef CONFIG_MEM_SOFT_DIRTY +#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) +#else +#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 0)) +#endif + #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) #else diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 39d6412..a18e065 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -692,13 +693,32 @@ enum clear_refs_types { CLEAR_REFS_ALL = 1, CLEAR_REFS_ANON, CLEAR_REFS_MAPPED, + CLEAR_REFS_SOFT_DIRTY, CLEAR_REFS_LAST, }; struct clear_refs_private { struct vm_area_struct *vma; + enum clear_refs_types type; }; +static inline void clear_soft_dirty(struct vm_area_struct *vma, + unsigned long addr, pte_t *pte) +{ +#ifdef CONFIG_MEM_SOFT_DIRTY + /* + * The soft-dirty tracker uses #PF-s to catch writes + * to pages, so write-protect the pte as well. See the + * Documentation/vm/soft-dirty.txt for full description + * of how soft-dirty works. + */ + pte_t ptent = *pte; + ptent = pte_wrprotect(ptent); + ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); + set_pte_at(vma->vm_mm, addr, pte, ptent); +#endif +} + static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { @@ -718,6 +738,11 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, if (!pte_present(ptent)) continue; + if (cp->type == CLEAR_REFS_SOFT_DIRTY) { + clear_soft_dirty(vma, addr, pte); + continue; + } + page = vm_normal_page(vma, addr, ptent); if (!page) continue; @@ -759,6 +784,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, mm = get_task_mm(task); if (mm) { struct clear_refs_private cp = { + .type = type, }; struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range, @@ -766,6 +792,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, .private = &cp, }; down_read(&mm->mmap_sem); + if (type == CLEAR_REFS_SOFT_DIRTY) + mmu_notifier_invalidate_range_start(mm, 0, -1); for (vma = mm->mmap; vma; vma = vma->vm_next) { cp.vma = vma; if (is_vm_hugetlb_page(vma)) @@ -786,6 +814,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, walk_page_range(vma->vm_start, vma->vm_end, &clear_refs_walk); } + if (type == CLEAR_REFS_SOFT_DIRTY) + mmu_notifier_invalidate_range_end(mm, 0, -1); flush_tlb_mm(mm); up_read(&mm->mmap_sem); mmput(mm); @@ -827,6 +857,7 @@ struct pagemapread { /* in "new" pagemap pshift bits are occupied with more status bits */ #define PM_STATUS2(v2, x) (__PM_PSHIFT(v2 ? x : PAGE_SHIFT)) +#define __PM_SOFT_DIRTY (1LL) #define PM_PRESENT PM_STATUS(4LL) #define PM_SWAP PM_STATUS(2LL) #define PM_FILE PM_STATUS(1LL) @@ -868,6 +899,7 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, { u64 frame, flags; struct page *page = NULL; + int flags2 = 0; if (pte_present(pte)) { frame = pte_pfn(pte); @@ -888,13 +920,15 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, if (page && !PageAnon(page)) flags |= PM_FILE; + if (pte_soft_dirty(pte)) + flags2 |= __PM_SOFT_DIRTY; - *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, 0) | flags); + *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, - pmd_t pmd, int offset) + pmd_t pmd, int offset, int pmd_flags2) { /* * Currently pmd for thp is always present because thp can not be @@ -903,13 +937,13 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *p */ if (pmd_present(pmd)) *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) - | PM_STATUS2(pm->v2, 0) | PM_PRESENT); + | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT); else *pme = make_pme(PM_NOT_PRESENT(pm->v2)); } #else static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, - pmd_t pmd, int offset) + pmd_t pmd, int offset, int pmd_flags2) { } #endif @@ -926,12 +960,15 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, /* find the first VMA at or above 'addr' */ vma = find_vma(walk->mm, addr); if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { + int pmd_flags2; + + pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0); for (; addr != end; addr += PAGE_SIZE) { unsigned long offset; offset = (addr & ~PAGEMAP_WALK_MASK) >> PAGE_SHIFT; - thp_pmd_to_pagemap_entry(&pme, pm, *pmd, offset); + thp_pmd_to_pagemap_entry(&pme, pm, *pmd, offset, pmd_flags2); err = add_to_pagemap(addr, &pme, pm); if (err) break; diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index b183698..a7126d2 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -396,6 +396,28 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, #define arch_start_context_switch(prev) do {} while (0) #endif +#ifndef CONFIG_HAVE_ARCH_SOFT_DIRTY +static inline int pte_soft_dirty(pte_t pte) +{ + return 0; +} + +static inline int pmd_soft_dirty(pmd_t pmd) +{ + return 0; +} + +static inline pte_t pte_mksoft_dirty(pte_t pte) +{ + return pte; +} + +static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) +{ + return pmd; +} +#endif + #ifndef __HAVE_PFNMAP_TRACKING /* * Interfaces that can be used by architecture code to keep track of diff --git a/mm/Kconfig b/mm/Kconfig index f5e698e..7e28ecf 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -477,3 +477,15 @@ config FRONTSWAP and swap data is stored as normal on the matching swap device. If unsure, say Y to enable frontswap. + +config MEM_SOFT_DIRTY + bool "Track memory changes" + depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY + select PROC_PAGE_MONITOR + help + This option enables memory changes tracking by introducing a + soft-dirty bit on pte-s. This bit it set when someone writes + into a page just as regular dirty bit, but unlike the latter + it can be cleared by hands. + + See Documentation/vm/soft-dirty.txt for more details. diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 362c329..d8b3b85 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1429,7 +1429,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, if (ret == 1) { pmd = pmdp_get_and_clear(mm, old_addr, old_pmd); VM_BUG_ON(!pmd_none(*new_pmd)); - set_pmd_at(mm, new_addr, new_pmd, pmd); + set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); spin_unlock(&mm->page_table_lock); } out: diff --git a/mm/mremap.c b/mm/mremap.c index 463a257..3708655 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -126,7 +126,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, continue; pte = ptep_get_and_clear(mm, old_addr, old_pte); pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); - set_pte_at(mm, new_addr, new_pte, pte); + set_pte_at(mm, new_addr, new_pte, pte_mksoft_dirty(pte)); } arch_leave_lazy_mmu_mode(); -- cgit v0.10.2 From 541c237c0923f567c9c4cabb8a81635baadc713f Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 3 Jul 2013 15:01:22 -0700 Subject: pagemap: prepare to reuse constant bits with page-shift In order to reuse bits from pagemap entries gracefully, we leave the entries as is but on pagemap open emit a warning in dmesg, that bits 55-60 are about to change in a couple of releases. Next, if a user issues soft-dirty clear command via the clear_refs file (it was disabled before v3.9) we assume that he's aware of the new pagemap format, note that fact and report the bits in pagemap in the new manner. The "migration strategy" looks like this then: 1. existing users are not affected -- they don't touch soft-dirty feature, thus see old bits in pagemap, but are warned and have time to fix themselves 2. those who use soft-dirty know about new pagemap format 3. some time soon we get rid of any signs of page-shift in pagemap as well as this trick with clear-soft-dirty affecting pagemap format. Signed-off-by: Pavel Emelyanov Cc: Matt Mackall Cc: Xiao Guangrong Cc: Glauber Costa Cc: Marcelo Tosatti Cc: KOSAKI Motohiro Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt index 7587493..fd7c3cf 100644 --- a/Documentation/vm/pagemap.txt +++ b/Documentation/vm/pagemap.txt @@ -15,7 +15,8 @@ There are three components to pagemap: * Bits 0-54 page frame number (PFN) if present * Bits 0-4 swap type if swapped * Bits 5-54 swap offset if swapped - * Bits 55-60 page shift (page size = 1<= CLEAR_REFS_LAST) return -EINVAL; + + if (type == CLEAR_REFS_SOFT_DIRTY) { + soft_dirty_cleared = true; + pr_warn_once("The pagemap bits 55-60 has changed their meaning! " + "See the linux/Documentation/vm/pagemap.txt for details.\n"); + } + task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; @@ -1091,7 +1115,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (!count) goto out_task; - pm.v2 = false; + pm.v2 = soft_dirty_cleared; pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); ret = -ENOMEM; @@ -1164,9 +1188,18 @@ out: return ret; } +static int pagemap_open(struct inode *inode, struct file *file) +{ + pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about " + "to stop being page-shift some time soon. See the " + "linux/Documentation/vm/pagemap.txt for details.\n"); + return 0; +} + const struct file_operations proc_pagemap_operations = { .llseek = mem_lseek, /* borrow this */ .read = pagemap_read, + .open = pagemap_open, }; #endif /* CONFIG_PROC_PAGE_MONITOR */ -- cgit v0.10.2 From ffbdccf5e1facd18b54429a749667fb185c10f20 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 3 Jul 2013 15:01:23 -0700 Subject: mm, memcg: don't take task_lock in task_in_mem_cgroup For processes that have detached their mm's, task_in_mem_cgroup() unnecessarily takes task_lock() when rcu_read_lock() is all that is necessary to call mem_cgroup_from_task(). While we're here, switch task_in_mem_cgroup() to return bool. Signed-off-by: David Rientjes Cc: KAMEZAWA Hiroyuki Cc: Johannes Weiner Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d6183f0..7b4d9d7 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -77,7 +77,8 @@ extern void mem_cgroup_uncharge_cache_page(struct page *page); bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, struct mem_cgroup *memcg); -int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg); +bool task_in_mem_cgroup(struct task_struct *task, + const struct mem_cgroup *memcg); extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); @@ -273,10 +274,10 @@ static inline bool mm_match_cgroup(struct mm_struct *mm, return true; } -static inline int task_in_mem_cgroup(struct task_struct *task, - const struct mem_cgroup *memcg) +static inline bool task_in_mem_cgroup(struct task_struct *task, + const struct mem_cgroup *memcg) { - return 1; + return true; } static inline struct cgroup_subsys_state diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1947218..4748966 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1448,11 +1448,12 @@ static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, return ret; } -int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg) +bool task_in_mem_cgroup(struct task_struct *task, + const struct mem_cgroup *memcg) { - int ret; struct mem_cgroup *curr = NULL; struct task_struct *p; + bool ret; p = find_lock_task_mm(task); if (p) { @@ -1464,14 +1465,14 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg) * killer still needs to detect if they have already been oom * killed to prevent needlessly killing additional tasks. */ - task_lock(task); + rcu_read_lock(); curr = mem_cgroup_from_task(task); if (curr) css_get(&curr->css); - task_unlock(task); + rcu_read_unlock(); } if (!curr) - return 0; + return false; /* * We should check use_hierarchy of "memcg" not "curr". Because checking * use_hierarchy of "curr" here make this function true if hierarchy is -- cgit v0.10.2 From b430e9d1c6d416306d44dbf3aa3148be7af78abc Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 3 Jul 2013 15:01:24 -0700 Subject: mm: remove compressed copy from zram in-memory Swap subsystem does lazy swap slot free with expecting the page would be swapped out again so we can avoid unnecessary write. But the problem in in-memory swap(ex, zram) is that it consumes memory space until vm_swap_full(ie, used half of all of swap device) condition meet. It could be bad if we use multiple swap device, small in-memory swap and big storage swap or in-memory swap alone. This patch makes swap subsystem free swap slot as soon as swap-read is completed and make the swapcache page dirty so the page should be written out the swap device to reclaim it. It means we never lose it. I tested this patch with kernel compile workload. 1. before compile time : 9882.42 zram max wasted space by fragmentation: 13471881 byte memory space consumed by zram: 174227456 byte the number of slot free notify: 206684 2. after compile time : 9653.90 zram max wasted space by fragmentation: 11805932 byte memory space consumed by zram: 154001408 byte the number of slot free notify: 426972 [akpm@linux-foundation.org: tweak comment text] [artem.savkov@gmail.com: fix BUG due to non-swapcache pages in end_swap_bio_read()] [akpm@linux-foundation.org: invert unlikely() test, augment comment, 80-col cleanup] Signed-off-by: Dan Magenheimer Signed-off-by: Minchan Kim Signed-off-by: Artem Savkov Cc: Hugh Dickins Cc: Seth Jennings Cc: Nitin Gupta Cc: Konrad Rzeszutek Wilk Cc: Shaohua Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_io.c b/mm/page_io.c index a8a3ef4..ba05b64 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -21,6 +21,7 @@ #include #include #include +#include #include static struct bio *get_swap_bio(gfp_t gfp_flags, @@ -80,9 +81,54 @@ void end_swap_bio_read(struct bio *bio, int err) imajor(bio->bi_bdev->bd_inode), iminor(bio->bi_bdev->bd_inode), (unsigned long long)bio->bi_sector); - } else { - SetPageUptodate(page); + goto out; } + + SetPageUptodate(page); + + /* + * There is no guarantee that the page is in swap cache - the software + * suspend code (at least) uses end_swap_bio_read() against a non- + * swapcache page. So we must check PG_swapcache before proceeding with + * this optimization. + */ + if (likely(PageSwapCache(page))) { + struct swap_info_struct *sis; + + sis = page_swap_info(page); + if (sis->flags & SWP_BLKDEV) { + /* + * The swap subsystem performs lazy swap slot freeing, + * expecting that the page will be swapped out again. + * So we can avoid an unnecessary write if the page + * isn't redirtied. + * This is good for real swap storage because we can + * reduce unnecessary I/O and enhance wear-leveling + * if an SSD is used as the as swap device. + * But if in-memory swap device (eg zram) is used, + * this causes a duplicated copy between uncompressed + * data in VM-owned memory and compressed data in + * zram-owned memory. So let's free zram-owned memory + * and make the VM-owned decompressed page *dirty*, + * so the page should be swapped out somewhere again if + * we again wish to reclaim it. + */ + struct gendisk *disk = sis->bdev->bd_disk; + if (disk->fops->swap_slot_free_notify) { + swp_entry_t entry; + unsigned long offset; + + entry.val = page_private(page); + offset = swp_offset(entry); + + SetPageDirty(page); + disk->fops->swap_slot_free_notify(sis->bdev, + offset); + } + } + } + +out: unlock_page(page); bio_put(bio); } -- cgit v0.10.2 From d6e932177090463e5c709e9e61bbd705a33a1609 Mon Sep 17 00:00:00 2001 From: Libin Date: Wed, 3 Jul 2013 15:01:26 -0700 Subject: mm: use vma_pages() to replace (vm_end - vm_start) >> PAGE_SHIFT (*->vm_end - *->vm_start) >> PAGE_SHIFT operation is implemented as a inline funcion vma_pages() in linux/mm.h, so using it. Signed-off-by: Libin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memory.c b/mm/memory.c index 95d0cce..a101bbc 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2904,7 +2904,7 @@ static inline void unmap_mapping_range_tree(struct rb_root *root, details->first_index, details->last_index) { vba = vma->vm_pgoff; - vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1; + vea = vba + vma_pages(vma) - 1; /* Assume for now that PAGE_CACHE_SHIFT == PAGE_SHIFT */ zba = details->first_index; if (zba < vba) diff --git a/mm/mmap.c b/mm/mmap.c index f681e18..8468ffd 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -955,7 +955,7 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags, if (is_mergeable_vma(vma, file, vm_flags) && is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) { pgoff_t vm_pglen; - vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + vm_pglen = vma_pages(vma); if (vma->vm_pgoff + vm_pglen == vm_pgoff) return 1; } -- cgit v0.10.2 From ef9f515a4c38c759c56c7d1e760cc243b6d516f4 Mon Sep 17 00:00:00 2001 From: Libin Date: Wed, 3 Jul 2013 15:01:26 -0700 Subject: ncpfs: use vma_pages() to replace (vm_end - vm_start) >> PAGE_SHIFT (*->vm_end - *->vm_start) >> PAGE_SHIFT operation is implemented as a inline funcion vma_pages() in linux/mm.h, so using it. Signed-off-by: Libin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index ee24df5..3c5dd55 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -117,7 +117,7 @@ int ncp_mmap(struct file *file, struct vm_area_struct *vma) return -EINVAL; /* we do not support files bigger than 4GB... We eventually supports just 4GB... */ - if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff + if (vma_pages(vma) + vma->vm_pgoff > (1U << (32 - PAGE_SHIFT))) return -EFBIG; -- cgit v0.10.2 From 52c2dad914ca3ac84926106d95ddf47de2f40b45 Mon Sep 17 00:00:00 2001 From: Libin Date: Wed, 3 Jul 2013 15:01:27 -0700 Subject: uio: use vma_pages() to replace (vm_end - vm_start) >> PAGE_SHIFT (*->vm_end - *->vm_start) >> PAGE_SHIFT operation is implemented as a inline funcion vma_pages() in linux/mm.h, so using it. Signed-off-by: Libin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index b645c47..3b96f18 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -677,7 +677,7 @@ static int uio_mmap(struct file *filep, struct vm_area_struct *vma) if (mi < 0) return -EINVAL; - requested_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + requested_pages = vma_pages(vma); actual_pages = ((idev->info->mem[mi].addr & ~PAGE_MASK) + idev->info->mem[mi].size + PAGE_SIZE -1) >> PAGE_SHIFT; if (requested_pages > actual_pages) -- cgit v0.10.2 From 4008bab7b3969ad9f9dd1d02096a3f0aa5610bd2 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 3 Jul 2013 15:01:28 -0700 Subject: mm/page_alloc: factor out setting of pcp->high and pcp->batch "Problems" with the current code: 1: there is a lack of synchronization in setting ->high and ->batch in percpu_pagelist_fraction_sysctl_handler() 2: stop_machine() in zone_pcp_update() is unnecissary. 3: zone_pcp_update() does not consider the case where percpu_pagelist_fraction is non-zero To fix: 1: add memory barriers, a safe ->batch value, an update side mutex when updating ->high and ->batch, and use ACCESS_ONCE() for ->batch users that expect a stable value. 2: avoid draining pages in zone_pcp_update(), rely upon the memory barriers added to fix #1 3: factor out quite a few functions, and then call the appropriate one. Note that it results in a change to the behavior of zone_pcp_update(), which is used by memory_hotplug. I'm rather certain that I've diserned (and preserved) the essential behavior (changing ->high and ->batch), and only eliminated unneeded actions (draining the per cpu pages), but this may not be the case. Further note that the draining of pages that previously took place in zone_pcp_update() occured after repeated draining when attempting to offline a page, and after the offline has "succeeded". It appears that the draining was added to zone_pcp_update() to avoid refactoring setup_pageset() into 2 funtions. This patch: Creates pageset_set_batch() for use in setup_pageset(). pageset_set_batch() imitates the functionality of setup_pagelist_highmark(), but uses the boot time (percpu_pagelist_fraction == 0) calculations for determining ->high based on ->batch. Signed-off-by: Cody P Schafer Cc: Gilad Ben-Yossef Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c3edb62..d4bcc20 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4032,6 +4032,14 @@ static int __meminit zone_batchsize(struct zone *zone) #endif } +/* a companion to setup_pagelist_highmark() */ +static void pageset_set_batch(struct per_cpu_pageset *p, unsigned long batch) +{ + struct per_cpu_pages *pcp = &p->pcp; + pcp->high = 6 * batch; + pcp->batch = max(1UL, 1 * batch); +} + static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) { struct per_cpu_pages *pcp; @@ -4041,8 +4049,7 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) pcp = &p->pcp; pcp->count = 0; - pcp->high = 6 * batch; - pcp->batch = max(1UL, 1 * batch); + pageset_set_batch(p, batch); for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++) INIT_LIST_HEAD(&pcp->lists[migratetype]); } @@ -4051,7 +4058,6 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist * to the value high for the pageset p. */ - static void setup_pagelist_highmark(struct per_cpu_pageset *p, unsigned long high) { -- cgit v0.10.2 From c8e251fadc6220261f6e0c6b8a4f1cdf27626165 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 3 Jul 2013 15:01:29 -0700 Subject: mm/page_alloc: prevent concurrent updaters of pcp ->batch and ->high Because we are going to rely upon a careful transision between old and new ->high and ->batch values using memory barriers and will remove stop_machine(), we need to prevent multiple updaters from interweaving their memory writes. Add a simple mutex to protect both update loops. Signed-off-by: Cody P Schafer Cc: Gilad Ben-Yossef Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d4bcc20..8d43357 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -65,6 +65,9 @@ #include #include "internal.h" +/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */ +static DEFINE_MUTEX(pcp_batch_high_lock); + #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID DEFINE_PER_CPU(int, numa_node); EXPORT_PER_CPU_SYMBOL(numa_node); @@ -5557,6 +5560,8 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, ret = proc_dointvec_minmax(table, write, buffer, length, ppos); if (!write || (ret < 0)) return ret; + + mutex_lock(&pcp_batch_high_lock); for_each_populated_zone(zone) { for_each_possible_cpu(cpu) { unsigned long high; @@ -5565,6 +5570,7 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, per_cpu_ptr(zone->pageset, cpu), high); } } + mutex_unlock(&pcp_batch_high_lock); return 0; } @@ -6078,7 +6084,9 @@ static int __meminit __zone_pcp_update(void *data) void __meminit zone_pcp_update(struct zone *zone) { + mutex_lock(&pcp_batch_high_lock); stop_machine(__zone_pcp_update, zone, NULL); + mutex_unlock(&pcp_batch_high_lock); } #endif -- cgit v0.10.2 From 8d7a8fa97abeb4fd6b3975d32c9f859875157770 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 3 Jul 2013 15:01:31 -0700 Subject: mm/page_alloc: insert memory barriers to allow async update of pcp batch and high Introduce pageset_update() to perform a safe transision from one set of pcp->{batch,high} to a new set using memory barriers. This ensures that batch is always set to a safe value (1) prior to updating high, and ensure that high is fully updated before setting the real value of batch. It avoids ->batch ever rising above ->high. Suggested by Gilad Ben-Yossef in these threads: https://lkml.org/lkml/2013/4/9/23 https://lkml.org/lkml/2013/4/10/49 Also reproduces his proposed comment. Signed-off-by: Cody P Schafer Reviewed-by: Gilad Ben-Yossef Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8d43357..eaaef2a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4035,12 +4035,37 @@ static int __meminit zone_batchsize(struct zone *zone) #endif } +/* + * pcp->high and pcp->batch values are related and dependent on one another: + * ->batch must never be higher then ->high. + * The following function updates them in a safe manner without read side + * locking. + * + * Any new users of pcp->batch and pcp->high should ensure they can cope with + * those fields changing asynchronously (acording the the above rule). + * + * mutex_is_locked(&pcp_batch_high_lock) required when calling this function + * outside of boot time (or some other assurance that no concurrent updaters + * exist). + */ +static void pageset_update(struct per_cpu_pages *pcp, unsigned long high, + unsigned long batch) +{ + /* start with a fail safe value for batch */ + pcp->batch = 1; + smp_wmb(); + + /* Update high, then batch, in order */ + pcp->high = high; + smp_wmb(); + + pcp->batch = batch; +} + /* a companion to setup_pagelist_highmark() */ static void pageset_set_batch(struct per_cpu_pageset *p, unsigned long batch) { - struct per_cpu_pages *pcp = &p->pcp; - pcp->high = 6 * batch; - pcp->batch = max(1UL, 1 * batch); + pageset_update(&p->pcp, 6 * batch, max(1UL, 1 * batch)); } static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) @@ -4064,13 +4089,11 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) static void setup_pagelist_highmark(struct per_cpu_pageset *p, unsigned long high) { - struct per_cpu_pages *pcp; + unsigned long batch = max(1UL, high / 4); + if ((high / 4) > (PAGE_SHIFT * 8)) + batch = PAGE_SHIFT * 8; - pcp = &p->pcp; - pcp->high = high; - pcp->batch = max(1UL, high/4); - if ((high/4) > (PAGE_SHIFT * 8)) - pcp->batch = PAGE_SHIFT * 8; + pageset_update(&p->pcp, high, batch); } static void __meminit setup_zone_pageset(struct zone *zone) -- cgit v0.10.2 From 998d39cb236fe464af86a3492a24d2f67ee1efc2 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 3 Jul 2013 15:01:32 -0700 Subject: mm/page_alloc: protect pcp->batch accesses with ACCESS_ONCE pcp->batch could change at any point, avoid relying on it being a stable value. Signed-off-by: Cody P Schafer Cc: Gilad Ben-Yossef Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index eaaef2a..97b8f86 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1182,10 +1182,12 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) { unsigned long flags; int to_drain; + unsigned long batch; local_irq_save(flags); - if (pcp->count >= pcp->batch) - to_drain = pcp->batch; + batch = ACCESS_ONCE(pcp->batch); + if (pcp->count >= batch) + to_drain = batch; else to_drain = pcp->count; if (to_drain > 0) { @@ -1353,8 +1355,9 @@ void free_hot_cold_page(struct page *page, int cold) list_add(&page->lru, &pcp->lists[migratetype]); pcp->count++; if (pcp->count >= pcp->high) { - free_pcppages_bulk(zone, pcp->batch, pcp); - pcp->count -= pcp->batch; + unsigned long batch = ACCESS_ONCE(pcp->batch); + free_pcppages_bulk(zone, batch, pcp); + pcp->count -= batch; } out: -- cgit v0.10.2 From 0a647f3811d6af56405a819341ceac23e31d4572 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 3 Jul 2013 15:01:33 -0700 Subject: mm/page_alloc: convert zone_pcp_update() to rely on memory barriers instead of stop_machine() zone_pcp_update()'s goal is to adjust the ->high and ->mark members of a percpu pageset based on a zone's ->managed_pages. We don't need to drain the entire percpu pageset just to modify these fields. This lets us avoid calling setup_pageset() (and the draining required to call it) and instead allows simply setting the fields' values (with some attention paid to memory barriers to prevent the relationship between ->batch and ->high from being thrown off). This does change the behavior of zone_pcp_update() as the percpu pagesets will not be drained when zone_pcp_update() is called (they will end up being shrunk, not completely drained, later when a 0-order page is freed in free_hot_cold_page()). Signed-off-by: Cody P Schafer Cc: Gilad Ben-Yossef Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 97b8f86..8125263 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6085,33 +6085,18 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages) #endif #ifdef CONFIG_MEMORY_HOTPLUG -static int __meminit __zone_pcp_update(void *data) -{ - struct zone *zone = data; - int cpu; - unsigned long batch = zone_batchsize(zone), flags; - - for_each_possible_cpu(cpu) { - struct per_cpu_pageset *pset; - struct per_cpu_pages *pcp; - - pset = per_cpu_ptr(zone->pageset, cpu); - pcp = &pset->pcp; - - local_irq_save(flags); - if (pcp->count > 0) - free_pcppages_bulk(zone, pcp->count, pcp); - drain_zonestat(zone, pset); - setup_pageset(pset, batch); - local_irq_restore(flags); - } - return 0; -} - +/* + * The zone indicated has a new number of managed_pages; batch sizes and percpu + * page high values need to be recalulated. + */ void __meminit zone_pcp_update(struct zone *zone) { + unsigned cpu; + unsigned long batch; mutex_lock(&pcp_batch_high_lock); - stop_machine(__zone_pcp_update, zone, NULL); + batch = zone_batchsize(zone); + for_each_possible_cpu(cpu) + pageset_set_batch(per_cpu_ptr(zone->pageset, cpu), batch); mutex_unlock(&pcp_batch_high_lock); } #endif -- cgit v0.10.2 From 22a7f12b1606327f0e11fcdf9043ae00bf9917df Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 3 Jul 2013 15:01:34 -0700 Subject: mm/page_alloc: when handling percpu_pagelist_fraction, don't unneedly recalulate high Simply moves calculation of the new 'high' value outside the for_each_possible_cpu() loop, as it does not depend on the cpu. Signed-off-by: Cody P Schafer Cc: Gilad Ben-Yossef Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8125263..386de0f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5575,7 +5575,6 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, * cpu. It is the fraction of total pages in each zone that a hot per cpu pagelist * can have before it gets flushed back to buddy allocator. */ - int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { @@ -5589,12 +5588,11 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, mutex_lock(&pcp_batch_high_lock); for_each_populated_zone(zone) { - for_each_possible_cpu(cpu) { - unsigned long high; - high = zone->managed_pages / percpu_pagelist_fraction; + unsigned long high; + high = zone->managed_pages / percpu_pagelist_fraction; + for_each_possible_cpu(cpu) setup_pagelist_highmark( - per_cpu_ptr(zone->pageset, cpu), high); - } + per_cpu_ptr(zone->pageset, cpu), high); } mutex_unlock(&pcp_batch_high_lock); return 0; -- cgit v0.10.2 From 88c90dbccaaed35991b5336fec84294de1d23538 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 3 Jul 2013 15:01:35 -0700 Subject: mm/page_alloc: factor setup_pageset() into pageset_init() and pageset_set_batch() Signed-off-by: Cody P Schafer Cc: Gilad Ben-Yossef Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 386de0f..a235149 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4071,7 +4071,7 @@ static void pageset_set_batch(struct per_cpu_pageset *p, unsigned long batch) pageset_update(&p->pcp, 6 * batch, max(1UL, 1 * batch)); } -static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) +static void pageset_init(struct per_cpu_pageset *p) { struct per_cpu_pages *pcp; int migratetype; @@ -4080,11 +4080,16 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) pcp = &p->pcp; pcp->count = 0; - pageset_set_batch(p, batch); for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++) INIT_LIST_HEAD(&pcp->lists[migratetype]); } +static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) +{ + pageset_init(p); + pageset_set_batch(p, batch); +} + /* * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist * to the value high for the pageset p. -- cgit v0.10.2 From dd1895e2c5c9ed3a791d1d8eb4a6a3e241ec9d6e Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 3 Jul 2013 15:01:36 -0700 Subject: mm/page_alloc: relocate comment to be directly above code it refers to. Signed-off-by: Cody P Schafer Cc: Gilad Ben-Yossef Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a235149..2793ce5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3711,12 +3711,12 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone) mminit_verify_zonelist(); cpuset_init_current_mems_allowed(); } else { - /* we have to stop all cpus to guarantee there is no user - of zonelist */ #ifdef CONFIG_MEMORY_HOTPLUG if (zone) setup_zone_pageset(zone); #endif + /* we have to stop all cpus to guarantee there is no user + of zonelist */ stop_machine(__build_all_zonelists, pgdat, NULL); /* cpuset refresh routine should be here */ } -- cgit v0.10.2 From 56cef2b85c28d81efd39f2eeaddce28678756fe3 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 3 Jul 2013 15:01:38 -0700 Subject: mm/page_alloc: factor zone_pageset_init() out of setup_zone_pageset() Signed-off-by: Cody P Schafer Cc: Gilad Ben-Yossef Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2793ce5..ee6fe7f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4104,22 +4104,25 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p, pageset_update(&p->pcp, high, batch); } +static void __meminit zone_pageset_init(struct zone *zone, int cpu) +{ + struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu); + + pageset_init(pcp); + if (percpu_pagelist_fraction) + setup_pagelist_highmark(pcp, + (zone->managed_pages / + percpu_pagelist_fraction)); + else + pageset_set_batch(pcp, zone_batchsize(zone)); +} + static void __meminit setup_zone_pageset(struct zone *zone) { int cpu; - zone->pageset = alloc_percpu(struct per_cpu_pageset); - - for_each_possible_cpu(cpu) { - struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu); - - setup_pageset(pcp, zone_batchsize(zone)); - - if (percpu_pagelist_fraction) - setup_pagelist_highmark(pcp, - (zone->managed_pages / - percpu_pagelist_fraction)); - } + for_each_possible_cpu(cpu) + zone_pageset_init(zone, cpu); } /* -- cgit v0.10.2 From 737af4c0110fc69a81dc7464a74a4113f7645255 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 3 Jul 2013 15:01:39 -0700 Subject: mm/page_alloc: in zone_pcp_update(), uze zone_pageset_init() Previously, zone_pcp_update() called pageset_set_batch() directly, essentially assuming that percpu_pagelist_fraction == 0. Correct this by calling zone_pageset_init(), which chooses the appropriate ->batch and ->high calculations. Signed-off-by: Cody P Schafer Cc: Gilad Ben-Yossef Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ee6fe7f..c7344d1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6098,11 +6098,9 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages) void __meminit zone_pcp_update(struct zone *zone) { unsigned cpu; - unsigned long batch; mutex_lock(&pcp_batch_high_lock); - batch = zone_batchsize(zone); for_each_possible_cpu(cpu) - pageset_set_batch(per_cpu_ptr(zone->pageset, cpu), batch); + zone_pageset_init(zone, cpu); mutex_unlock(&pcp_batch_high_lock); } #endif -- cgit v0.10.2 From 3664033c56f211a3dcf28d9d68c604ed447d8d79 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 3 Jul 2013 15:01:40 -0700 Subject: mm/page_alloc: rename setup_pagelist_highmark() to match naming of pageset_set_batch() Signed-off-by: Cody P Schafer Cc: Gilad Ben-Yossef Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c7344d1..03a3f94 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4065,7 +4065,7 @@ static void pageset_update(struct per_cpu_pages *pcp, unsigned long high, pcp->batch = batch; } -/* a companion to setup_pagelist_highmark() */ +/* a companion to pageset_set_high() */ static void pageset_set_batch(struct per_cpu_pageset *p, unsigned long batch) { pageset_update(&p->pcp, 6 * batch, max(1UL, 1 * batch)); @@ -4091,10 +4091,10 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) } /* - * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist + * pageset_set_high() sets the high water mark for hot per_cpu_pagelist * to the value high for the pageset p. */ -static void setup_pagelist_highmark(struct per_cpu_pageset *p, +static void pageset_set_high(struct per_cpu_pageset *p, unsigned long high) { unsigned long batch = max(1UL, high / 4); @@ -4110,7 +4110,7 @@ static void __meminit zone_pageset_init(struct zone *zone, int cpu) pageset_init(pcp); if (percpu_pagelist_fraction) - setup_pagelist_highmark(pcp, + pageset_set_high(pcp, (zone->managed_pages / percpu_pagelist_fraction)); else @@ -5599,8 +5599,8 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, unsigned long high; high = zone->managed_pages / percpu_pagelist_fraction; for_each_possible_cpu(cpu) - setup_pagelist_highmark( - per_cpu_ptr(zone->pageset, cpu), high); + pageset_set_high(per_cpu_ptr(zone->pageset, cpu), + high); } mutex_unlock(&pcp_batch_high_lock); return 0; -- cgit v0.10.2 From 169f6c1999ca6d0c5e06e8d810817ed3d1ebf017 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 3 Jul 2013 15:01:41 -0700 Subject: mm/page_alloc: don't re-init pageset in zone_pcp_update() When memory hotplug is triggered, we call pageset_init() on per-cpu-pagesets which both contain pages and are in use, causing both the leakage of those pages and (potentially) bad behaviour if a page is allocated from a pageset while it is being cleared. Avoid this by factoring out pageset_set_high_and_batch() (which contains all needed logic too set a pageset's ->high and ->batch inrespective of system state) from zone_pageset_init() and using the new pageset_set_high_and_batch() instead of zone_pageset_init() in zone_pcp_update(). Signed-off-by: Cody P Schafer Cc: Valdis Kletnieks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 03a3f94..fab9506 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4104,11 +4104,9 @@ static void pageset_set_high(struct per_cpu_pageset *p, pageset_update(&p->pcp, high, batch); } -static void __meminit zone_pageset_init(struct zone *zone, int cpu) +static void __meminit pageset_set_high_and_batch(struct zone *zone, + struct per_cpu_pageset *pcp) { - struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu); - - pageset_init(pcp); if (percpu_pagelist_fraction) pageset_set_high(pcp, (zone->managed_pages / @@ -4117,6 +4115,14 @@ static void __meminit zone_pageset_init(struct zone *zone, int cpu) pageset_set_batch(pcp, zone_batchsize(zone)); } +static void __meminit zone_pageset_init(struct zone *zone, int cpu) +{ + struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu); + + pageset_init(pcp); + pageset_set_high_and_batch(zone, pcp); +} + static void __meminit setup_zone_pageset(struct zone *zone) { int cpu; @@ -6100,7 +6106,8 @@ void __meminit zone_pcp_update(struct zone *zone) unsigned cpu; mutex_lock(&pcp_batch_high_lock); for_each_possible_cpu(cpu) - zone_pageset_init(zone, cpu); + pageset_set_high_and_batch(zone, + per_cpu_ptr(zone->pageset, cpu)); mutex_unlock(&pcp_batch_high_lock); } #endif -- cgit v0.10.2 From 75485363ce8552698bfb9970d901f755d5713cca Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:01:42 -0700 Subject: mm: vmscan: limit the number of pages kswapd reclaims at each priority This series does not fix all the current known problems with reclaim but it addresses one important swapping bug when there is background IO. Changelog since V3 - Drop the slab shrink changes in light of Glaubers series and discussions highlighted that there were a number of potential problems with the patch. (mel) - Rebased to 3.10-rc1 Changelog since V2 - Preserve ratio properly for proportional scanning (kamezawa) Changelog since V1 - Rename ZONE_DIRTY to ZONE_TAIL_LRU_DIRTY (andi) - Reformat comment in shrink_page_list (andi) - Clarify some comments (dhillf) - Rework how the proportional scanning is preserved - Add PageReclaim check before kswapd starts writeback - Reset sc.nr_reclaimed on every full zone scan Kswapd and page reclaim behaviour has been screwy in one way or the other for a long time. Very broadly speaking it worked in the far past because machines were limited in memory so it did not have that many pages to scan and it stalled congestion_wait() frequently to prevent it going completely nuts. In recent times it has behaved very unsatisfactorily with some of the problems compounded by the removal of stall logic and the introduction of transparent hugepage support with high-order reclaims. There are many variations of bugs that are rooted in this area. One example is reports of a large copy operations or backup causing the machine to grind to a halt or applications pushed to swap. Sometimes in low memory situations a large percentage of memory suddenly gets reclaimed. In other cases an application starts and kswapd hits 100% CPU usage for prolonged periods of time and so on. There is now talk of introducing features like an extra free kbytes tunable to work around aspects of the problem instead of trying to deal with it. It's compounded by the problem that it can be very workload and machine specific. This series aims at addressing some of the worst of these problems without attempting to fundmentally alter how page reclaim works. Patches 1-2 limits the number of pages kswapd reclaims while still obeying the anon/file proportion of the LRUs it should be scanning. Patches 3-4 control how and when kswapd raises its scanning priority and deletes the scanning restart logic which is tricky to follow. Patch 5 notes that it is too easy for kswapd to reach priority 0 when scanning and then reclaim the world. Down with that sort of thing. Patch 6 notes that kswapd starts writeback based on scanning priority which is not necessarily related to dirty pages. It will have kswapd writeback pages if a number of unqueued dirty pages have been recently encountered at the tail of the LRU. Patch 7 notes that sometimes kswapd should stall waiting on IO to complete to reduce LRU churn and the likelihood that it'll reclaim young clean pages or push applications to swap. It will cause kswapd to block on IO if it detects that pages being reclaimed under writeback are recycling through the LRU before the IO completes. Patchies 8-9 are cosmetic but balance_pgdat() is easier to follow after they are applied. This was tested using memcached+memcachetest while some background IO was in progress as implemented by the parallel IO tests implement in MM Tests. memcachetest benchmarks how many operations/second memcached can service and it is run multiple times. It starts with no background IO and then re-runs the test with larger amounts of IO in the background to roughly simulate a large copy in progress. The expectation is that the IO should have little or no impact on memcachetest which is running entirely in memory. 3.10.0-rc1 3.10.0-rc1 vanilla lessdisrupt-v4 Ops memcachetest-0M 22155.00 ( 0.00%) 22180.00 ( 0.11%) Ops memcachetest-715M 22720.00 ( 0.00%) 22355.00 ( -1.61%) Ops memcachetest-2385M 3939.00 ( 0.00%) 23450.00 (495.33%) Ops memcachetest-4055M 3628.00 ( 0.00%) 24341.00 (570.92%) Ops io-duration-0M 0.00 ( 0.00%) 0.00 ( 0.00%) Ops io-duration-715M 12.00 ( 0.00%) 7.00 ( 41.67%) Ops io-duration-2385M 118.00 ( 0.00%) 21.00 ( 82.20%) Ops io-duration-4055M 162.00 ( 0.00%) 36.00 ( 77.78%) Ops swaptotal-0M 0.00 ( 0.00%) 0.00 ( 0.00%) Ops swaptotal-715M 140134.00 ( 0.00%) 18.00 ( 99.99%) Ops swaptotal-2385M 392438.00 ( 0.00%) 0.00 ( 0.00%) Ops swaptotal-4055M 449037.00 ( 0.00%) 27864.00 ( 93.79%) Ops swapin-0M 0.00 ( 0.00%) 0.00 ( 0.00%) Ops swapin-715M 0.00 ( 0.00%) 0.00 ( 0.00%) Ops swapin-2385M 148031.00 ( 0.00%) 0.00 ( 0.00%) Ops swapin-4055M 135109.00 ( 0.00%) 0.00 ( 0.00%) Ops minorfaults-0M 1529984.00 ( 0.00%) 1530235.00 ( -0.02%) Ops minorfaults-715M 1794168.00 ( 0.00%) 1613750.00 ( 10.06%) Ops minorfaults-2385M 1739813.00 ( 0.00%) 1609396.00 ( 7.50%) Ops minorfaults-4055M 1754460.00 ( 0.00%) 1614810.00 ( 7.96%) Ops majorfaults-0M 0.00 ( 0.00%) 0.00 ( 0.00%) Ops majorfaults-715M 185.00 ( 0.00%) 180.00 ( 2.70%) Ops majorfaults-2385M 24472.00 ( 0.00%) 101.00 ( 99.59%) Ops majorfaults-4055M 22302.00 ( 0.00%) 229.00 ( 98.97%) Note how the vanilla kernels performance collapses when there is enough IO taking place in the background. This drop in performance is part of what users complain of when they start backups. Note how the swapin and major fault figures indicate that processes were being pushed to swap prematurely. With the series applied, there is no noticable performance drop and while there is still some swap activity, it's tiny. 20 iterations of this test were run in total and averaged. Every 5 iterations, additional IO was generated in the background using dd to measure how the workload was impacted. The 0M, 715M, 2385M and 4055M subblock refer to the amount of IO going on in the background at each iteration. So memcachetest-2385M is reporting how many transactions/second memcachetest recorded on average over 5 iterations while there was 2385M of IO going on in the ground. There are six blocks of information reported here memcachetest is the transactions/second reported by memcachetest. In the vanilla kernel note that performance drops from around 22K/sec to just under 4K/second when there is 2385M of IO going on in the background. This is one type of performance collapse users complain about if a large cp or backup starts in the background io-duration refers to how long it takes for the background IO to complete. It's showing that with the patched kernel that the IO completes faster while not interfering with the memcache workload swaptotal is the total amount of swap traffic. With the patched kernel, the total amount of swapping is much reduced although it is still not zero. swapin in this case is an indication as to whether we are swap trashing. The closer the swapin/swapout ratio is to 1, the worse the trashing is. Note with the patched kernel that there is no swapin activity indicating that all the pages swapped were really inactive unused pages. minorfaults are just minor faults. An increased number of minor faults can indicate that page reclaim is unmapping the pages but not swapping them out before they are faulted back in. With the patched kernel, there is only a small change in minor faults majorfaults are just major faults in the target workload and a high number can indicate that a workload is being prematurely swapped. With the patched kernel, major faults are much reduced. As there are no swapin's recorded so it's not being swapped. The likely explanation is that that libraries or configuration files used by the workload during startup get paged out by the background IO. Overall with the series applied, there is no noticable performance drop due to background IO and while there is still some swap activity, it's tiny and the lack of swapins imply that the swapped pages were inactive and unused. 3.10.0-rc1 3.10.0-rc1 vanilla lessdisrupt-v4 Page Ins 1234608 101892 Page Outs 12446272 11810468 Swap Ins 283406 0 Swap Outs 698469 27882 Direct pages scanned 0 136480 Kswapd pages scanned 6266537 5369364 Kswapd pages reclaimed 1088989 930832 Direct pages reclaimed 0 120901 Kswapd efficiency 17% 17% Kswapd velocity 5398.371 4635.115 Direct efficiency 100% 88% Direct velocity 0.000 117.817 Percentage direct scans 0% 2% Page writes by reclaim 1655843 4009929 Page writes file 957374 3982047 Page writes anon 698469 27882 Page reclaim immediate 5245 1745 Page rescued immediate 0 0 Slabs scanned 33664 25216 Direct inode steals 0 0 Kswapd inode steals 19409 778 Kswapd skipped wait 0 0 THP fault alloc 35 30 THP collapse alloc 472 401 THP splits 27 22 THP fault fallback 0 0 THP collapse fail 0 1 Compaction stalls 0 4 Compaction success 0 0 Compaction failures 0 4 Page migrate success 0 0 Page migrate failure 0 0 Compaction pages isolated 0 0 Compaction migrate scanned 0 0 Compaction free scanned 0 0 Compaction cost 0 0 NUMA PTE updates 0 0 NUMA hint faults 0 0 NUMA hint local faults 0 0 NUMA pages migrated 0 0 AutoNUMA cost 0 0 Unfortunately, note that there is a small amount of direct reclaim due to kswapd no longer reclaiming the world. ftrace indicates that the direct reclaim stalls are mostly harmless with the vast bulk of the stalls incurred by dd 23 tclsh-3367 38 memcachetest-13733 49 memcachetest-12443 57 tee-3368 1541 dd-13826 1981 dd-12539 A consequence of the direct reclaim for dd is that the processes for the IO workload may show a higher system CPU usage. There is also a risk that kswapd not reclaiming the world may mean that it stays awake balancing zones, does not stall on the appropriate events and continually scans pages it cannot reclaim consuming CPU. This will be visible as continued high CPU usage but in my own tests I only saw a single spike lasting less than a second and I did not observe any problems related to reclaim while running the series on my desktop. This patch: The number of pages kswapd can reclaim is bound by the number of pages it scans which is related to the size of the zone and the scanning priority. In many cases the priority remains low because it's reset every SWAP_CLUSTER_MAX reclaimed pages but in the event kswapd scans a large number of pages it cannot reclaim, it will raise the priority and potentially discard a large percentage of the zone as sc->nr_to_reclaim is ULONG_MAX. The user-visible effect is a reclaim "spike" where a large percentage of memory is suddenly freed. It would be bad enough if this was just unused memory but because of how anon/file pages are balanced it is possible that applications get pushed to swap unnecessarily. This patch limits the number of pages kswapd will reclaim to the high watermark. Reclaim will still overshoot due to it not being a hard limit as shrink_lruvec() will ignore the sc.nr_to_reclaim at DEF_PRIORITY but it prevents kswapd reclaiming the world at higher priorities. The number of pages it reclaims is not adjusted for high-order allocations as kswapd will reclaim excessively if it is to balance zones for high-order allocations. Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel Reviewed-by: Michal Hocko Acked-by: Johannes Weiner Acked-by: KAMEZAWA Hiroyuki Cc: Jiri Slaby Cc: Valdis Kletnieks Tested-by: Zlatko Calusic Cc: dormando Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index fa6a853..cdbc069 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2601,6 +2601,32 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, } /* + * kswapd shrinks the zone by the number of pages required to reach + * the high watermark. + */ +static void kswapd_shrink_zone(struct zone *zone, + struct scan_control *sc, + unsigned long lru_pages) +{ + unsigned long nr_slab; + struct reclaim_state *reclaim_state = current->reclaim_state; + struct shrink_control shrink = { + .gfp_mask = sc->gfp_mask, + }; + + /* Reclaim above the high watermark. */ + sc->nr_to_reclaim = max(SWAP_CLUSTER_MAX, high_wmark_pages(zone)); + shrink_zone(zone, sc); + + reclaim_state->reclaimed_slab = 0; + nr_slab = shrink_slab(&shrink, sc->nr_scanned, lru_pages); + sc->nr_reclaimed += reclaim_state->reclaimed_slab; + + if (nr_slab == 0 && !zone_reclaimable(zone)) + zone->all_unreclaimable = 1; +} + +/* * For kswapd, balance_pgdat() will work across all this node's zones until * they are all at high_wmark_pages(zone). * @@ -2627,24 +2653,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, bool pgdat_is_balanced = false; int i; int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ - struct reclaim_state *reclaim_state = current->reclaim_state; unsigned long nr_soft_reclaimed; unsigned long nr_soft_scanned; struct scan_control sc = { .gfp_mask = GFP_KERNEL, .may_unmap = 1, .may_swap = 1, - /* - * kswapd doesn't want to be bailed out while reclaim. because - * we want to put equal scanning pressure on each zone. - */ - .nr_to_reclaim = ULONG_MAX, .order = order, .target_mem_cgroup = NULL, }; - struct shrink_control shrink = { - .gfp_mask = sc.gfp_mask, - }; loop_again: sc.priority = DEF_PRIORITY; sc.nr_reclaimed = 0; @@ -2716,7 +2733,7 @@ loop_again: */ for (i = 0; i <= end_zone; i++) { struct zone *zone = pgdat->node_zones + i; - int nr_slab, testorder; + int testorder; unsigned long balance_gap; if (!populated_zone(zone)) @@ -2764,16 +2781,8 @@ loop_again: if ((buffer_heads_over_limit && is_highmem_idx(i)) || !zone_balanced(zone, testorder, - balance_gap, end_zone)) { - shrink_zone(zone, &sc); - - reclaim_state->reclaimed_slab = 0; - nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages); - sc.nr_reclaimed += reclaim_state->reclaimed_slab; - - if (nr_slab == 0 && !zone_reclaimable(zone)) - zone->all_unreclaimable = 1; - } + balance_gap, end_zone)) + kswapd_shrink_zone(zone, &sc, lru_pages); /* * If we're getting trouble reclaiming, start doing -- cgit v0.10.2 From e82e0561dae9f3ae5a21fc2d3d3ccbe69d90be46 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:01:44 -0700 Subject: mm: vmscan: obey proportional scanning requirements for kswapd Simplistically, the anon and file LRU lists are scanned proportionally depending on the value of vm.swappiness although there are other factors taken into account by get_scan_count(). The patch "mm: vmscan: Limit the number of pages kswapd reclaims" limits the number of pages kswapd reclaims but it breaks this proportional scanning and may evenly shrink anon/file LRUs regardless of vm.swappiness. This patch preserves the proportional scanning and reclaim. It does mean that kswapd will reclaim more than requested but the number of pages will be related to the high watermark. [mhocko@suse.cz: Correct proportional reclaim for memcg and simplify] [kamezawa.hiroyu@jp.fujitsu.com: Recalculate scan based on target] [hannes@cmpxchg.org: Account for already scanned pages properly] Signed-off-by: Mel Gorman Acked-by: Rik van Riel Reviewed-by: Michal Hocko Cc: Johannes Weiner Acked-by: KAMEZAWA Hiroyuki Cc: Jiri Slaby Cc: Valdis Kletnieks Tested-by: Zlatko Calusic Cc: dormando Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index cdbc069..26ad67f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1822,17 +1822,25 @@ out: static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { unsigned long nr[NR_LRU_LISTS]; + unsigned long targets[NR_LRU_LISTS]; unsigned long nr_to_scan; enum lru_list lru; unsigned long nr_reclaimed = 0; unsigned long nr_to_reclaim = sc->nr_to_reclaim; struct blk_plug plug; + bool scan_adjusted = false; get_scan_count(lruvec, sc, nr); + /* Record the original scan target for proportional adjustments later */ + memcpy(targets, nr, sizeof(nr)); + blk_start_plug(&plug); while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) { + unsigned long nr_anon, nr_file, percentage; + unsigned long nr_scanned; + for_each_evictable_lru(lru) { if (nr[lru]) { nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX); @@ -1842,17 +1850,60 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) lruvec, sc); } } + + if (nr_reclaimed < nr_to_reclaim || scan_adjusted) + continue; + /* - * On large memory systems, scan >> priority can become - * really large. This is fine for the starting priority; - * we want to put equal scanning pressure on each zone. - * However, if the VM has a harder time of freeing pages, - * with multiple processes reclaiming pages, the total - * freeing target can get unreasonably large. + * For global direct reclaim, reclaim only the number of pages + * requested. Less care is taken to scan proportionally as it + * is more important to minimise direct reclaim stall latency + * than it is to properly age the LRU lists. */ - if (nr_reclaimed >= nr_to_reclaim && - sc->priority < DEF_PRIORITY) + if (global_reclaim(sc) && !current_is_kswapd()) break; + + /* + * For kswapd and memcg, reclaim at least the number of pages + * requested. Ensure that the anon and file LRUs shrink + * proportionally what was requested by get_scan_count(). We + * stop reclaiming one LRU and reduce the amount scanning + * proportional to the original scan target. + */ + nr_file = nr[LRU_INACTIVE_FILE] + nr[LRU_ACTIVE_FILE]; + nr_anon = nr[LRU_INACTIVE_ANON] + nr[LRU_ACTIVE_ANON]; + + if (nr_file > nr_anon) { + unsigned long scan_target = targets[LRU_INACTIVE_ANON] + + targets[LRU_ACTIVE_ANON] + 1; + lru = LRU_BASE; + percentage = nr_anon * 100 / scan_target; + } else { + unsigned long scan_target = targets[LRU_INACTIVE_FILE] + + targets[LRU_ACTIVE_FILE] + 1; + lru = LRU_FILE; + percentage = nr_file * 100 / scan_target; + } + + /* Stop scanning the smaller of the LRU */ + nr[lru] = 0; + nr[lru + LRU_ACTIVE] = 0; + + /* + * Recalculate the other LRU scan count based on its original + * scan target and the percentage scanning already complete + */ + lru = (lru == LRU_FILE) ? LRU_BASE : LRU_FILE; + nr_scanned = targets[lru] - nr[lru]; + nr[lru] = targets[lru] * (100 - percentage) / 100; + nr[lru] -= min(nr[lru], nr_scanned); + + lru += LRU_ACTIVE; + nr_scanned = targets[lru] - nr[lru]; + nr[lru] = targets[lru] * (100 - percentage) / 100; + nr[lru] -= min(nr[lru], nr_scanned); + + scan_adjusted = true; } blk_finish_plug(&plug); sc->nr_reclaimed += nr_reclaimed; -- cgit v0.10.2 From b8e83b942a16eb73e63406592d3178207a4f07a1 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:01:45 -0700 Subject: mm: vmscan: flatten kswapd priority loop kswapd stops raising the scanning priority when at least SWAP_CLUSTER_MAX pages have been reclaimed or the pgdat is considered balanced. It then rechecks if it needs to restart at DEF_PRIORITY and whether high-order reclaim needs to be reset. This is not wrong per-se but it is confusing to follow and forcing kswapd to stay at DEF_PRIORITY may require several restarts before it has scanned enough pages to meet the high watermark even at 100% efficiency. This patch irons out the logic a bit by controlling when priority is raised and removing the "goto loop_again". This patch has kswapd raise the scanning priority until it is scanning enough pages that it could meet the high watermark in one shrink of the LRU lists if it is able to reclaim at 100% efficiency. It will not raise the scanning prioirty higher unless it is failing to reclaim any pages. To avoid infinite looping for high-order allocation requests kswapd will not reclaim for high-order allocations when it has reclaimed at least twice the number of pages as the allocation request. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Reviewed-by: Michal Hocko Cc: KAMEZAWA Hiroyuki Cc: Rik van Riel Cc: Jiri Slaby Cc: Valdis Kletnieks Tested-by: Zlatko Calusic Cc: dormando Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index 26ad67f..1c10ee5 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2654,8 +2654,12 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, /* * kswapd shrinks the zone by the number of pages required to reach * the high watermark. + * + * Returns true if kswapd scanned at least the requested number of pages to + * reclaim. This is used to determine if the scanning priority needs to be + * raised. */ -static void kswapd_shrink_zone(struct zone *zone, +static bool kswapd_shrink_zone(struct zone *zone, struct scan_control *sc, unsigned long lru_pages) { @@ -2675,6 +2679,8 @@ static void kswapd_shrink_zone(struct zone *zone, if (nr_slab == 0 && !zone_reclaimable(zone)) zone->all_unreclaimable = 1; + + return sc->nr_scanned >= sc->nr_to_reclaim; } /* @@ -2701,26 +2707,26 @@ static void kswapd_shrink_zone(struct zone *zone, static unsigned long balance_pgdat(pg_data_t *pgdat, int order, int *classzone_idx) { - bool pgdat_is_balanced = false; int i; int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ unsigned long nr_soft_reclaimed; unsigned long nr_soft_scanned; struct scan_control sc = { .gfp_mask = GFP_KERNEL, + .priority = DEF_PRIORITY, .may_unmap = 1, .may_swap = 1, + .may_writepage = !laptop_mode, .order = order, .target_mem_cgroup = NULL, }; -loop_again: - sc.priority = DEF_PRIORITY; - sc.nr_reclaimed = 0; - sc.may_writepage = !laptop_mode; count_vm_event(PAGEOUTRUN); do { unsigned long lru_pages = 0; + bool raise_priority = true; + + sc.nr_reclaimed = 0; /* * Scan in the highmem->dma direction for the highest @@ -2762,10 +2768,8 @@ loop_again: } } - if (i < 0) { - pgdat_is_balanced = true; + if (i < 0) goto out; - } for (i = 0; i <= end_zone; i++) { struct zone *zone = pgdat->node_zones + i; @@ -2832,8 +2836,16 @@ loop_again: if ((buffer_heads_over_limit && is_highmem_idx(i)) || !zone_balanced(zone, testorder, - balance_gap, end_zone)) - kswapd_shrink_zone(zone, &sc, lru_pages); + balance_gap, end_zone)) { + /* + * There should be no need to raise the + * scanning priority if enough pages are + * already being scanned that high + * watermark would be met at 100% efficiency. + */ + if (kswapd_shrink_zone(zone, &sc, lru_pages)) + raise_priority = false; + } /* * If we're getting trouble reclaiming, start doing @@ -2868,46 +2880,29 @@ loop_again: pfmemalloc_watermark_ok(pgdat)) wake_up(&pgdat->pfmemalloc_wait); - if (pgdat_balanced(pgdat, order, *classzone_idx)) { - pgdat_is_balanced = true; - break; /* kswapd: all done */ - } - /* - * We do this so kswapd doesn't build up large priorities for - * example when it is freeing in parallel with allocators. It - * matches the direct reclaim path behaviour in terms of impact - * on zone->*_priority. + * Fragmentation may mean that the system cannot be rebalanced + * for high-order allocations in all zones. If twice the + * allocation size has been reclaimed and the zones are still + * not balanced then recheck the watermarks at order-0 to + * prevent kswapd reclaiming excessively. Assume that a + * process requested a high-order can direct reclaim/compact. */ - if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX) - break; - } while (--sc.priority >= 0); - -out: - if (!pgdat_is_balanced) { - cond_resched(); + if (order && sc.nr_reclaimed >= 2UL << order) + order = sc.order = 0; - try_to_freeze(); + /* Check if kswapd should be suspending */ + if (try_to_freeze() || kthread_should_stop()) + break; /* - * Fragmentation may mean that the system cannot be - * rebalanced for high-order allocations in all zones. - * At this point, if nr_reclaimed < SWAP_CLUSTER_MAX, - * it means the zones have been fully scanned and are still - * not balanced. For high-order allocations, there is - * little point trying all over again as kswapd may - * infinite loop. - * - * Instead, recheck all watermarks at order-0 as they - * are the most important. If watermarks are ok, kswapd will go - * back to sleep. High-order users can still perform direct - * reclaim if they wish. + * Raise priority if scanning rate is too low or there was no + * progress in reclaiming pages */ - if (sc.nr_reclaimed < SWAP_CLUSTER_MAX) - order = sc.order = 0; - - goto loop_again; - } + if (raise_priority || !sc.nr_reclaimed) + sc.priority--; + } while (sc.priority >= 0 && + !pgdat_balanced(pgdat, order, *classzone_idx)); /* * If kswapd was reclaiming at a higher order, it has the option of @@ -2936,6 +2931,7 @@ out: compact_pgdat(pgdat, order); } +out: /* * Return the order we were reclaiming at so prepare_kswapd_sleep() * makes a decision on the order we were last reclaiming at. However, -- cgit v0.10.2 From 2ab44f434586b8ccb11f781b4c2730492e6628f5 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:01:47 -0700 Subject: mm: vmscan: decide whether to compact the pgdat based on reclaim progress In the past, kswapd makes a decision on whether to compact memory after the pgdat was considered balanced. This more or less worked but it is late to make such a decision and does not fit well now that kswapd makes a decision whether to exit the zone scanning loop depending on reclaim progress. This patch will compact a pgdat if at least the requested number of pages were reclaimed from unbalanced zones for a given priority. If any zone is currently balanced, kswapd will not call compaction as it is expected the necessary pages are already available. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Reviewed-by: Michal Hocko Cc: KAMEZAWA Hiroyuki Cc: Rik van Riel Cc: Jiri Slaby Cc: Valdis Kletnieks Tested-by: Zlatko Calusic Cc: dormando Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index 1c10ee5..cd09803 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2661,7 +2661,8 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, */ static bool kswapd_shrink_zone(struct zone *zone, struct scan_control *sc, - unsigned long lru_pages) + unsigned long lru_pages, + unsigned long *nr_attempted) { unsigned long nr_slab; struct reclaim_state *reclaim_state = current->reclaim_state; @@ -2677,6 +2678,9 @@ static bool kswapd_shrink_zone(struct zone *zone, nr_slab = shrink_slab(&shrink, sc->nr_scanned, lru_pages); sc->nr_reclaimed += reclaim_state->reclaimed_slab; + /* Account for the number of pages attempted to reclaim */ + *nr_attempted += sc->nr_to_reclaim; + if (nr_slab == 0 && !zone_reclaimable(zone)) zone->all_unreclaimable = 1; @@ -2724,7 +2728,9 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, do { unsigned long lru_pages = 0; + unsigned long nr_attempted = 0; bool raise_priority = true; + bool pgdat_needs_compaction = (order > 0); sc.nr_reclaimed = 0; @@ -2774,7 +2780,21 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, for (i = 0; i <= end_zone; i++) { struct zone *zone = pgdat->node_zones + i; + if (!populated_zone(zone)) + continue; + lru_pages += zone_reclaimable_pages(zone); + + /* + * If any zone is currently balanced then kswapd will + * not call compaction as it is expected that the + * necessary pages are already available. + */ + if (pgdat_needs_compaction && + zone_watermark_ok(zone, order, + low_wmark_pages(zone), + *classzone_idx, 0)) + pgdat_needs_compaction = false; } /* @@ -2843,7 +2863,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, * already being scanned that high * watermark would be met at 100% efficiency. */ - if (kswapd_shrink_zone(zone, &sc, lru_pages)) + if (kswapd_shrink_zone(zone, &sc, lru_pages, + &nr_attempted)) raise_priority = false; } @@ -2896,6 +2917,13 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, break; /* + * Compact if necessary and kswapd is reclaiming at least the + * high watermark number of pages as requsted + */ + if (pgdat_needs_compaction && sc.nr_reclaimed > nr_attempted) + compact_pgdat(pgdat, order); + + /* * Raise priority if scanning rate is too low or there was no * progress in reclaiming pages */ @@ -2904,33 +2932,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, } while (sc.priority >= 0 && !pgdat_balanced(pgdat, order, *classzone_idx)); - /* - * If kswapd was reclaiming at a higher order, it has the option of - * sleeping without all zones being balanced. Before it does, it must - * ensure that the watermarks for order-0 on *all* zones are met and - * that the congestion flags are cleared. The congestion flag must - * be cleared as kswapd is the only mechanism that clears the flag - * and it is potentially going to sleep here. - */ - if (order) { - int zones_need_compaction = 1; - - for (i = 0; i <= end_zone; i++) { - struct zone *zone = pgdat->node_zones + i; - - if (!populated_zone(zone)) - continue; - - /* Check if the memory needs to be defragmented. */ - if (zone_watermark_ok(zone, order, - low_wmark_pages(zone), *classzone_idx, 0)) - zones_need_compaction = 0; - } - - if (zones_need_compaction) - compact_pgdat(pgdat, order); - } - out: /* * Return the order we were reclaiming at so prepare_kswapd_sleep() -- cgit v0.10.2 From 9aa41348a8d11427feec350b21dcdd4330fd20c4 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:01:48 -0700 Subject: mm: vmscan: do not allow kswapd to scan at maximum priority Page reclaim at priority 0 will scan the entire LRU as priority 0 is considered to be a near OOM condition. Kswapd can reach priority 0 quite easily if it is encountering a large number of pages it cannot reclaim such as pages under writeback. When this happens, kswapd reclaims very aggressively even though there may be no real risk of allocation failure or OOM. This patch prevents kswapd reaching priority 0 and trying to reclaim the world. Direct reclaimers will still reach priority 0 in the event of an OOM situation. Signed-off-by: Mel Gorman Acked-by: Rik van Riel Acked-by: Johannes Weiner Reviewed-by: Michal Hocko Cc: KAMEZAWA Hiroyuki Cc: Jiri Slaby Cc: Valdis Kletnieks Tested-by: Zlatko Calusic Cc: dormando Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index cd09803..1505c57 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2929,7 +2929,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, */ if (raise_priority || !sc.nr_reclaimed) sc.priority--; - } while (sc.priority >= 0 && + } while (sc.priority >= 1 && !pgdat_balanced(pgdat, order, *classzone_idx)); out: -- cgit v0.10.2 From d43006d503ac921c7df4f94d13c17db6f13c9d26 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:01:50 -0700 Subject: mm: vmscan: have kswapd writeback pages based on dirty pages encountered, not priority Currently kswapd queues dirty pages for writeback if scanning at an elevated priority but the priority kswapd scans at is not related to the number of unqueued dirty encountered. Since commit "mm: vmscan: Flatten kswapd priority loop", the priority is related to the size of the LRU and the zone watermark which is no indication as to whether kswapd should write pages or not. This patch tracks if an excessive number of unqueued dirty pages are being encountered at the end of the LRU. If so, it indicates that dirty pages are being recycled before flusher threads can clean them and flags the zone so that kswapd will start writing pages until the zone is balanced. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Reviewed-by: Michal Hocko Cc: KAMEZAWA Hiroyuki Cc: Rik van Riel Cc: Jiri Slaby Cc: Valdis Kletnieks Tested-by: Zlatko Calusic Cc: dormando Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5c76737..2aaf72f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -495,6 +495,10 @@ typedef enum { ZONE_CONGESTED, /* zone has many dirty pages backed by * a congested BDI */ + ZONE_TAIL_LRU_DIRTY, /* reclaim scanning has recently found + * many dirty file pages at the tail + * of the LRU. + */ } zone_flags_t; static inline void zone_set_flag(struct zone *zone, zone_flags_t flag) @@ -517,6 +521,11 @@ static inline int zone_is_reclaim_congested(const struct zone *zone) return test_bit(ZONE_CONGESTED, &zone->flags); } +static inline int zone_is_reclaim_dirty(const struct zone *zone) +{ + return test_bit(ZONE_TAIL_LRU_DIRTY, &zone->flags); +} + static inline int zone_is_reclaim_locked(const struct zone *zone) { return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags); diff --git a/mm/vmscan.c b/mm/vmscan.c index 1505c57..d6c916d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -676,13 +676,14 @@ static unsigned long shrink_page_list(struct list_head *page_list, struct zone *zone, struct scan_control *sc, enum ttu_flags ttu_flags, - unsigned long *ret_nr_dirty, + unsigned long *ret_nr_unqueued_dirty, unsigned long *ret_nr_writeback, bool force_reclaim) { LIST_HEAD(ret_pages); LIST_HEAD(free_pages); int pgactivate = 0; + unsigned long nr_unqueued_dirty = 0; unsigned long nr_dirty = 0; unsigned long nr_congested = 0; unsigned long nr_reclaimed = 0; @@ -808,14 +809,17 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (PageDirty(page)) { nr_dirty++; + if (!PageWriteback(page)) + nr_unqueued_dirty++; + /* * Only kswapd can writeback filesystem pages to - * avoid risk of stack overflow but do not writeback - * unless under significant pressure. + * avoid risk of stack overflow but only writeback + * if many dirty pages have been encountered. */ if (page_is_file_cache(page) && (!current_is_kswapd() || - sc->priority >= DEF_PRIORITY - 2)) { + !zone_is_reclaim_dirty(zone))) { /* * Immediately reclaim when written back. * Similar in principal to deactivate_page() @@ -960,7 +964,7 @@ keep: list_splice(&ret_pages, page_list); count_vm_events(PGACTIVATE, pgactivate); mem_cgroup_uncharge_end(); - *ret_nr_dirty += nr_dirty; + *ret_nr_unqueued_dirty += nr_unqueued_dirty; *ret_nr_writeback += nr_writeback; return nr_reclaimed; } @@ -1373,6 +1377,15 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, (nr_taken >> (DEF_PRIORITY - sc->priority))) wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); + /* + * Similarly, if many dirty pages are encountered that are not + * currently being written then flag that kswapd should start + * writing back pages. + */ + if (global_reclaim(sc) && nr_dirty && + nr_dirty >= (nr_taken >> (DEF_PRIORITY - sc->priority))) + zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY); + trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id, zone_idx(zone), nr_scanned, nr_reclaimed, @@ -2769,8 +2782,12 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, end_zone = i; break; } else { - /* If balanced, clear the congested flag */ + /* + * If balanced, clear the dirty and congested + * flags + */ zone_clear_flag(zone, ZONE_CONGESTED); + zone_clear_flag(zone, ZONE_TAIL_LRU_DIRTY); } } @@ -2888,8 +2905,10 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, * possible there are dirty pages backed by * congested BDIs but as pressure is relieved, * speculatively avoid congestion waits + * or writing pages from kswapd context. */ zone_clear_flag(zone, ZONE_CONGESTED); + zone_clear_flag(zone, ZONE_TAIL_LRU_DIRTY); } /* -- cgit v0.10.2 From 283aba9f9e0e4882bf09bd37a2983379a6fae805 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:01:51 -0700 Subject: mm: vmscan: block kswapd if it is encountering pages under writeback Historically, kswapd used to congestion_wait() at higher priorities if it was not making forward progress. This made no sense as the failure to make progress could be completely independent of IO. It was later replaced by wait_iff_congested() and removed entirely by commit 258401a6 (mm: don't wait on congested zones in balance_pgdat()) as it was duplicating logic in shrink_inactive_list(). This is problematic. If kswapd encounters many pages under writeback and it continues to scan until it reaches the high watermark then it will quickly skip over the pages under writeback and reclaim clean young pages or push applications out to swap. The use of wait_iff_congested() is not suited to kswapd as it will only stall if the underlying BDI is really congested or a direct reclaimer was unable to write to the underlying BDI. kswapd bypasses the BDI congestion as it sets PF_SWAPWRITE but even if this was taken into account then it would cause direct reclaimers to stall on writeback which is not desirable. This patch sets a ZONE_WRITEBACK flag if direct reclaim or kswapd is encountering too many pages under writeback. If this flag is set and kswapd encounters a PageReclaim page under writeback then it'll assume that the LRU lists are being recycled too quickly before IO can complete and block waiting for some IO to complete. Signed-off-by: Mel Gorman Reviewed-by: Michal Hocko Acked-by: Rik van Riel Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: Jiri Slaby Cc: Valdis Kletnieks Tested-by: Zlatko Calusic Cc: dormando Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 2aaf72f..fce64af 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -499,6 +499,9 @@ typedef enum { * many dirty file pages at the tail * of the LRU. */ + ZONE_WRITEBACK, /* reclaim scanning has recently found + * many pages under writeback + */ } zone_flags_t; static inline void zone_set_flag(struct zone *zone, zone_flags_t flag) @@ -526,6 +529,11 @@ static inline int zone_is_reclaim_dirty(const struct zone *zone) return test_bit(ZONE_TAIL_LRU_DIRTY, &zone->flags); } +static inline int zone_is_reclaim_writeback(const struct zone *zone) +{ + return test_bit(ZONE_WRITEBACK, &zone->flags); +} + static inline int zone_is_reclaim_locked(const struct zone *zone) { return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags); diff --git a/mm/vmscan.c b/mm/vmscan.c index d6c916d..1109de0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -724,25 +724,55 @@ static unsigned long shrink_page_list(struct list_head *page_list, may_enter_fs = (sc->gfp_mask & __GFP_FS) || (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO)); + /* + * If a page at the tail of the LRU is under writeback, there + * are three cases to consider. + * + * 1) If reclaim is encountering an excessive number of pages + * under writeback and this page is both under writeback and + * PageReclaim then it indicates that pages are being queued + * for IO but are being recycled through the LRU before the + * IO can complete. Waiting on the page itself risks an + * indefinite stall if it is impossible to writeback the + * page due to IO error or disconnected storage so instead + * block for HZ/10 or until some IO completes then clear the + * ZONE_WRITEBACK flag to recheck if the condition exists. + * + * 2) Global reclaim encounters a page, memcg encounters a + * page that is not marked for immediate reclaim or + * the caller does not have __GFP_IO. In this case mark + * the page for immediate reclaim and continue scanning. + * + * __GFP_IO is checked because a loop driver thread might + * enter reclaim, and deadlock if it waits on a page for + * which it is needed to do the write (loop masks off + * __GFP_IO|__GFP_FS for this reason); but more thought + * would probably show more reasons. + * + * Don't require __GFP_FS, since we're not going into the + * FS, just waiting on its writeback completion. Worryingly, + * ext4 gfs2 and xfs allocate pages with + * grab_cache_page_write_begin(,,AOP_FLAG_NOFS), so testing + * may_enter_fs here is liable to OOM on them. + * + * 3) memcg encounters a page that is not already marked + * PageReclaim. memcg does not have any dirty pages + * throttling so we could easily OOM just because too many + * pages are in writeback and there is nothing else to + * reclaim. Wait for the writeback to complete. + */ if (PageWriteback(page)) { - /* - * memcg doesn't have any dirty pages throttling so we - * could easily OOM just because too many pages are in - * writeback and there is nothing else to reclaim. - * - * Check __GFP_IO, certainly because a loop driver - * thread might enter reclaim, and deadlock if it waits - * on a page for which it is needed to do the write - * (loop masks off __GFP_IO|__GFP_FS for this reason); - * but more thought would probably show more reasons. - * - * Don't require __GFP_FS, since we're not going into - * the FS, just waiting on its writeback completion. - * Worryingly, ext4 gfs2 and xfs allocate pages with - * grab_cache_page_write_begin(,,AOP_FLAG_NOFS), so - * testing may_enter_fs here is liable to OOM on them. - */ - if (global_reclaim(sc) || + /* Case 1 above */ + if (current_is_kswapd() && + PageReclaim(page) && + zone_is_reclaim_writeback(zone)) { + unlock_page(page); + congestion_wait(BLK_RW_ASYNC, HZ/10); + zone_clear_flag(zone, ZONE_WRITEBACK); + goto keep; + + /* Case 2 above */ + } else if (global_reclaim(sc) || !PageReclaim(page) || !(sc->gfp_mask & __GFP_IO)) { /* * This is slightly racy - end_page_writeback() @@ -757,9 +787,13 @@ static unsigned long shrink_page_list(struct list_head *page_list, */ SetPageReclaim(page); nr_writeback++; + goto keep_locked; + + /* Case 3 above */ + } else { + wait_on_page_writeback(page); } - wait_on_page_writeback(page); } if (!force_reclaim) @@ -1374,8 +1408,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, * isolated page is PageWriteback */ if (nr_writeback && nr_writeback >= - (nr_taken >> (DEF_PRIORITY - sc->priority))) + (nr_taken >> (DEF_PRIORITY - sc->priority))) { wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); + zone_set_flag(zone, ZONE_WRITEBACK); + } /* * Similarly, if many dirty pages are encountered that are not @@ -2669,8 +2705,8 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, * the high watermark. * * Returns true if kswapd scanned at least the requested number of pages to - * reclaim. This is used to determine if the scanning priority needs to be - * raised. + * reclaim or if the lack of progress was due to pages under writeback. + * This is used to determine if the scanning priority needs to be raised. */ static bool kswapd_shrink_zone(struct zone *zone, struct scan_control *sc, @@ -2697,6 +2733,8 @@ static bool kswapd_shrink_zone(struct zone *zone, if (nr_slab == 0 && !zone_reclaimable(zone)) zone->all_unreclaimable = 1; + zone_clear_flag(zone, ZONE_WRITEBACK); + return sc->nr_scanned >= sc->nr_to_reclaim; } -- cgit v0.10.2 From b7ea3c417b6c2e74ca1cb051568f60377908928d Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:01:53 -0700 Subject: mm: vmscan: check if kswapd should writepage once per pgdat scan Currently kswapd checks if it should start writepage as it shrinks each zone without taking into consideration if the zone is balanced or not. This is not wrong as such but it does not make much sense either. This patch checks once per pgdat scan if kswapd should be writing pages. Signed-off-by: Mel Gorman Reviewed-by: Michal Hocko Acked-by: Rik van Riel Acked-by: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: Jiri Slaby Cc: Valdis Kletnieks Tested-by: Zlatko Calusic Cc: dormando Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index 1109de0..a2d0c68 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2853,6 +2853,13 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, } /* + * If we're getting trouble reclaiming, start doing writepage + * even in laptop mode. + */ + if (sc.priority < DEF_PRIORITY - 2) + sc.may_writepage = 1; + + /* * Now scan the zone in the dma->highmem direction, stopping * at the last zone which needs scanning. * @@ -2923,13 +2930,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, raise_priority = false; } - /* - * If we're getting trouble reclaiming, start doing - * writepage even in laptop mode. - */ - if (sc.priority < DEF_PRIORITY - 2) - sc.may_writepage = 1; - if (zone->all_unreclaimable) { if (end_zone && end_zone == i) end_zone--; -- cgit v0.10.2 From 7c954f6de6b630de30f265a079aad359f159ebe9 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:01:54 -0700 Subject: mm: vmscan: move logic from balance_pgdat() to kswapd_shrink_zone() balance_pgdat() is very long and some of the logic can and should be internal to kswapd_shrink_zone(). Move it so the flow of balance_pgdat() is marginally easier to follow. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Reviewed-by: Michal Hocko Acked-by: Rik van Riel Cc: KAMEZAWA Hiroyuki Cc: Jiri Slaby Cc: Valdis Kletnieks Tested-by: Zlatko Calusic Cc: dormando Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index a2d0c68..4a43c28 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2709,18 +2709,53 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, * This is used to determine if the scanning priority needs to be raised. */ static bool kswapd_shrink_zone(struct zone *zone, + int classzone_idx, struct scan_control *sc, unsigned long lru_pages, unsigned long *nr_attempted) { unsigned long nr_slab; + int testorder = sc->order; + unsigned long balance_gap; struct reclaim_state *reclaim_state = current->reclaim_state; struct shrink_control shrink = { .gfp_mask = sc->gfp_mask, }; + bool lowmem_pressure; /* Reclaim above the high watermark. */ sc->nr_to_reclaim = max(SWAP_CLUSTER_MAX, high_wmark_pages(zone)); + + /* + * Kswapd reclaims only single pages with compaction enabled. Trying + * too hard to reclaim until contiguous free pages have become + * available can hurt performance by evicting too much useful data + * from memory. Do not reclaim more than needed for compaction. + */ + if (IS_ENABLED(CONFIG_COMPACTION) && sc->order && + compaction_suitable(zone, sc->order) != + COMPACT_SKIPPED) + testorder = 0; + + /* + * We put equal pressure on every zone, unless one zone has way too + * many pages free already. The "too many pages" is defined as the + * high wmark plus a "gap" where the gap is either the low + * watermark or 1% of the zone, whichever is smaller. + */ + balance_gap = min(low_wmark_pages(zone), + (zone->managed_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) / + KSWAPD_ZONE_BALANCE_GAP_RATIO); + + /* + * If there is no low memory pressure or the zone is balanced then no + * reclaim is necessary + */ + lowmem_pressure = (buffer_heads_over_limit && is_highmem(zone)); + if (!lowmem_pressure && zone_balanced(zone, testorder, + balance_gap, classzone_idx)) + return true; + shrink_zone(zone, sc); reclaim_state->reclaimed_slab = 0; @@ -2735,6 +2770,18 @@ static bool kswapd_shrink_zone(struct zone *zone, zone_clear_flag(zone, ZONE_WRITEBACK); + /* + * If a zone reaches its high watermark, consider it to be no longer + * congested. It's possible there are dirty pages backed by congested + * BDIs but as pressure is relieved, speculatively avoid congestion + * waits. + */ + if (!zone->all_unreclaimable && + zone_balanced(zone, testorder, 0, classzone_idx)) { + zone_clear_flag(zone, ZONE_CONGESTED); + zone_clear_flag(zone, ZONE_TAIL_LRU_DIRTY); + } + return sc->nr_scanned >= sc->nr_to_reclaim; } @@ -2870,8 +2917,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, */ for (i = 0; i <= end_zone; i++) { struct zone *zone = pgdat->node_zones + i; - int testorder; - unsigned long balance_gap; if (!populated_zone(zone)) continue; @@ -2892,61 +2937,14 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, sc.nr_reclaimed += nr_soft_reclaimed; /* - * We put equal pressure on every zone, unless - * one zone has way too many pages free - * already. The "too many pages" is defined - * as the high wmark plus a "gap" where the - * gap is either the low watermark or 1% - * of the zone, whichever is smaller. - */ - balance_gap = min(low_wmark_pages(zone), - (zone->managed_pages + - KSWAPD_ZONE_BALANCE_GAP_RATIO-1) / - KSWAPD_ZONE_BALANCE_GAP_RATIO); - /* - * Kswapd reclaims only single pages with compaction - * enabled. Trying too hard to reclaim until contiguous - * free pages have become available can hurt performance - * by evicting too much useful data from memory. - * Do not reclaim more than needed for compaction. + * There should be no need to raise the scanning + * priority if enough pages are already being scanned + * that that high watermark would be met at 100% + * efficiency. */ - testorder = order; - if (IS_ENABLED(CONFIG_COMPACTION) && order && - compaction_suitable(zone, order) != - COMPACT_SKIPPED) - testorder = 0; - - if ((buffer_heads_over_limit && is_highmem_idx(i)) || - !zone_balanced(zone, testorder, - balance_gap, end_zone)) { - /* - * There should be no need to raise the - * scanning priority if enough pages are - * already being scanned that high - * watermark would be met at 100% efficiency. - */ - if (kswapd_shrink_zone(zone, &sc, lru_pages, - &nr_attempted)) - raise_priority = false; - } - - if (zone->all_unreclaimable) { - if (end_zone && end_zone == i) - end_zone--; - continue; - } - - if (zone_balanced(zone, testorder, 0, end_zone)) - /* - * If a zone reaches its high watermark, - * consider it to be no longer congested. It's - * possible there are dirty pages backed by - * congested BDIs but as pressure is relieved, - * speculatively avoid congestion waits - * or writing pages from kswapd context. - */ - zone_clear_flag(zone, ZONE_CONGESTED); - zone_clear_flag(zone, ZONE_TAIL_LRU_DIRTY); + if (kswapd_shrink_zone(zone, end_zone, &sc, + lru_pages, &nr_attempted)) + raise_priority = false; } /* -- cgit v0.10.2 From e2be15f6c3eecedfbe1550cca8d72c5057abbbd2 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:01:57 -0700 Subject: mm: vmscan: stall page reclaim and writeback pages based on dirty/writepage pages encountered Further testing of the "Reduce system disruption due to kswapd" discovered a few problems. First and foremost, it's possible for pages under writeback to be freed which will lead to badness. Second, as pages were not being swapped the file LRU was being scanned faster and clean file pages were being reclaimed. In some cases this results in increased read IO to re-read data from disk. Third, more pages were being written from kswapd context which can adversly affect IO performance. Lastly, it was observed that PageDirty pages are not necessarily dirty on all filesystems (buffers can be clean while PageDirty is set and ->writepage generates no IO) and not all filesystems set PageWriteback when the page is being written (e.g. ext3). This disconnect confuses the reclaim stalling logic. This follow-up series is aimed at these problems. The tests were based on three kernels vanilla: kernel 3.9 as that is what the current mmotm uses as a baseline mmotm-20130522 is mmotm as of 22nd May with "Reduce system disruption due to kswapd" applied on top as per what should be in Andrew's tree right now lessdisrupt-v7r10 is this follow-up series on top of the mmotm kernel The first test used memcached+memcachetest while some background IO was in progress as implemented by the parallel IO tests implement in MM Tests. memcachetest benchmarks how many operations/second memcached can service. It starts with no background IO on a freshly created ext4 filesystem and then re-runs the test with larger amounts of IO in the background to roughly simulate a large copy in progress. The expectation is that the IO should have little or no impact on memcachetest which is running entirely in memory. parallelio 3.9.0 3.9.0 3.9.0 vanilla mm1-mmotm-20130522 mm1-lessdisrupt-v7r10 Ops memcachetest-0M 23117.00 ( 0.00%) 22780.00 ( -1.46%) 22763.00 ( -1.53%) Ops memcachetest-715M 23774.00 ( 0.00%) 23299.00 ( -2.00%) 22934.00 ( -3.53%) Ops memcachetest-2385M 4208.00 ( 0.00%) 24154.00 (474.00%) 23765.00 (464.76%) Ops memcachetest-4055M 4104.00 ( 0.00%) 25130.00 (512.33%) 24614.00 (499.76%) Ops io-duration-0M 0.00 ( 0.00%) 0.00 ( 0.00%) 0.00 ( 0.00%) Ops io-duration-715M 12.00 ( 0.00%) 7.00 ( 41.67%) 6.00 ( 50.00%) Ops io-duration-2385M 116.00 ( 0.00%) 21.00 ( 81.90%) 21.00 ( 81.90%) Ops io-duration-4055M 160.00 ( 0.00%) 36.00 ( 77.50%) 35.00 ( 78.12%) Ops swaptotal-0M 0.00 ( 0.00%) 0.00 ( 0.00%) 0.00 ( 0.00%) Ops swaptotal-715M 140138.00 ( 0.00%) 18.00 ( 99.99%) 18.00 ( 99.99%) Ops swaptotal-2385M 385682.00 ( 0.00%) 0.00 ( 0.00%) 0.00 ( 0.00%) Ops swaptotal-4055M 418029.00 ( 0.00%) 0.00 ( 0.00%) 0.00 ( 0.00%) Ops swapin-0M 0.00 ( 0.00%) 0.00 ( 0.00%) 0.00 ( 0.00%) Ops swapin-715M 144.00 ( 0.00%) 0.00 ( 0.00%) 0.00 ( 0.00%) Ops swapin-2385M 134227.00 ( 0.00%) 0.00 ( 0.00%) 0.00 ( 0.00%) Ops swapin-4055M 125618.00 ( 0.00%) 0.00 ( 0.00%) 0.00 ( 0.00%) Ops minorfaults-0M 1536429.00 ( 0.00%) 1531632.00 ( 0.31%) 1533541.00 ( 0.19%) Ops minorfaults-715M 1786996.00 ( 0.00%) 1612148.00 ( 9.78%) 1608832.00 ( 9.97%) Ops minorfaults-2385M 1757952.00 ( 0.00%) 1614874.00 ( 8.14%) 1613541.00 ( 8.21%) Ops minorfaults-4055M 1774460.00 ( 0.00%) 1633400.00 ( 7.95%) 1630881.00 ( 8.09%) Ops majorfaults-0M 1.00 ( 0.00%) 0.00 ( 0.00%) 0.00 ( 0.00%) Ops majorfaults-715M 184.00 ( 0.00%) 167.00 ( 9.24%) 166.00 ( 9.78%) Ops majorfaults-2385M 24444.00 ( 0.00%) 155.00 ( 99.37%) 93.00 ( 99.62%) Ops majorfaults-4055M 21357.00 ( 0.00%) 147.00 ( 99.31%) 134.00 ( 99.37%) memcachetest is the transactions/second reported by memcachetest. In the vanilla kernel note that performance drops from around 23K/sec to just over 4K/second when there is 2385M of IO going on in the background. With current mmotm, there is no collapse in performance and with this follow-up series there is little change. swaptotal is the total amount of swap traffic. With mmotm and the follow-up series, the total amount of swapping is much reduced. 3.9.0 3.9.0 3.9.0 vanillamm1-mmotm-20130522mm1-lessdisrupt-v7r10 Minor Faults 11160152 10706748 10622316 Major Faults 46305 755 678 Swap Ins 260249 0 0 Swap Outs 683860 18 18 Direct pages scanned 0 678 2520 Kswapd pages scanned 6046108 8814900 1639279 Kswapd pages reclaimed 1081954 1172267 1094635 Direct pages reclaimed 0 566 2304 Kswapd efficiency 17% 13% 66% Kswapd velocity 5217.560 7618.953 1414.879 Direct efficiency 100% 83% 91% Direct velocity 0.000 0.586 2.175 Percentage direct scans 0% 0% 0% Zone normal velocity 5105.086 6824.681 671.158 Zone dma32 velocity 112.473 794.858 745.896 Zone dma velocity 0.000 0.000 0.000 Page writes by reclaim 1929612.000 6861768.000 32821.000 Page writes file 1245752 6861750 32803 Page writes anon 683860 18 18 Page reclaim immediate 7484 40 239 Sector Reads 1130320 93996 86900 Sector Writes 13508052 10823500 11804436 Page rescued immediate 0 0 0 Slabs scanned 33536 27136 18560 Direct inode steals 0 0 0 Kswapd inode steals 8641 1035 0 Kswapd skipped wait 0 0 0 THP fault alloc 8 37 33 THP collapse alloc 508 552 515 THP splits 24 1 1 THP fault fallback 0 0 0 THP collapse fail 0 0 0 There are a number of observations to make here 1. Swap outs are almost eliminated. Swap ins are 0 indicating that the pages swapped were really unused anonymous pages. Related to that, major faults are much reduced. 2. kswapd efficiency was impacted by the initial series but with these follow-up patches, the efficiency is now at 66% indicating that far fewer pages were skipped during scanning due to dirty or writeback pages. 3. kswapd velocity is reduced indicating that fewer pages are being scanned with the follow-up series as kswapd now stalls when the tail of the LRU queue is full of unqueued dirty pages. The stall gives flushers a chance to catch-up so kswapd can reclaim clean pages when it wakes 4. In light of Zlatko's recent reports about zone scanning imbalances, mmtests now reports scanning velocity on a per-zone basis. With mainline, you can see that the scanning activity is dominated by the Normal zone with over 45 times more scanning in Normal than the DMA32 zone. With the series currently in mmotm, the ratio is slightly better but it is still the case that the bulk of scanning is in the highest zone. With this follow-up series, the ratio of scanning between the Normal and DMA32 zone is roughly equal. 5. As Dave Chinner observed, the current patches in mmotm increased the number of pages written from kswapd context which is expected to adversly impact IO performance. With the follow-up patches, far fewer pages are written from kswapd context than the mainline kernel 6. With the series in mmotm, fewer inodes were reclaimed by kswapd. With the follow-up series, there is less slab shrinking activity and no inodes were reclaimed. 7. Note that "Sectors Read" is drastically reduced implying that the source data being used for the IO is not being aggressively discarded due to page reclaim skipping over dirty pages and reclaiming clean pages. Note that the reducion in reads could also be due to inode data not being re-read from disk after a slab shrink. 3.9.0 3.9.0 3.9.0 vanillamm1-mmotm-20130522mm1-lessdisrupt-v7r10 Mean sda-avgqz 166.99 32.09 33.44 Mean sda-await 853.64 192.76 185.43 Mean sda-r_await 6.31 9.24 5.97 Mean sda-w_await 2992.81 202.65 192.43 Max sda-avgqz 1409.91 718.75 698.98 Max sda-await 6665.74 3538.00 3124.23 Max sda-r_await 58.96 111.95 58.00 Max sda-w_await 28458.94 3977.29 3148.61 In light of the changes in writes from reclaim context, the number of reads and Dave Chinner's concerns about IO performance I took a closer look at the IO stats for the test disk. Few observations 1. The average queue size is reduced by the initial series and roughly the same with this follow up. 2. Average wait times for writes are reduced and as the IO is completing faster it at least implies that the gain is because flushers are writing the files efficiently instead of page reclaim getting in the way. 3. The reduction in maximum write latency is staggering. 28 seconds down to 3 seconds. Jan Kara asked how NFS is affected by all of this. Unstable pages can be taken into account as one of the patches in the series shows but it is still the case that filesystems with unusual handling of dirty or writeback could still be treated better. Tests like postmark, fsmark and largedd showed up nothing useful. On my test setup, pages are simply not being written back from reclaim context with or without the patches and there are no changes in performance. My test setup probably is just not strong enough network-wise to be really interesting. I ran a longer-lived memcached test with IO going to NFS instead of a local disk parallelio 3.9.0 3.9.0 3.9.0 vanilla mm1-mmotm-20130522 mm1-lessdisrupt-v7r10 Ops memcachetest-0M 23323.00 ( 0.00%) 23241.00 ( -0.35%) 23321.00 ( -0.01%) Ops memcachetest-715M 25526.00 ( 0.00%) 24763.00 ( -2.99%) 23242.00 ( -8.95%) Ops memcachetest-2385M 8814.00 ( 0.00%) 26924.00 (205.47%) 23521.00 (166.86%) Ops memcachetest-4055M 5835.00 ( 0.00%) 26827.00 (359.76%) 25560.00 (338.05%) Ops io-duration-0M 0.00 ( 0.00%) 0.00 ( 0.00%) 0.00 ( 0.00%) Ops io-duration-715M 65.00 ( 0.00%) 71.00 ( -9.23%) 11.00 ( 83.08%) Ops io-duration-2385M 129.00 ( 0.00%) 94.00 ( 27.13%) 53.00 ( 58.91%) Ops io-duration-4055M 301.00 ( 0.00%) 100.00 ( 66.78%) 108.00 ( 64.12%) Ops swaptotal-0M 0.00 ( 0.00%) 0.00 ( 0.00%) 0.00 ( 0.00%) Ops swaptotal-715M 14394.00 ( 0.00%) 949.00 ( 93.41%) 63.00 ( 99.56%) Ops swaptotal-2385M 401483.00 ( 0.00%) 24437.00 ( 93.91%) 30118.00 ( 92.50%) Ops swaptotal-4055M 554123.00 ( 0.00%) 35688.00 ( 93.56%) 63082.00 ( 88.62%) Ops swapin-0M 0.00 ( 0.00%) 0.00 ( 0.00%) 0.00 ( 0.00%) Ops swapin-715M 4522.00 ( 0.00%) 560.00 ( 87.62%) 63.00 ( 98.61%) Ops swapin-2385M 169861.00 ( 0.00%) 5026.00 ( 97.04%) 13917.00 ( 91.81%) Ops swapin-4055M 192374.00 ( 0.00%) 10056.00 ( 94.77%) 25729.00 ( 86.63%) Ops minorfaults-0M 1445969.00 ( 0.00%) 1520878.00 ( -5.18%) 1454024.00 ( -0.56%) Ops minorfaults-715M 1557288.00 ( 0.00%) 1528482.00 ( 1.85%) 1535776.00 ( 1.38%) Ops minorfaults-2385M 1692896.00 ( 0.00%) 1570523.00 ( 7.23%) 1559622.00 ( 7.87%) Ops minorfaults-4055M 1654985.00 ( 0.00%) 1581456.00 ( 4.44%) 1596713.00 ( 3.52%) Ops majorfaults-0M 0.00 ( 0.00%) 1.00 (-99.00%) 0.00 ( 0.00%) Ops majorfaults-715M 763.00 ( 0.00%) 265.00 ( 65.27%) 75.00 ( 90.17%) Ops majorfaults-2385M 23861.00 ( 0.00%) 894.00 ( 96.25%) 2189.00 ( 90.83%) Ops majorfaults-4055M 27210.00 ( 0.00%) 1569.00 ( 94.23%) 4088.00 ( 84.98%) 1. Performance does not collapse due to IO which is good. IO is also completing faster. Note with mmotm, IO completes in a third of the time and faster again with this series applied 2. Swapping is reduced, although not eliminated. The figures for the follow-up look bad but it does vary a bit as the stalling is not perfect for nfs or filesystems like ext3 with unusual handling of dirty and writeback pages 3. There are swapins, particularly with larger amounts of IO indicating that active pages are being reclaimed. However, the number of much reduced. 3.9.0 3.9.0 3.9.0 vanillamm1-mmotm-20130522mm1-lessdisrupt-v7r10 Minor Faults 36339175 35025445 35219699 Major Faults 310964 27108 51887 Swap Ins 2176399 173069 333316 Swap Outs 3344050 357228 504824 Direct pages scanned 8972 77283 43242 Kswapd pages scanned 20899983 8939566 14772851 Kswapd pages reclaimed 6193156 5172605 5231026 Direct pages reclaimed 8450 73802 39514 Kswapd efficiency 29% 57% 35% Kswapd velocity 3929.743 1847.499 3058.840 Direct efficiency 94% 95% 91% Direct velocity 1.687 15.972 8.954 Percentage direct scans 0% 0% 0% Zone normal velocity 3721.907 939.103 2185.142 Zone dma32 velocity 209.522 924.368 882.651 Zone dma velocity 0.000 0.000 0.000 Page writes by reclaim 4082185.000 526319.000 537114.000 Page writes file 738135 169091 32290 Page writes anon 3344050 357228 504824 Page reclaim immediate 9524 170 5595843 Sector Reads 8909900 861192 1483680 Sector Writes 13428980 1488744 2076800 Page rescued immediate 0 0 0 Slabs scanned 38016 31744 28672 Direct inode steals 0 0 0 Kswapd inode steals 424 0 0 Kswapd skipped wait 0 0 0 THP fault alloc 14 15 119 THP collapse alloc 1767 1569 1618 THP splits 30 29 25 THP fault fallback 0 0 0 THP collapse fail 8 5 0 Compaction stalls 17 41 100 Compaction success 7 31 95 Compaction failures 10 10 5 Page migrate success 7083 22157 62217 Page migrate failure 0 0 0 Compaction pages isolated 14847 48758 135830 Compaction migrate scanned 18328 48398 138929 Compaction free scanned 2000255 355827 1720269 Compaction cost 7 24 68 I guess the main takeaway again is the much reduced page writes from reclaim context and reduced reads. 3.9.0 3.9.0 3.9.0 vanillamm1-mmotm-20130522mm1-lessdisrupt-v7r10 Mean sda-avgqz 23.58 0.35 0.44 Mean sda-await 133.47 15.72 15.46 Mean sda-r_await 4.72 4.69 3.95 Mean sda-w_await 507.69 28.40 33.68 Max sda-avgqz 680.60 12.25 23.14 Max sda-await 3958.89 221.83 286.22 Max sda-r_await 63.86 61.23 67.29 Max sda-w_await 11710.38 883.57 1767.28 And as before, write wait times are much reduced. This patch: The patch "mm: vmscan: Have kswapd writeback pages based on dirty pages encountered, not priority" decides whether to writeback pages from reclaim context based on the number of dirty pages encountered. This situation is flagged too easily and flushers are not given the chance to catch up resulting in more pages being written from reclaim context and potentially impacting IO performance. The check for PageWriteback is also misplaced as it happens within a PageDirty check which is nonsense as the dirty may have been cleared for IO. The accounting is updated very late and pages that are already under writeback, were reactivated, could not unmapped or could not be released are all missed. Similarly, a page is considered congested for reasons other than being congested and pages that cannot be written out in the correct context are skipped. Finally, it considers stalling and writing back filesystem pages due to encountering dirty anonymous pages at the tail of the LRU which is dumb. This patch causes kswapd to begin writing filesystem pages from reclaim context only if page reclaim found that all filesystem pages at the tail of the LRU were unqueued dirty pages. Before it starts writing filesystem pages, it will stall to give flushers a chance to catch up. The decision on whether wait_iff_congested is also now determined by dirty filesystem pages only. Congested pages are based on whether the underlying BDI is congested regardless of the context of the reclaiming process. Signed-off-by: Mel Gorman Cc: Johannes Weiner Cc: Michal Hocko Cc: Rik van Riel Cc: KAMEZAWA Hiroyuki Cc: Jiri Slaby Cc: Valdis Kletnieks Cc: Zlatko Calusic Cc: dormando Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index 4a43c28..999ef0b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -669,6 +669,25 @@ static enum page_references page_check_references(struct page *page, return PAGEREF_RECLAIM; } +/* Check if a page is dirty or under writeback */ +static void page_check_dirty_writeback(struct page *page, + bool *dirty, bool *writeback) +{ + /* + * Anonymous pages are not handled by flushers and must be written + * from reclaim context. Do not stall reclaim based on them + */ + if (!page_is_file_cache(page)) { + *dirty = false; + *writeback = false; + return; + } + + /* By default assume that the page flags are accurate */ + *dirty = PageDirty(page); + *writeback = PageWriteback(page); +} + /* * shrink_page_list() returns the number of reclaimed pages */ @@ -697,6 +716,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, struct page *page; int may_enter_fs; enum page_references references = PAGEREF_RECLAIM_CLEAN; + bool dirty, writeback; cond_resched(); @@ -725,6 +745,24 @@ static unsigned long shrink_page_list(struct list_head *page_list, (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO)); /* + * The number of dirty pages determines if a zone is marked + * reclaim_congested which affects wait_iff_congested. kswapd + * will stall and start writing pages if the tail of the LRU + * is all dirty unqueued pages. + */ + page_check_dirty_writeback(page, &dirty, &writeback); + if (dirty || writeback) + nr_dirty++; + + if (dirty && !writeback) + nr_unqueued_dirty++; + + /* Treat this page as congested if underlying BDI is */ + mapping = page_mapping(page); + if (mapping && bdi_write_congested(mapping->backing_dev_info)) + nr_congested++; + + /* * If a page at the tail of the LRU is under writeback, there * are three cases to consider. * @@ -819,9 +857,10 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (!add_to_swap(page, page_list)) goto activate_locked; may_enter_fs = 1; - } - mapping = page_mapping(page); + /* Adding to swap updated mapping */ + mapping = page_mapping(page); + } /* * The page is mapped into the page tables of one or more @@ -841,11 +880,6 @@ static unsigned long shrink_page_list(struct list_head *page_list, } if (PageDirty(page)) { - nr_dirty++; - - if (!PageWriteback(page)) - nr_unqueued_dirty++; - /* * Only kswapd can writeback filesystem pages to * avoid risk of stack overflow but only writeback @@ -876,7 +910,6 @@ static unsigned long shrink_page_list(struct list_head *page_list, /* Page is dirty, try to write it out here */ switch (pageout(page, mapping, sc)) { case PAGE_KEEP: - nr_congested++; goto keep_locked; case PAGE_ACTIVATE: goto activate_locked; @@ -1318,7 +1351,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, unsigned long nr_scanned; unsigned long nr_reclaimed = 0; unsigned long nr_taken; - unsigned long nr_dirty = 0; + unsigned long nr_unqueued_dirty = 0; unsigned long nr_writeback = 0; isolate_mode_t isolate_mode = 0; int file = is_file_lru(lru); @@ -1361,7 +1394,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, return 0; nr_reclaimed = shrink_page_list(&page_list, zone, sc, TTU_UNMAP, - &nr_dirty, &nr_writeback, false); + &nr_unqueued_dirty, &nr_writeback, false); spin_lock_irq(&zone->lru_lock); @@ -1416,11 +1449,13 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, /* * Similarly, if many dirty pages are encountered that are not * currently being written then flag that kswapd should start - * writing back pages. + * writing back pages and stall to give a chance for flushers + * to catch up. */ - if (global_reclaim(sc) && nr_dirty && - nr_dirty >= (nr_taken >> (DEF_PRIORITY - sc->priority))) + if (global_reclaim(sc) && nr_unqueued_dirty == nr_taken) { + congestion_wait(BLK_RW_ASYNC, HZ/10); zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY); + } trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id, zone_idx(zone), -- cgit v0.10.2 From b1a6f21e3b2315d46ae8af88a8f4eb8ea2763107 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:01:58 -0700 Subject: mm: vmscan: stall page reclaim after a list of pages have been processed Commit "mm: vmscan: Block kswapd if it is encountering pages under writeback" blocks page reclaim if it encounters pages under writeback marked for immediate reclaim. It blocks while pages are still isolated from the LRU which is unnecessary. This patch defers the blocking until after the isolated pages have been processed and tidies up some of the comments. Signed-off-by: Mel Gorman Cc: Johannes Weiner Cc: Michal Hocko Cc: Rik van Riel Cc: KAMEZAWA Hiroyuki Cc: Jiri Slaby Cc: Valdis Kletnieks Cc: Zlatko Calusic Cc: dormando Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index 999ef0b..5b1a79c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -697,6 +697,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, enum ttu_flags ttu_flags, unsigned long *ret_nr_unqueued_dirty, unsigned long *ret_nr_writeback, + unsigned long *ret_nr_immediate, bool force_reclaim) { LIST_HEAD(ret_pages); @@ -707,6 +708,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, unsigned long nr_congested = 0; unsigned long nr_reclaimed = 0; unsigned long nr_writeback = 0; + unsigned long nr_immediate = 0; cond_resched(); @@ -773,8 +775,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, * IO can complete. Waiting on the page itself risks an * indefinite stall if it is impossible to writeback the * page due to IO error or disconnected storage so instead - * block for HZ/10 or until some IO completes then clear the - * ZONE_WRITEBACK flag to recheck if the condition exists. + * note that the LRU is being scanned too quickly and the + * caller can stall after page list has been processed. * * 2) Global reclaim encounters a page, memcg encounters a * page that is not marked for immediate reclaim or @@ -804,10 +806,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (current_is_kswapd() && PageReclaim(page) && zone_is_reclaim_writeback(zone)) { - unlock_page(page); - congestion_wait(BLK_RW_ASYNC, HZ/10); - zone_clear_flag(zone, ZONE_WRITEBACK); - goto keep; + nr_immediate++; + goto keep_locked; /* Case 2 above */ } else if (global_reclaim(sc) || @@ -1033,6 +1033,7 @@ keep: mem_cgroup_uncharge_end(); *ret_nr_unqueued_dirty += nr_unqueued_dirty; *ret_nr_writeback += nr_writeback; + *ret_nr_immediate += nr_immediate; return nr_reclaimed; } @@ -1044,7 +1045,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, .priority = DEF_PRIORITY, .may_unmap = 1, }; - unsigned long ret, dummy1, dummy2; + unsigned long ret, dummy1, dummy2, dummy3; struct page *page, *next; LIST_HEAD(clean_pages); @@ -1057,7 +1058,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, ret = shrink_page_list(&clean_pages, zone, &sc, TTU_UNMAP|TTU_IGNORE_ACCESS, - &dummy1, &dummy2, true); + &dummy1, &dummy2, &dummy3, true); list_splice(&clean_pages, page_list); __mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret); return ret; @@ -1353,6 +1354,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, unsigned long nr_taken; unsigned long nr_unqueued_dirty = 0; unsigned long nr_writeback = 0; + unsigned long nr_immediate = 0; isolate_mode_t isolate_mode = 0; int file = is_file_lru(lru); struct zone *zone = lruvec_zone(lruvec); @@ -1394,7 +1396,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, return 0; nr_reclaimed = shrink_page_list(&page_list, zone, sc, TTU_UNMAP, - &nr_unqueued_dirty, &nr_writeback, false); + &nr_unqueued_dirty, &nr_writeback, &nr_immediate, + false); spin_lock_irq(&zone->lru_lock); @@ -1447,14 +1450,28 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, } /* - * Similarly, if many dirty pages are encountered that are not - * currently being written then flag that kswapd should start - * writing back pages and stall to give a chance for flushers - * to catch up. + * memcg will stall in page writeback so only consider forcibly + * stalling for global reclaim */ - if (global_reclaim(sc) && nr_unqueued_dirty == nr_taken) { - congestion_wait(BLK_RW_ASYNC, HZ/10); - zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY); + if (global_reclaim(sc)) { + /* + * If dirty pages are scanned that are not queued for IO, it + * implies that flushers are not keeping up. In this case, flag + * the zone ZONE_TAIL_LRU_DIRTY and kswapd will start writing + * pages from reclaim context. It will forcibly stall in the + * next check. + */ + if (nr_unqueued_dirty == nr_taken) + zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY); + + /* + * In addition, if kswapd scans pages marked marked for + * immediate reclaim and under writeback (nr_immediate), it + * implies that pages are cycling through the LRU faster than + * they are written so also forcibly stall. + */ + if (nr_unqueued_dirty == nr_taken || nr_immediate) + congestion_wait(BLK_RW_ASYNC, HZ/10); } trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id, -- cgit v0.10.2 From f7ab8db791a8692f5ed4201dbae25722c1732a8d Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:02:00 -0700 Subject: mm: vmscan: set zone flags before blocking In shrink_page_list a decision may be made to stall and flag a zone as ZONE_WRITEBACK so that if a large number of unqueued dirty pages are encountered later then the reclaimer will stall. Set ZONE_WRITEBACK before potentially going to sleep so it is noticed sooner. Signed-off-by: Mel Gorman Cc: Johannes Weiner Cc: Michal Hocko Cc: Rik van Riel Cc: KAMEZAWA Hiroyuki Cc: Jiri Slaby Cc: Valdis Kletnieks Cc: Zlatko Calusic Cc: dormando Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index 5b1a79c..5f80d01 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1445,8 +1445,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, */ if (nr_writeback && nr_writeback >= (nr_taken >> (DEF_PRIORITY - sc->priority))) { - wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); zone_set_flag(zone, ZONE_WRITEBACK); + wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); } /* -- cgit v0.10.2 From 8e950282804558e4605401b9c79c1d34f0d73507 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:02:02 -0700 Subject: mm: vmscan: move direct reclaim wait_iff_congested into shrink_list shrink_inactive_list makes decisions on whether to stall based on the number of dirty pages encountered. The wait_iff_congested() call in shrink_page_list does no such thing and it's arbitrary. This patch moves the decision on whether to set ZONE_CONGESTED and the wait_iff_congested call into shrink_page_list. This keeps all the decisions on whether to stall or not in the one place. Signed-off-by: Mel Gorman Cc: Johannes Weiner Cc: Michal Hocko Cc: Rik van Riel Cc: KAMEZAWA Hiroyuki Cc: Jiri Slaby Cc: Valdis Kletnieks Cc: Zlatko Calusic Cc: dormando Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index 5f80d01..4898daf 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -695,7 +695,9 @@ static unsigned long shrink_page_list(struct list_head *page_list, struct zone *zone, struct scan_control *sc, enum ttu_flags ttu_flags, + unsigned long *ret_nr_dirty, unsigned long *ret_nr_unqueued_dirty, + unsigned long *ret_nr_congested, unsigned long *ret_nr_writeback, unsigned long *ret_nr_immediate, bool force_reclaim) @@ -1017,20 +1019,13 @@ keep: VM_BUG_ON(PageLRU(page) || PageUnevictable(page)); } - /* - * Tag a zone as congested if all the dirty pages encountered were - * backed by a congested BDI. In this case, reclaimers should just - * back off and wait for congestion to clear because further reclaim - * will encounter the same problem - */ - if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc)) - zone_set_flag(zone, ZONE_CONGESTED); - free_hot_cold_page_list(&free_pages, 1); list_splice(&ret_pages, page_list); count_vm_events(PGACTIVATE, pgactivate); mem_cgroup_uncharge_end(); + *ret_nr_dirty += nr_dirty; + *ret_nr_congested += nr_congested; *ret_nr_unqueued_dirty += nr_unqueued_dirty; *ret_nr_writeback += nr_writeback; *ret_nr_immediate += nr_immediate; @@ -1045,7 +1040,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, .priority = DEF_PRIORITY, .may_unmap = 1, }; - unsigned long ret, dummy1, dummy2, dummy3; + unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5; struct page *page, *next; LIST_HEAD(clean_pages); @@ -1057,8 +1052,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, } ret = shrink_page_list(&clean_pages, zone, &sc, - TTU_UNMAP|TTU_IGNORE_ACCESS, - &dummy1, &dummy2, &dummy3, true); + TTU_UNMAP|TTU_IGNORE_ACCESS, + &dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true); list_splice(&clean_pages, page_list); __mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret); return ret; @@ -1352,6 +1347,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, unsigned long nr_scanned; unsigned long nr_reclaimed = 0; unsigned long nr_taken; + unsigned long nr_dirty = 0; + unsigned long nr_congested = 0; unsigned long nr_unqueued_dirty = 0; unsigned long nr_writeback = 0; unsigned long nr_immediate = 0; @@ -1396,8 +1393,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, return 0; nr_reclaimed = shrink_page_list(&page_list, zone, sc, TTU_UNMAP, - &nr_unqueued_dirty, &nr_writeback, &nr_immediate, - false); + &nr_dirty, &nr_unqueued_dirty, &nr_congested, + &nr_writeback, &nr_immediate, + false); spin_lock_irq(&zone->lru_lock); @@ -1431,7 +1429,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, * same way balance_dirty_pages() manages. * * This scales the number of dirty pages that must be under writeback - * before throttling depending on priority. It is a simple backoff + * before a zone gets flagged ZONE_WRITEBACK. It is a simple backoff * function that has the most effect in the range DEF_PRIORITY to * DEF_PRIORITY-2 which is the priority reclaim is considered to be * in trouble and reclaim is considered to be in trouble. @@ -1442,12 +1440,14 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, * ... * DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any * isolated page is PageWriteback + * + * Once a zone is flagged ZONE_WRITEBACK, kswapd will count the number + * of pages under pages flagged for immediate reclaim and stall if any + * are encountered in the nr_immediate check below. */ if (nr_writeback && nr_writeback >= - (nr_taken >> (DEF_PRIORITY - sc->priority))) { + (nr_taken >> (DEF_PRIORITY - sc->priority))) zone_set_flag(zone, ZONE_WRITEBACK); - wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); - } /* * memcg will stall in page writeback so only consider forcibly @@ -1455,6 +1455,13 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, */ if (global_reclaim(sc)) { /* + * Tag a zone as congested if all the dirty pages scanned were + * backed by a congested BDI and wait_iff_congested will stall. + */ + if (nr_dirty && nr_dirty == nr_congested) + zone_set_flag(zone, ZONE_CONGESTED); + + /* * If dirty pages are scanned that are not queued for IO, it * implies that flushers are not keeping up. In this case, flag * the zone ZONE_TAIL_LRU_DIRTY and kswapd will start writing @@ -1474,6 +1481,14 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, congestion_wait(BLK_RW_ASYNC, HZ/10); } + /* + * Stall direct reclaim for IO completions if underlying BDIs or zone + * is congested. Allow kswapd to continue until it starts encountering + * unqueued dirty pages or cycling through the LRU too quickly. + */ + if (!sc->hibernation_mode && !current_is_kswapd()) + wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); + trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id, zone_idx(zone), nr_scanned, nr_reclaimed, @@ -2374,17 +2389,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, WB_REASON_TRY_TO_FREE_PAGES); sc->may_writepage = 1; } - - /* Take a nap, wait for some writeback to complete */ - if (!sc->hibernation_mode && sc->nr_scanned && - sc->priority < DEF_PRIORITY - 2) { - struct zone *preferred_zone; - - first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask), - &cpuset_current_mems_allowed, - &preferred_zone); - wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10); - } } while (--sc->priority >= 0); out: -- cgit v0.10.2 From d04e8acd03e5c3421ef18e3da7bc88d56179ca42 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:02:03 -0700 Subject: mm: vmscan: treat pages marked for immediate reclaim as zone congestion Currently a zone will only be marked congested if the underlying BDI is congested but if dirty pages are spread across zones it is possible that an individual zone is full of dirty pages without being congested. The impact is that zone gets scanned very quickly potentially reclaiming really clean pages. This patch treats pages marked for immediate reclaim as congested for the purposes of marking a zone ZONE_CONGESTED and stalling in wait_iff_congested. Signed-off-by: Mel Gorman Cc: Johannes Weiner Cc: Michal Hocko Cc: Rik van Riel Cc: KAMEZAWA Hiroyuki Cc: Jiri Slaby Cc: Valdis Kletnieks Cc: Zlatko Calusic Cc: dormando Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index 4898daf..bf47784 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -761,9 +761,15 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (dirty && !writeback) nr_unqueued_dirty++; - /* Treat this page as congested if underlying BDI is */ + /* + * Treat this page as congested if the underlying BDI is or if + * pages are cycling through the LRU so quickly that the + * pages marked for immediate reclaim are making it to the + * end of the LRU a second time. + */ mapping = page_mapping(page); - if (mapping && bdi_write_congested(mapping->backing_dev_info)) + if ((mapping && bdi_write_congested(mapping->backing_dev_info)) || + (writeback && PageReclaim(page))) nr_congested++; /* -- cgit v0.10.2 From b45972265f823ed01eae0867a176320071665787 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:02:05 -0700 Subject: mm: vmscan: take page buffers dirty and locked state into account Page reclaim keeps track of dirty and under writeback pages and uses it to determine if wait_iff_congested() should stall or if kswapd should begin writing back pages. This fails to account for buffer pages that can be under writeback but not PageWriteback which is the case for filesystems like ext3 ordered mode. Furthermore, PageDirty buffer pages can have all the buffers clean and writepage does no IO so it should not be accounted as congested. This patch adds an address_space operation that filesystems may optionally use to check if a page is really dirty or really under writeback. An implementation is provided for for buffer_heads is added and used for block operations and ext3 in ordered mode. By default the page flags are obeyed. Credit goes to Jan Kara for identifying that the page flags alone are not sufficient for ext3 and sanity checking a number of ideas on how the problem could be addressed. Signed-off-by: Mel Gorman Cc: Johannes Weiner Cc: Michal Hocko Cc: Rik van Riel Cc: KAMEZAWA Hiroyuki Cc: Jiri Slaby Cc: Valdis Kletnieks Cc: Zlatko Calusic Cc: dormando Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/block_dev.c b/fs/block_dev.c index 431b6a0..bb43ce0 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1562,6 +1562,7 @@ static const struct address_space_operations def_blk_aops = { .writepages = generic_writepages, .releasepage = blkdev_releasepage, .direct_IO = blkdev_direct_IO, + .is_dirty_writeback = buffer_check_dirty_writeback, }; const struct file_operations def_blk_fops = { diff --git a/fs/buffer.c b/fs/buffer.c index f93392e..4d74335 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -83,6 +83,40 @@ void unlock_buffer(struct buffer_head *bh) EXPORT_SYMBOL(unlock_buffer); /* + * Returns if the page has dirty or writeback buffers. If all the buffers + * are unlocked and clean then the PageDirty information is stale. If + * any of the pages are locked, it is assumed they are locked for IO. + */ +void buffer_check_dirty_writeback(struct page *page, + bool *dirty, bool *writeback) +{ + struct buffer_head *head, *bh; + *dirty = false; + *writeback = false; + + BUG_ON(!PageLocked(page)); + + if (!page_has_buffers(page)) + return; + + if (PageWriteback(page)) + *writeback = true; + + head = page_buffers(page); + bh = head; + do { + if (buffer_locked(bh)) + *writeback = true; + + if (buffer_dirty(bh)) + *dirty = true; + + bh = bh->b_this_page; + } while (bh != head); +} +EXPORT_SYMBOL(buffer_check_dirty_writeback); + +/* * Block until a buffer comes unlocked. This doesn't stop it * from becoming locked again - you have to lock it yourself * if you want to preserve its state. diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index f67668f..2bd8548 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1985,6 +1985,7 @@ static const struct address_space_operations ext3_ordered_aops = { .direct_IO = ext3_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .is_dirty_writeback = buffer_check_dirty_writeback, .error_remove_page = generic_error_remove_page, }; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index f5a3b83..91fa9a9 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -139,6 +139,9 @@ BUFFER_FNS(Prio, prio) }) #define page_has_buffers(page) PagePrivate(page) +void buffer_check_dirty_writeback(struct page *page, + bool *dirty, bool *writeback); + /* * Declarations */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 2b82c80..99be011 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -380,6 +380,7 @@ struct address_space_operations { int (*launder_page) (struct page *); int (*is_partially_uptodate) (struct page *, read_descriptor_t *, unsigned long); + void (*is_dirty_writeback) (struct page *, bool *, bool *); int (*error_remove_page)(struct address_space *, struct page *); /* swapfile support */ diff --git a/mm/vmscan.c b/mm/vmscan.c index bf47784..c857943 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -673,6 +673,8 @@ static enum page_references page_check_references(struct page *page, static void page_check_dirty_writeback(struct page *page, bool *dirty, bool *writeback) { + struct address_space *mapping; + /* * Anonymous pages are not handled by flushers and must be written * from reclaim context. Do not stall reclaim based on them @@ -686,6 +688,14 @@ static void page_check_dirty_writeback(struct page *page, /* By default assume that the page flags are accurate */ *dirty = PageDirty(page); *writeback = PageWriteback(page); + + /* Verify dirty/writeback state if the filesystem supports it */ + if (!page_has_private(page)) + return; + + mapping = page_mapping(page); + if (mapping && mapping->a_ops->is_dirty_writeback) + mapping->a_ops->is_dirty_writeback(page, dirty, writeback); } /* -- cgit v0.10.2 From f919b19614f06711cba300c1bb1e3d94c9ca21b0 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:02:06 -0700 Subject: fs: nfs: inform the VM about pages being committed or unstable VM page reclaim uses dirty and writeback page states to determine if flushers are cleaning pages too slowly and that page reclaim should stall waiting on flushers to catch up. Page state in NFS is a bit more complex and a clean page can be unreclaimable due to being unstable which is effectively "dirty" from the perspective of the VM from reclaim context. Similarly, if the inode is currently being committed then it's similar to being under writeback. This patch adds a is_dirty_writeback() handled for NFS that checks if a pages backing inode is being committed and should be accounted as writeback and if a page has private state indicating that it is effectively dirty. Signed-off-by: Mel Gorman Cc: Johannes Weiner Cc: Michal Hocko Cc: Rik van Riel Cc: KAMEZAWA Hiroyuki Cc: Jiri Slaby Cc: Valdis Kletnieks Cc: Zlatko Calusic Cc: dormando Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6b4a79f..94e94bd 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -495,6 +495,35 @@ static int nfs_release_page(struct page *page, gfp_t gfp) return nfs_fscache_release_page(page, gfp); } +static void nfs_check_dirty_writeback(struct page *page, + bool *dirty, bool *writeback) +{ + struct nfs_inode *nfsi; + struct address_space *mapping = page_file_mapping(page); + + if (!mapping || PageSwapCache(page)) + return; + + /* + * Check if an unstable page is currently being committed and + * if so, have the VM treat it as if the page is under writeback + * so it will not block due to pages that will shortly be freeable. + */ + nfsi = NFS_I(mapping->host); + if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) { + *writeback = true; + return; + } + + /* + * If PagePrivate() is set, then the page is not freeable and as the + * inode is not being committed, it's not going to be cleaned in the + * near future so treat it as dirty + */ + if (PagePrivate(page)) + *dirty = true; +} + /* * Attempt to clear the private state associated with a page when an error * occurs that requires the cached contents of an inode to be written back or @@ -542,6 +571,7 @@ const struct address_space_operations nfs_file_aops = { .direct_IO = nfs_direct_IO, .migratepage = nfs_migrate_page, .launder_page = nfs_launder_page, + .is_dirty_writeback = nfs_check_dirty_writeback, .error_remove_page = generic_error_remove_page, #ifdef CONFIG_NFS_SWAP .swap_activate = nfs_swap_activate, -- cgit v0.10.2 From 72c3b51bda557ab38d6c5b2af750c27cba15f828 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 3 Jul 2013 15:02:08 -0700 Subject: mm: fix comment referring to non-existent size_seqlock, change to span_seqlock Signed-off-by: Cody P Schafer Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index fce64af..9ec7cff 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -733,7 +733,7 @@ typedef struct pglist_data { * or node_spanned_pages stay constant. Holding this will also * guarantee that any pfn_valid() stays that way. * - * Nests above zone->lock and zone->size_seqlock. + * Nests above zone->lock and zone->span_seqlock */ spinlock_t node_size_lock; #endif -- cgit v0.10.2 From 114d4b79f7e561fd76fb98eba79e18df7cdd60f0 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 3 Jul 2013 15:02:09 -0700 Subject: mmzone: note that node_size_lock should be manipulated via pgdat_resize_lock() Signed-off-by: Cody P Schafer Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9ec7cff..e511f94 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -733,6 +733,9 @@ typedef struct pglist_data { * or node_spanned_pages stay constant. Holding this will also * guarantee that any pfn_valid() stays that way. * + * pgdat_resize_lock() and pgdat_resize_unlock() are provided to + * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG. + * * Nests above zone->lock and zone->span_seqlock */ spinlock_t node_size_lock; -- cgit v0.10.2 From aa47228a18e6d49369df877463095b899aff495f Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 3 Jul 2013 15:02:10 -0700 Subject: memory_hotplug: use pgdat_resize_lock() in online_pages() mmzone.h documents node_size_lock (which pgdat_resize_lock() locks) as follows: * Must be held any time you expect node_start_pfn, node_present_pages * or node_spanned_pages stay constant. [...] So actually hold it when we update node_present_pages in online_pages(). Signed-off-by: Cody P Schafer Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1ad92b4..527c510 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -918,6 +918,7 @@ static void node_states_set_node(int node, struct memory_notify *arg) int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type) { + unsigned long flags; unsigned long onlined_pages = 0; struct zone *zone; int need_zonelists_rebuild = 0; @@ -996,7 +997,11 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ zone->managed_pages += onlined_pages; zone->present_pages += onlined_pages; + + pgdat_resize_lock(zone->zone_pgdat, &flags); zone->zone_pgdat->node_present_pages += onlined_pages; + pgdat_resize_unlock(zone->zone_pgdat, &flags); + if (onlined_pages) { node_states_set_node(zone_to_nid(zone), &arg); if (need_zonelists_rebuild) -- cgit v0.10.2 From d702909f0aa14fe678d74d7f974aa66bfb211d0b Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 3 Jul 2013 15:02:11 -0700 Subject: memory_hotplug: use pgdat_resize_lock() in __offline_pages() mmzone.h documents node_size_lock (which pgdat_resize_lock() locks) as follows: * Must be held any time you expect node_start_pfn, node_present_pages * or node_spanned_pages stay constant. [...] So actually hold it when we update node_present_pages in __offline_pages(). [akpm@linux-foundation.org: fix build] Signed-off-by: Cody P Schafer Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 527c510..a66d002 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1492,6 +1492,7 @@ static int __ref __offline_pages(unsigned long start_pfn, unsigned long pfn, nr_pages, expire; long offlined_pages; int ret, drain, retry_max, node; + unsigned long flags; struct zone *zone; struct memory_notify arg; @@ -1585,7 +1586,11 @@ repeat: /* removal success */ zone->managed_pages -= offlined_pages; zone->present_pages -= offlined_pages; + + pgdat_resize_lock(zone->zone_pgdat, &flags); zone->zone_pgdat->node_present_pages -= offlined_pages; + pgdat_resize_unlock(zone->zone_pgdat, &flags); + totalram_pages -= offlined_pages; init_per_zone_wmark_min(); -- cgit v0.10.2 From 0fa73b86ef0797ca4fde5334117ca0b330f08030 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 3 Jul 2013 15:02:11 -0700 Subject: include/linux/mm.h: add PAGE_ALIGNED() helper To test whether an address is aligned to PAGE_SIZE. Cc: HATAYAMA Daisuke Cc: "Eric W. Biederman" , Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mm.h b/include/linux/mm.h index 66d881f..949bd70 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -52,6 +52,9 @@ extern unsigned long sysctl_admin_reserve_kbytes; /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE) +/* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */ +#define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)addr, PAGE_SIZE) + /* * Linux kernel virtual memory manager primitives. * The idea being to have a "virtual" mm in the same way -- cgit v0.10.2 From b27eb186608c89ef0979ae47c649859ceaa1b2e7 Mon Sep 17 00:00:00 2001 From: HATAYAMA Daisuke Date: Wed, 3 Jul 2013 15:02:13 -0700 Subject: vmcore: clean up read_vmcore() Rewrite part of read_vmcore() that reads objects in vmcore_list in the same way as part reading ELF headers, by which some duplicated and redundant codes are removed. Signed-off-by: HATAYAMA Daisuke Acked-by: Vivek Goyal Cc: KOSAKI Motohiro Cc: Atsushi Kumagai Cc: Lisa Mitchell Cc: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 17f7e08..ab0c92e 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -118,27 +118,6 @@ static ssize_t read_from_oldmem(char *buf, size_t count, return read; } -/* Maps vmcore file offset to respective physical address in memroy. */ -static u64 map_offset_to_paddr(loff_t offset, struct list_head *vc_list, - struct vmcore **m_ptr) -{ - struct vmcore *m; - u64 paddr; - - list_for_each_entry(m, vc_list, list) { - u64 start, end; - start = m->offset; - end = m->offset + m->size - 1; - if (offset >= start && offset <= end) { - paddr = m->paddr + offset - start; - *m_ptr = m; - return paddr; - } - } - *m_ptr = NULL; - return 0; -} - /* Read from the ELF header and then the crash dump. On error, negative value is * returned otherwise number of bytes read are returned. */ @@ -147,8 +126,8 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, { ssize_t acc = 0, tmp; size_t tsz; - u64 start, nr_bytes; - struct vmcore *curr_m = NULL; + u64 start; + struct vmcore *m = NULL; if (buflen == 0 || *fpos >= vmcore_size) return 0; @@ -174,33 +153,26 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, return acc; } - start = map_offset_to_paddr(*fpos, &vmcore_list, &curr_m); - if (!curr_m) - return -EINVAL; - - while (buflen) { - tsz = min_t(size_t, buflen, PAGE_SIZE - (start & ~PAGE_MASK)); - - /* Calculate left bytes in current memory segment. */ - nr_bytes = (curr_m->size - (start - curr_m->paddr)); - if (tsz > nr_bytes) - tsz = nr_bytes; - - tmp = read_from_oldmem(buffer, tsz, &start, 1); - if (tmp < 0) - return tmp; - buflen -= tsz; - *fpos += tsz; - buffer += tsz; - acc += tsz; - if (start >= (curr_m->paddr + curr_m->size)) { - if (curr_m->list.next == &vmcore_list) - return acc; /*EOF*/ - curr_m = list_entry(curr_m->list.next, - struct vmcore, list); - start = curr_m->paddr; + list_for_each_entry(m, &vmcore_list, list) { + if (*fpos < m->offset + m->size) { + tsz = m->offset + m->size - *fpos; + if (buflen < tsz) + tsz = buflen; + start = m->paddr + *fpos - m->offset; + tmp = read_from_oldmem(buffer, tsz, &start, 1); + if (tmp < 0) + return tmp; + buflen -= tsz; + *fpos += tsz; + buffer += tsz; + acc += tsz; + + /* leave now if filled buffer already */ + if (buflen == 0) + return acc; } } + return acc; } -- cgit v0.10.2 From f2bdacdd597d8d05c3d5f5d36273084f7ef7e6f5 Mon Sep 17 00:00:00 2001 From: HATAYAMA Daisuke Date: Wed, 3 Jul 2013 15:02:14 -0700 Subject: vmcore: allocate buffer for ELF headers on page-size alignment Allocate ELF headers on page-size boundary using __get_free_pages() instead of kmalloc(). Later patch will merge PT_NOTE entries into a single unique one and decrease the buffer size actually used. Keep original buffer size in variable elfcorebuf_sz_orig to kfree the buffer later and actually used buffer size with rounded up to page-size boundary in variable elfcorebuf_sz separately. The size of part of the ELF buffer exported from /proc/vmcore is elfcorebuf_sz. The merged, removed PT_NOTE entries, i.e. the range [elfcorebuf_sz, elfcorebuf_sz_orig], is filled with 0. Use size of the ELF headers as an initial offset value in set_vmcore_list_offsets_elf{64,32} and process_ptload_program_headers_elf{64,32} in order to indicate that the offset includes the holes towards the page boundary. As a result, both set_vmcore_list_offsets_elf{64,32} have the same definition. Merge them as set_vmcore_list_offsets. [akpm@linux-foundation.org: add free_elfcorebuf(), cleanups] Signed-off-by: HATAYAMA Daisuke Acked-by: Vivek Goyal Cc: KOSAKI Motohiro Cc: Atsushi Kumagai Cc: Lisa Mitchell Cc: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index ab0c92e..0b1c04e 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -32,6 +32,7 @@ static LIST_HEAD(vmcore_list); /* Stores the pointer to the buffer containing kernel elf core headers. */ static char *elfcorebuf; static size_t elfcorebuf_sz; +static size_t elfcorebuf_sz_orig; /* Total size of vmcore file. */ static u64 vmcore_size; @@ -186,7 +187,7 @@ static struct vmcore* __init get_new_element(void) return kzalloc(sizeof(struct vmcore), GFP_KERNEL); } -static u64 __init get_vmcore_size_elf64(char *elfptr) +static u64 __init get_vmcore_size_elf64(char *elfptr, size_t elfsz) { int i; u64 size; @@ -195,7 +196,7 @@ static u64 __init get_vmcore_size_elf64(char *elfptr) ehdr_ptr = (Elf64_Ehdr *)elfptr; phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); - size = sizeof(Elf64_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr)); + size = elfsz; for (i = 0; i < ehdr_ptr->e_phnum; i++) { size += phdr_ptr->p_memsz; phdr_ptr++; @@ -203,7 +204,7 @@ static u64 __init get_vmcore_size_elf64(char *elfptr) return size; } -static u64 __init get_vmcore_size_elf32(char *elfptr) +static u64 __init get_vmcore_size_elf32(char *elfptr, size_t elfsz) { int i; u64 size; @@ -212,7 +213,7 @@ static u64 __init get_vmcore_size_elf32(char *elfptr) ehdr_ptr = (Elf32_Ehdr *)elfptr; phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); - size = sizeof(Elf32_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr)); + size = elfsz; for (i = 0; i < ehdr_ptr->e_phnum; i++) { size += phdr_ptr->p_memsz; phdr_ptr++; @@ -294,6 +295,8 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, i = (nr_ptnote - 1) * sizeof(Elf64_Phdr); *elfsz = *elfsz - i; memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr))); + memset(elfptr + *elfsz, 0, i); + *elfsz = roundup(*elfsz, PAGE_SIZE); /* Modify e_phnum to reflect merged headers. */ ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; @@ -375,6 +378,8 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, i = (nr_ptnote - 1) * sizeof(Elf32_Phdr); *elfsz = *elfsz - i; memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr))); + memset(elfptr + *elfsz, 0, i); + *elfsz = roundup(*elfsz, PAGE_SIZE); /* Modify e_phnum to reflect merged headers. */ ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; @@ -398,8 +403,7 @@ static int __init process_ptload_program_headers_elf64(char *elfptr, phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */ /* First program header is PT_NOTE header. */ - vmcore_off = sizeof(Elf64_Ehdr) + - (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr) + + vmcore_off = elfsz + phdr_ptr->p_memsz; /* Note sections */ for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { @@ -435,8 +439,7 @@ static int __init process_ptload_program_headers_elf32(char *elfptr, phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */ /* First program header is PT_NOTE header. */ - vmcore_off = sizeof(Elf32_Ehdr) + - (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr) + + vmcore_off = elfsz + phdr_ptr->p_memsz; /* Note sections */ for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { @@ -459,18 +462,14 @@ static int __init process_ptload_program_headers_elf32(char *elfptr, } /* Sets offset fields of vmcore elements. */ -static void __init set_vmcore_list_offsets_elf64(char *elfptr, - struct list_head *vc_list) +static void __init set_vmcore_list_offsets(size_t elfsz, + struct list_head *vc_list) { loff_t vmcore_off; - Elf64_Ehdr *ehdr_ptr; struct vmcore *m; - ehdr_ptr = (Elf64_Ehdr *)elfptr; - /* Skip Elf header and program headers. */ - vmcore_off = sizeof(Elf64_Ehdr) + - (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr); + vmcore_off = elfsz; list_for_each_entry(m, vc_list, list) { m->offset = vmcore_off; @@ -478,24 +477,10 @@ static void __init set_vmcore_list_offsets_elf64(char *elfptr, } } -/* Sets offset fields of vmcore elements. */ -static void __init set_vmcore_list_offsets_elf32(char *elfptr, - struct list_head *vc_list) +static void free_elfcorebuf(void) { - loff_t vmcore_off; - Elf32_Ehdr *ehdr_ptr; - struct vmcore *m; - - ehdr_ptr = (Elf32_Ehdr *)elfptr; - - /* Skip Elf header and program headers. */ - vmcore_off = sizeof(Elf32_Ehdr) + - (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr); - - list_for_each_entry(m, vc_list, list) { - m->offset = vmcore_off; - vmcore_off += m->size; - } + free_pages((unsigned long)elfcorebuf, get_order(elfcorebuf_sz_orig)); + elfcorebuf = NULL; } static int __init parse_crash_elf64_headers(void) @@ -526,31 +511,31 @@ static int __init parse_crash_elf64_headers(void) } /* Read in all elf headers. */ - elfcorebuf_sz = sizeof(Elf64_Ehdr) + ehdr.e_phnum * sizeof(Elf64_Phdr); - elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL); + elfcorebuf_sz_orig = sizeof(Elf64_Ehdr) + + ehdr.e_phnum * sizeof(Elf64_Phdr); + elfcorebuf_sz = elfcorebuf_sz_orig; + elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(elfcorebuf_sz_orig)); if (!elfcorebuf) return -ENOMEM; addr = elfcorehdr_addr; - rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0); - if (rc < 0) { - kfree(elfcorebuf); - return rc; - } + rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); + if (rc < 0) + goto fail; /* Merge all PT_NOTE headers into one. */ rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz, &vmcore_list); - if (rc) { - kfree(elfcorebuf); - return rc; - } + if (rc) + goto fail; rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz, &vmcore_list); - if (rc) { - kfree(elfcorebuf); - return rc; - } - set_vmcore_list_offsets_elf64(elfcorebuf, &vmcore_list); + if (rc) + goto fail; + set_vmcore_list_offsets(elfcorebuf_sz, &vmcore_list); return 0; +fail: + free_elfcorebuf(); + return rc; } static int __init parse_crash_elf32_headers(void) @@ -581,31 +566,30 @@ static int __init parse_crash_elf32_headers(void) } /* Read in all elf headers. */ - elfcorebuf_sz = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr); - elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL); + elfcorebuf_sz_orig = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr); + elfcorebuf_sz = elfcorebuf_sz_orig; + elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(elfcorebuf_sz_orig)); if (!elfcorebuf) return -ENOMEM; addr = elfcorehdr_addr; - rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0); - if (rc < 0) { - kfree(elfcorebuf); - return rc; - } + rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); + if (rc < 0) + goto fail; /* Merge all PT_NOTE headers into one. */ rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz, &vmcore_list); - if (rc) { - kfree(elfcorebuf); - return rc; - } + if (rc) + goto fail; rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz, &vmcore_list); - if (rc) { - kfree(elfcorebuf); - return rc; - } - set_vmcore_list_offsets_elf32(elfcorebuf, &vmcore_list); + if (rc) + goto fail; + set_vmcore_list_offsets(elfcorebuf_sz, &vmcore_list); return 0; +fail: + free_elfcorebuf(); + return rc; } static int __init parse_crash_elf_headers(void) @@ -629,14 +613,14 @@ static int __init parse_crash_elf_headers(void) return rc; /* Determine vmcore size. */ - vmcore_size = get_vmcore_size_elf64(elfcorebuf); + vmcore_size = get_vmcore_size_elf64(elfcorebuf, elfcorebuf_sz); } else if (e_ident[EI_CLASS] == ELFCLASS32) { rc = parse_crash_elf32_headers(); if (rc) return rc; /* Determine vmcore size. */ - vmcore_size = get_vmcore_size_elf32(elfcorebuf); + vmcore_size = get_vmcore_size_elf32(elfcorebuf, elfcorebuf_sz); } else { pr_warn("Warning: Core image elf header is not sane\n"); return -EINVAL; @@ -683,7 +667,6 @@ void vmcore_cleanup(void) list_del(&m->list); kfree(m); } - kfree(elfcorebuf); - elfcorebuf = NULL; + free_elfcorebuf(); } EXPORT_SYMBOL_GPL(vmcore_cleanup); -- cgit v0.10.2 From 7f614cd1e052ebbddee7ea49c725dc75fee74a5a Mon Sep 17 00:00:00 2001 From: HATAYAMA Daisuke Date: Wed, 3 Jul 2013 15:02:15 -0700 Subject: vmcore: treat memory chunks referenced by PT_LOAD program header entries in page-size boundary in vmcore_list Treat memory chunks referenced by PT_LOAD program header entries in page-size boundary in vmcore_list. Formally, for each range [start, end], we set up the corresponding vmcore object in vmcore_list to [rounddown(start, PAGE_SIZE), roundup(end, PAGE_SIZE)]. This change affects layout of /proc/vmcore. The gaps generated by the rearrangement are newly made visible to applications as holes. Concretely, they are two ranges [rounddown(start, PAGE_SIZE), start] and [end, roundup(end, PAGE_SIZE)]. Suppose variable m points at a vmcore object in vmcore_list, and variable phdr points at the program header of PT_LOAD type the variable m corresponds to. Then, pictorially: m->offset +---------------+ | hole | phdr->p_offset = +---------------+ m->offset + (paddr - start) | |\ | kernel memory | phdr->p_memsz | |/ +---------------+ | hole | m->offset + m->size +---------------+ where m->offset and m->offset + m->size are always page-size aligned. Signed-off-by: HATAYAMA Daisuke Acked-by: Vivek Goyal Cc: KOSAKI Motohiro Cc: Atsushi Kumagai Cc: Lisa Mitchell Cc: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 0b1c04e..78c87a1 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -407,20 +407,27 @@ static int __init process_ptload_program_headers_elf64(char *elfptr, phdr_ptr->p_memsz; /* Note sections */ for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + u64 paddr, start, end, size; + if (phdr_ptr->p_type != PT_LOAD) continue; + paddr = phdr_ptr->p_offset; + start = rounddown(paddr, PAGE_SIZE); + end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE); + size = end - start; + /* Add this contiguous chunk of memory to vmcore list.*/ new = get_new_element(); if (!new) return -ENOMEM; - new->paddr = phdr_ptr->p_offset; - new->size = phdr_ptr->p_memsz; + new->paddr = start; + new->size = size; list_add_tail(&new->list, vc_list); /* Update the program header offset. */ - phdr_ptr->p_offset = vmcore_off; - vmcore_off = vmcore_off + phdr_ptr->p_memsz; + phdr_ptr->p_offset = vmcore_off + (paddr - start); + vmcore_off = vmcore_off + size; } return 0; } @@ -443,20 +450,27 @@ static int __init process_ptload_program_headers_elf32(char *elfptr, phdr_ptr->p_memsz; /* Note sections */ for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + u64 paddr, start, end, size; + if (phdr_ptr->p_type != PT_LOAD) continue; + paddr = phdr_ptr->p_offset; + start = rounddown(paddr, PAGE_SIZE); + end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE); + size = end - start; + /* Add this contiguous chunk of memory to vmcore list.*/ new = get_new_element(); if (!new) return -ENOMEM; - new->paddr = phdr_ptr->p_offset; - new->size = phdr_ptr->p_memsz; + new->paddr = start; + new->size = size; list_add_tail(&new->list, vc_list); /* Update the program header offset */ - phdr_ptr->p_offset = vmcore_off; - vmcore_off = vmcore_off + phdr_ptr->p_memsz; + phdr_ptr->p_offset = vmcore_off + (paddr - start); + vmcore_off = vmcore_off + size; } return 0; } -- cgit v0.10.2 From cef2ac3f6c8ab532e49cf69d05f540931ad8ee64 Mon Sep 17 00:00:00 2001 From: HATAYAMA Daisuke Date: Wed, 3 Jul 2013 15:02:17 -0700 Subject: vmalloc: make find_vm_area check in range Currently, __find_vmap_area searches for the kernel VM area starting at a given address. This patch changes this behavior so that it searches for the kernel VM area to which the address belongs. This change is needed by remap_vmalloc_range_partial to be introduced in later patch that receives any position of kernel VM area as target address. This patch changes the condition (addr > va->va_start) to the equivalent (addr >= va->va_end) by taking advantage of the fact that each kernel VM area is non-overlapping. Signed-off-by: HATAYAMA Daisuke Acked-by: KOSAKI Motohiro Cc: Vivek Goyal Cc: Atsushi Kumagai Cc: Lisa Mitchell Cc: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d365724..3875fa2 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -292,7 +292,7 @@ static struct vmap_area *__find_vmap_area(unsigned long addr) va = rb_entry(n, struct vmap_area, rb_node); if (addr < va->va_start) n = n->rb_left; - else if (addr > va->va_start) + else if (addr >= va->va_end) n = n->rb_right; else return va; -- cgit v0.10.2 From e69e9d4aee712a22665f008ae0550bb3d7c7f7c1 Mon Sep 17 00:00:00 2001 From: HATAYAMA Daisuke Date: Wed, 3 Jul 2013 15:02:18 -0700 Subject: vmalloc: introduce remap_vmalloc_range_partial We want to allocate ELF note segment buffer on the 2nd kernel in vmalloc space and remap it to user-space in order to reduce the risk that memory allocation fails on system with huge number of CPUs and so with huge ELF note segment that exceeds 11-order block size. Although there's already remap_vmalloc_range for the purpose of remapping vmalloc memory to user-space, we need to specify user-space range via vma. Mmap on /proc/vmcore needs to remap range across multiple objects, so the interface that requires vma to cover full range is problematic. This patch introduces remap_vmalloc_range_partial that receives user-space range as a pair of base address and size and can be used for mmap on /proc/vmcore case. remap_vmalloc_range is rewritten using remap_vmalloc_range_partial. [akpm@linux-foundation.org: use PAGE_ALIGNED()] Signed-off-by: HATAYAMA Daisuke Cc: KOSAKI Motohiro Cc: Vivek Goyal Cc: Atsushi Kumagai Cc: Lisa Mitchell Cc: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 7d5773a..dd0a2c8 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -82,6 +82,10 @@ extern void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot); extern void vunmap(const void *addr); +extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, + unsigned long uaddr, void *kaddr, + unsigned long size); + extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); void vmalloc_sync_all(void); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 3875fa2..b725990 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1476,10 +1476,9 @@ static void __vunmap(const void *addr, int deallocate_pages) if (!addr) return; - if ((PAGE_SIZE-1) & (unsigned long)addr) { - WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr); + if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)\n", + addr)); return; - } area = remove_vm_area(addr); if (unlikely(!area)) { @@ -2148,42 +2147,43 @@ finished: } /** - * remap_vmalloc_range - map vmalloc pages to userspace - * @vma: vma to cover (map full range of vma) - * @addr: vmalloc memory - * @pgoff: number of pages into addr before first page to map + * remap_vmalloc_range_partial - map vmalloc pages to userspace + * @vma: vma to cover + * @uaddr: target user address to start at + * @kaddr: virtual address of vmalloc kernel memory + * @size: size of map area * * Returns: 0 for success, -Exxx on failure * - * This function checks that addr is a valid vmalloc'ed area, and - * that it is big enough to cover the vma. Will return failure if - * that criteria isn't met. + * This function checks that @kaddr is a valid vmalloc'ed area, + * and that it is big enough to cover the range starting at + * @uaddr in @vma. Will return failure if that criteria isn't + * met. * * Similar to remap_pfn_range() (see mm/memory.c) */ -int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, - unsigned long pgoff) +int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, + void *kaddr, unsigned long size) { struct vm_struct *area; - unsigned long uaddr = vma->vm_start; - unsigned long usize = vma->vm_end - vma->vm_start; - if ((PAGE_SIZE-1) & (unsigned long)addr) + size = PAGE_ALIGN(size); + + if (!PAGE_ALIGNED(uaddr) || !PAGE_ALIGNED(kaddr)) return -EINVAL; - area = find_vm_area(addr); + area = find_vm_area(kaddr); if (!area) return -EINVAL; if (!(area->flags & VM_USERMAP)) return -EINVAL; - if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE) + if (kaddr + size > area->addr + area->size) return -EINVAL; - addr += pgoff << PAGE_SHIFT; do { - struct page *page = vmalloc_to_page(addr); + struct page *page = vmalloc_to_page(kaddr); int ret; ret = vm_insert_page(vma, uaddr, page); @@ -2191,14 +2191,37 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, return ret; uaddr += PAGE_SIZE; - addr += PAGE_SIZE; - usize -= PAGE_SIZE; - } while (usize > 0); + kaddr += PAGE_SIZE; + size -= PAGE_SIZE; + } while (size > 0); vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; return 0; } +EXPORT_SYMBOL(remap_vmalloc_range_partial); + +/** + * remap_vmalloc_range - map vmalloc pages to userspace + * @vma: vma to cover (map full range of vma) + * @addr: vmalloc memory + * @pgoff: number of pages into addr before first page to map + * + * Returns: 0 for success, -Exxx on failure + * + * This function checks that addr is a valid vmalloc'ed area, and + * that it is big enough to cover the vma. Will return failure if + * that criteria isn't met. + * + * Similar to remap_pfn_range() (see mm/memory.c) + */ +int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, + unsigned long pgoff) +{ + return remap_vmalloc_range_partial(vma, vma->vm_start, + addr + (pgoff << PAGE_SHIFT), + vma->vm_end - vma->vm_start); +} EXPORT_SYMBOL(remap_vmalloc_range); /* -- cgit v0.10.2 From 087350c9dcf1b38c597b31d7761f7366e2866e6b Mon Sep 17 00:00:00 2001 From: HATAYAMA Daisuke Date: Wed, 3 Jul 2013 15:02:19 -0700 Subject: vmcore: allocate ELF note segment in the 2nd kernel vmalloc memory The reasons why we don't allocate ELF note segment in the 1st kernel (old memory) on page boundary is to keep backward compatibility for old kernels, and that if doing so, we waste not a little memory due to round-up operation to fit the memory to page boundary since most of the buffers are in per-cpu area. ELF notes are per-cpu, so total size of ELF note segments depends on number of CPUs. The current maximum number of CPUs on x86_64 is 5192, and there's already system with 4192 CPUs in SGI, where total size amounts to 1MB. This can be larger in the near future or possibly even now on another architecture that has larger size of note per a single cpu. Thus, to avoid the case where memory allocation for large block fails, we allocate vmcore objects on vmalloc memory. This patch adds elfnotes_buf and elfnotes_sz variables to keep pointer to the ELF note segment buffer and its size. There's no longer the vmcore object that corresponds to the ELF note segment in vmcore_list. Accordingly, read_vmcore() has new case for ELF note segment and set_vmcore_list_offsets_elf{64,32}() and other helper functions starts calculating offset from sum of size of ELF headers and size of ELF note segment. [akpm@linux-foundation.org: use min(), fix error-path vzalloc() leaks] Signed-off-by: HATAYAMA Daisuke Acked-by: Vivek Goyal Cc: KOSAKI Motohiro Cc: Atsushi Kumagai Cc: Lisa Mitchell Cc: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 78c87a1..9b9270e 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -34,6 +34,9 @@ static char *elfcorebuf; static size_t elfcorebuf_sz; static size_t elfcorebuf_sz_orig; +static char *elfnotes_buf; +static size_t elfnotes_sz; + /* Total size of vmcore file. */ static u64 vmcore_size; @@ -139,9 +142,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, /* Read ELF core header */ if (*fpos < elfcorebuf_sz) { - tsz = elfcorebuf_sz - *fpos; - if (buflen < tsz) - tsz = buflen; + tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen); if (copy_to_user(buffer, elfcorebuf + *fpos, tsz)) return -EFAULT; buflen -= tsz; @@ -154,11 +155,27 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, return acc; } + /* Read Elf note segment */ + if (*fpos < elfcorebuf_sz + elfnotes_sz) { + void *kaddr; + + tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen); + kaddr = elfnotes_buf + *fpos - elfcorebuf_sz; + if (copy_to_user(buffer, kaddr, tsz)) + return -EFAULT; + buflen -= tsz; + *fpos += tsz; + buffer += tsz; + acc += tsz; + + /* leave now if filled buffer already */ + if (buflen == 0) + return acc; + } + list_for_each_entry(m, &vmcore_list, list) { if (*fpos < m->offset + m->size) { - tsz = m->offset + m->size - *fpos; - if (buflen < tsz) - tsz = buflen; + tsz = min_t(size_t, m->offset + m->size - *fpos, buflen); start = m->paddr + *fpos - m->offset; tmp = read_from_oldmem(buffer, tsz, &start, 1); if (tmp < 0) @@ -221,27 +238,27 @@ static u64 __init get_vmcore_size_elf32(char *elfptr, size_t elfsz) return size; } -/* Merges all the PT_NOTE headers into one. */ -static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, - struct list_head *vc_list) +/** + * update_note_header_size_elf64 - update p_memsz member of each PT_NOTE entry + * + * @ehdr_ptr: ELF header + * + * This function updates p_memsz member of each PT_NOTE entry in the + * program header table pointed to by @ehdr_ptr to real size of ELF + * note segment. + */ +static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr) { - int i, nr_ptnote=0, rc=0; - char *tmp; - Elf64_Ehdr *ehdr_ptr; - Elf64_Phdr phdr, *phdr_ptr; + int i, rc=0; + Elf64_Phdr *phdr_ptr; Elf64_Nhdr *nhdr_ptr; - u64 phdr_sz = 0, note_off; - ehdr_ptr = (Elf64_Ehdr *)elfptr; - phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); + phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1); for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { - int j; void *notes_section; - struct vmcore *new; u64 offset, max_sz, sz, real_sz = 0; if (phdr_ptr->p_type != PT_NOTE) continue; - nr_ptnote++; max_sz = phdr_ptr->p_memsz; offset = phdr_ptr->p_offset; notes_section = kmalloc(max_sz, GFP_KERNEL); @@ -253,7 +270,7 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, return rc; } nhdr_ptr = notes_section; - for (j = 0; j < max_sz; j += sz) { + while (real_sz < max_sz) { if (nhdr_ptr->n_namesz == 0) break; sz = sizeof(Elf64_Nhdr) + @@ -262,26 +279,122 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, real_sz += sz; nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz); } - - /* Add this contiguous chunk of notes section to vmcore list.*/ - new = get_new_element(); - if (!new) { - kfree(notes_section); - return -ENOMEM; - } - new->paddr = phdr_ptr->p_offset; - new->size = real_sz; - list_add_tail(&new->list, vc_list); - phdr_sz += real_sz; kfree(notes_section); + phdr_ptr->p_memsz = real_sz; } + return 0; +} + +/** + * get_note_number_and_size_elf64 - get the number of PT_NOTE program + * headers and sum of real size of their ELF note segment headers and + * data. + * + * @ehdr_ptr: ELF header + * @nr_ptnote: buffer for the number of PT_NOTE program headers + * @sz_ptnote: buffer for size of unique PT_NOTE program header + * + * This function is used to merge multiple PT_NOTE program headers + * into a unique single one. The resulting unique entry will have + * @sz_ptnote in its phdr->p_mem. + * + * It is assumed that program headers with PT_NOTE type pointed to by + * @ehdr_ptr has already been updated by update_note_header_size_elf64 + * and each of PT_NOTE program headers has actual ELF note segment + * size in its p_memsz member. + */ +static int __init get_note_number_and_size_elf64(const Elf64_Ehdr *ehdr_ptr, + int *nr_ptnote, u64 *sz_ptnote) +{ + int i; + Elf64_Phdr *phdr_ptr; + + *nr_ptnote = *sz_ptnote = 0; + + phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1); + for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + if (phdr_ptr->p_type != PT_NOTE) + continue; + *nr_ptnote += 1; + *sz_ptnote += phdr_ptr->p_memsz; + } + + return 0; +} + +/** + * copy_notes_elf64 - copy ELF note segments in a given buffer + * + * @ehdr_ptr: ELF header + * @notes_buf: buffer into which ELF note segments are copied + * + * This function is used to copy ELF note segment in the 1st kernel + * into the buffer @notes_buf in the 2nd kernel. It is assumed that + * size of the buffer @notes_buf is equal to or larger than sum of the + * real ELF note segment headers and data. + * + * It is assumed that program headers with PT_NOTE type pointed to by + * @ehdr_ptr has already been updated by update_note_header_size_elf64 + * and each of PT_NOTE program headers has actual ELF note segment + * size in its p_memsz member. + */ +static int __init copy_notes_elf64(const Elf64_Ehdr *ehdr_ptr, char *notes_buf) +{ + int i, rc=0; + Elf64_Phdr *phdr_ptr; + + phdr_ptr = (Elf64_Phdr*)(ehdr_ptr + 1); + + for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + u64 offset; + if (phdr_ptr->p_type != PT_NOTE) + continue; + offset = phdr_ptr->p_offset; + rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); + if (rc < 0) + return rc; + notes_buf += phdr_ptr->p_memsz; + } + + return 0; +} + +/* Merges all the PT_NOTE headers into one. */ +static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, + char **notes_buf, size_t *notes_sz) +{ + int i, nr_ptnote=0, rc=0; + char *tmp; + Elf64_Ehdr *ehdr_ptr; + Elf64_Phdr phdr; + u64 phdr_sz = 0, note_off; + + ehdr_ptr = (Elf64_Ehdr *)elfptr; + + rc = update_note_header_size_elf64(ehdr_ptr); + if (rc < 0) + return rc; + + rc = get_note_number_and_size_elf64(ehdr_ptr, &nr_ptnote, &phdr_sz); + if (rc < 0) + return rc; + + *notes_sz = roundup(phdr_sz, PAGE_SIZE); + *notes_buf = vzalloc(*notes_sz); + if (!*notes_buf) + return -ENOMEM; + + rc = copy_notes_elf64(ehdr_ptr, *notes_buf); + if (rc < 0) + return rc; + /* Prepare merged PT_NOTE program header. */ phdr.p_type = PT_NOTE; phdr.p_flags = 0; note_off = sizeof(Elf64_Ehdr) + (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr); - phdr.p_offset = note_off; + phdr.p_offset = roundup(note_off, PAGE_SIZE); phdr.p_vaddr = phdr.p_paddr = 0; phdr.p_filesz = phdr.p_memsz = phdr_sz; phdr.p_align = 0; @@ -304,27 +417,27 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, return 0; } -/* Merges all the PT_NOTE headers into one. */ -static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, - struct list_head *vc_list) +/** + * update_note_header_size_elf32 - update p_memsz member of each PT_NOTE entry + * + * @ehdr_ptr: ELF header + * + * This function updates p_memsz member of each PT_NOTE entry in the + * program header table pointed to by @ehdr_ptr to real size of ELF + * note segment. + */ +static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr) { - int i, nr_ptnote=0, rc=0; - char *tmp; - Elf32_Ehdr *ehdr_ptr; - Elf32_Phdr phdr, *phdr_ptr; + int i, rc=0; + Elf32_Phdr *phdr_ptr; Elf32_Nhdr *nhdr_ptr; - u64 phdr_sz = 0, note_off; - ehdr_ptr = (Elf32_Ehdr *)elfptr; - phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); + phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1); for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { - int j; void *notes_section; - struct vmcore *new; u64 offset, max_sz, sz, real_sz = 0; if (phdr_ptr->p_type != PT_NOTE) continue; - nr_ptnote++; max_sz = phdr_ptr->p_memsz; offset = phdr_ptr->p_offset; notes_section = kmalloc(max_sz, GFP_KERNEL); @@ -336,7 +449,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, return rc; } nhdr_ptr = notes_section; - for (j = 0; j < max_sz; j += sz) { + while (real_sz < max_sz) { if (nhdr_ptr->n_namesz == 0) break; sz = sizeof(Elf32_Nhdr) + @@ -345,26 +458,122 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, real_sz += sz; nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz); } - - /* Add this contiguous chunk of notes section to vmcore list.*/ - new = get_new_element(); - if (!new) { - kfree(notes_section); - return -ENOMEM; - } - new->paddr = phdr_ptr->p_offset; - new->size = real_sz; - list_add_tail(&new->list, vc_list); - phdr_sz += real_sz; kfree(notes_section); + phdr_ptr->p_memsz = real_sz; + } + + return 0; +} + +/** + * get_note_number_and_size_elf32 - get the number of PT_NOTE program + * headers and sum of real size of their ELF note segment headers and + * data. + * + * @ehdr_ptr: ELF header + * @nr_ptnote: buffer for the number of PT_NOTE program headers + * @sz_ptnote: buffer for size of unique PT_NOTE program header + * + * This function is used to merge multiple PT_NOTE program headers + * into a unique single one. The resulting unique entry will have + * @sz_ptnote in its phdr->p_mem. + * + * It is assumed that program headers with PT_NOTE type pointed to by + * @ehdr_ptr has already been updated by update_note_header_size_elf32 + * and each of PT_NOTE program headers has actual ELF note segment + * size in its p_memsz member. + */ +static int __init get_note_number_and_size_elf32(const Elf32_Ehdr *ehdr_ptr, + int *nr_ptnote, u64 *sz_ptnote) +{ + int i; + Elf32_Phdr *phdr_ptr; + + *nr_ptnote = *sz_ptnote = 0; + + phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1); + for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + if (phdr_ptr->p_type != PT_NOTE) + continue; + *nr_ptnote += 1; + *sz_ptnote += phdr_ptr->p_memsz; } + return 0; +} + +/** + * copy_notes_elf32 - copy ELF note segments in a given buffer + * + * @ehdr_ptr: ELF header + * @notes_buf: buffer into which ELF note segments are copied + * + * This function is used to copy ELF note segment in the 1st kernel + * into the buffer @notes_buf in the 2nd kernel. It is assumed that + * size of the buffer @notes_buf is equal to or larger than sum of the + * real ELF note segment headers and data. + * + * It is assumed that program headers with PT_NOTE type pointed to by + * @ehdr_ptr has already been updated by update_note_header_size_elf32 + * and each of PT_NOTE program headers has actual ELF note segment + * size in its p_memsz member. + */ +static int __init copy_notes_elf32(const Elf32_Ehdr *ehdr_ptr, char *notes_buf) +{ + int i, rc=0; + Elf32_Phdr *phdr_ptr; + + phdr_ptr = (Elf32_Phdr*)(ehdr_ptr + 1); + + for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + u64 offset; + if (phdr_ptr->p_type != PT_NOTE) + continue; + offset = phdr_ptr->p_offset; + rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); + if (rc < 0) + return rc; + notes_buf += phdr_ptr->p_memsz; + } + + return 0; +} + +/* Merges all the PT_NOTE headers into one. */ +static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, + char **notes_buf, size_t *notes_sz) +{ + int i, nr_ptnote=0, rc=0; + char *tmp; + Elf32_Ehdr *ehdr_ptr; + Elf32_Phdr phdr; + u64 phdr_sz = 0, note_off; + + ehdr_ptr = (Elf32_Ehdr *)elfptr; + + rc = update_note_header_size_elf32(ehdr_ptr); + if (rc < 0) + return rc; + + rc = get_note_number_and_size_elf32(ehdr_ptr, &nr_ptnote, &phdr_sz); + if (rc < 0) + return rc; + + *notes_sz = roundup(phdr_sz, PAGE_SIZE); + *notes_buf = vzalloc(*notes_sz); + if (!*notes_buf) + return -ENOMEM; + + rc = copy_notes_elf32(ehdr_ptr, *notes_buf); + if (rc < 0) + return rc; + /* Prepare merged PT_NOTE program header. */ phdr.p_type = PT_NOTE; phdr.p_flags = 0; note_off = sizeof(Elf32_Ehdr) + (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr); - phdr.p_offset = note_off; + phdr.p_offset = roundup(note_off, PAGE_SIZE); phdr.p_vaddr = phdr.p_paddr = 0; phdr.p_filesz = phdr.p_memsz = phdr_sz; phdr.p_align = 0; @@ -391,6 +600,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, * the new offset fields of exported program headers. */ static int __init process_ptload_program_headers_elf64(char *elfptr, size_t elfsz, + size_t elfnotes_sz, struct list_head *vc_list) { int i; @@ -402,9 +612,8 @@ static int __init process_ptload_program_headers_elf64(char *elfptr, ehdr_ptr = (Elf64_Ehdr *)elfptr; phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */ - /* First program header is PT_NOTE header. */ - vmcore_off = elfsz + - phdr_ptr->p_memsz; /* Note sections */ + /* Skip Elf header, program headers and Elf note segment. */ + vmcore_off = elfsz + elfnotes_sz; for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { u64 paddr, start, end, size; @@ -434,6 +643,7 @@ static int __init process_ptload_program_headers_elf64(char *elfptr, static int __init process_ptload_program_headers_elf32(char *elfptr, size_t elfsz, + size_t elfnotes_sz, struct list_head *vc_list) { int i; @@ -445,9 +655,8 @@ static int __init process_ptload_program_headers_elf32(char *elfptr, ehdr_ptr = (Elf32_Ehdr *)elfptr; phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */ - /* First program header is PT_NOTE header. */ - vmcore_off = elfsz + - phdr_ptr->p_memsz; /* Note sections */ + /* Skip Elf header, program headers and Elf note segment. */ + vmcore_off = elfsz + elfnotes_sz; for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { u64 paddr, start, end, size; @@ -476,14 +685,14 @@ static int __init process_ptload_program_headers_elf32(char *elfptr, } /* Sets offset fields of vmcore elements. */ -static void __init set_vmcore_list_offsets(size_t elfsz, +static void __init set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz, struct list_head *vc_list) { loff_t vmcore_off; struct vmcore *m; - /* Skip Elf header and program headers. */ - vmcore_off = elfsz; + /* Skip Elf header, program headers and Elf note segment. */ + vmcore_off = elfsz + elfnotes_sz; list_for_each_entry(m, vc_list, list) { m->offset = vmcore_off; @@ -495,6 +704,8 @@ static void free_elfcorebuf(void) { free_pages((unsigned long)elfcorebuf, get_order(elfcorebuf_sz_orig)); elfcorebuf = NULL; + vfree(elfnotes_buf); + elfnotes_buf = NULL; } static int __init parse_crash_elf64_headers(void) @@ -538,14 +749,15 @@ static int __init parse_crash_elf64_headers(void) goto fail; /* Merge all PT_NOTE headers into one. */ - rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz, &vmcore_list); + rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz, + &elfnotes_buf, &elfnotes_sz); if (rc) goto fail; rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz, - &vmcore_list); + elfnotes_sz, &vmcore_list); if (rc) goto fail; - set_vmcore_list_offsets(elfcorebuf_sz, &vmcore_list); + set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list); return 0; fail: free_elfcorebuf(); @@ -592,14 +804,15 @@ static int __init parse_crash_elf32_headers(void) goto fail; /* Merge all PT_NOTE headers into one. */ - rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz, &vmcore_list); + rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz, + &elfnotes_buf, &elfnotes_sz); if (rc) goto fail; rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz, - &vmcore_list); + elfnotes_sz, &vmcore_list); if (rc) goto fail; - set_vmcore_list_offsets(elfcorebuf_sz, &vmcore_list); + set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list); return 0; fail: free_elfcorebuf(); -- cgit v0.10.2 From ef9e78fd2753213ea01d77f7a76a9cb6ad0f50a7 Mon Sep 17 00:00:00 2001 From: HATAYAMA Daisuke Date: Wed, 3 Jul 2013 15:02:21 -0700 Subject: vmcore: allow user process to remap ELF note segment buffer Now ELF note segment has been copied in the buffer on vmalloc memory. To allow user process to remap the ELF note segment buffer with remap_vmalloc_page, the corresponding VM area object has to have VM_USERMAP flag set. [akpm@linux-foundation.org: use the conventional comment layout] Signed-off-by: HATAYAMA Daisuke Acked-by: Vivek Goyal Cc: KOSAKI Motohiro Cc: Atsushi Kumagai Cc: Lisa Mitchell Cc: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 9b9270e..1082492 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -369,6 +369,7 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, Elf64_Ehdr *ehdr_ptr; Elf64_Phdr phdr; u64 phdr_sz = 0, note_off; + struct vm_struct *vm; ehdr_ptr = (Elf64_Ehdr *)elfptr; @@ -385,6 +386,14 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, if (!*notes_buf) return -ENOMEM; + /* + * Allow users to remap ELF note segment buffer on vmalloc memory using + * remap_vmalloc_range.() + */ + vm = find_vm_area(*notes_buf); + BUG_ON(!vm); + vm->flags |= VM_USERMAP; + rc = copy_notes_elf64(ehdr_ptr, *notes_buf); if (rc < 0) return rc; @@ -548,6 +557,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, Elf32_Ehdr *ehdr_ptr; Elf32_Phdr phdr; u64 phdr_sz = 0, note_off; + struct vm_struct *vm; ehdr_ptr = (Elf32_Ehdr *)elfptr; @@ -564,6 +574,14 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, if (!*notes_buf) return -ENOMEM; + /* + * Allow users to remap ELF note segment buffer on vmalloc memory using + * remap_vmalloc_range() + */ + vm = find_vm_area(*notes_buf); + BUG_ON(!vm); + vm->flags |= VM_USERMAP; + rc = copy_notes_elf32(ehdr_ptr, *notes_buf); if (rc < 0) return rc; -- cgit v0.10.2 From 591ff71664e764a3806e341370f3c758cb2e7e3c Mon Sep 17 00:00:00 2001 From: HATAYAMA Daisuke Date: Wed, 3 Jul 2013 15:02:22 -0700 Subject: vmcore: calculate vmcore file size from buffer size and total size of vmcore objects The previous patches newly added holes before each chunk of memory and the holes need to be count in vmcore file size. There are two ways to count file size in such a way: 1) suppose m is a poitner to the last vmcore object in vmcore_list. Then file size is (m->offset + m->size), or 2) calculate sum of size of buffers for ELF header, program headers, ELF note segments and objects in vmcore_list. Although 1) is more direct and simpler than 2), 2) seems better in that it reflects internal object structure of /proc/vmcore. Thus, this patch changes get_vmcore_size_elf{64, 32} so that it calculates size in the way of 2). As a result, both get_vmcore_size_elf{64, 32} have the same definition. Merge them as get_vmcore_size. Signed-off-by: HATAYAMA Daisuke Acked-by: Vivek Goyal Cc: KOSAKI Motohiro Cc: Atsushi Kumagai Cc: Lisa Mitchell Cc: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 1082492..8ec6483 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -204,36 +204,15 @@ static struct vmcore* __init get_new_element(void) return kzalloc(sizeof(struct vmcore), GFP_KERNEL); } -static u64 __init get_vmcore_size_elf64(char *elfptr, size_t elfsz) +static u64 __init get_vmcore_size(size_t elfsz, size_t elfnotesegsz, + struct list_head *vc_list) { - int i; u64 size; - Elf64_Ehdr *ehdr_ptr; - Elf64_Phdr *phdr_ptr; - - ehdr_ptr = (Elf64_Ehdr *)elfptr; - phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); - size = elfsz; - for (i = 0; i < ehdr_ptr->e_phnum; i++) { - size += phdr_ptr->p_memsz; - phdr_ptr++; - } - return size; -} - -static u64 __init get_vmcore_size_elf32(char *elfptr, size_t elfsz) -{ - int i; - u64 size; - Elf32_Ehdr *ehdr_ptr; - Elf32_Phdr *phdr_ptr; + struct vmcore *m; - ehdr_ptr = (Elf32_Ehdr *)elfptr; - phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); - size = elfsz; - for (i = 0; i < ehdr_ptr->e_phnum; i++) { - size += phdr_ptr->p_memsz; - phdr_ptr++; + size = elfsz + elfnotesegsz; + list_for_each_entry(m, vc_list, list) { + size += m->size; } return size; } @@ -856,20 +835,19 @@ static int __init parse_crash_elf_headers(void) rc = parse_crash_elf64_headers(); if (rc) return rc; - - /* Determine vmcore size. */ - vmcore_size = get_vmcore_size_elf64(elfcorebuf, elfcorebuf_sz); } else if (e_ident[EI_CLASS] == ELFCLASS32) { rc = parse_crash_elf32_headers(); if (rc) return rc; - - /* Determine vmcore size. */ - vmcore_size = get_vmcore_size_elf32(elfcorebuf, elfcorebuf_sz); } else { pr_warn("Warning: Core image elf header is not sane\n"); return -EINVAL; } + + /* Determine vmcore size. */ + vmcore_size = get_vmcore_size(elfcorebuf_sz, elfnotes_sz, + &vmcore_list); + return 0; } -- cgit v0.10.2 From 83086978c63afd7c73e1c173c84aeab184c1e916 Mon Sep 17 00:00:00 2001 From: HATAYAMA Daisuke Date: Wed, 3 Jul 2013 15:02:23 -0700 Subject: vmcore: support mmap() on /proc/vmcore This patch introduces mmap_vmcore(). Don't permit writable nor executable mapping even with mprotect() because this mmap() is aimed at reading crash dump memory. Non-writable mapping is also requirement of remap_pfn_range() when mapping linear pages on non-consecutive physical pages; see is_cow_mapping(). Set VM_MIXEDMAP flag to remap memory by remap_pfn_range and by remap_vmalloc_range_pertial at the same time for a single vma. do_munmap() can correctly clean partially remapped vma with two functions in abnormal case. See zap_pte_range(), vm_normal_page() and their comments for details. On x86-32 PAE kernels, mmap() supports at most 16TB memory only. This limitation comes from the fact that the third argument of remap_pfn_range(), pfn, is of 32-bit length on x86-32: unsigned long. [akpm@linux-foundation.org: use min(), switch to conventional error-unwinding approach] Signed-off-by: HATAYAMA Daisuke Acked-by: Vivek Goyal Cc: KOSAKI Motohiro Cc: Atsushi Kumagai Cc: Lisa Mitchell Cc: Zhang Yanfei Tested-by: Maxim Uvarov Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 8ec6483..2850317 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include "internal.h" @@ -194,9 +195,122 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, return acc; } +/** + * alloc_elfnotes_buf - allocate buffer for ELF note segment in + * vmalloc memory + * + * @notes_sz: size of buffer + * + * If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap + * the buffer to user-space by means of remap_vmalloc_range(). + * + * If CONFIG_MMU is not defined, use vzalloc() since mmap_vmcore() is + * disabled and there's no need to allow users to mmap the buffer. + */ +static inline char *alloc_elfnotes_buf(size_t notes_sz) +{ +#ifdef CONFIG_MMU + return vmalloc_user(notes_sz); +#else + return vzalloc(notes_sz); +#endif +} + +/* + * Disable mmap_vmcore() if CONFIG_MMU is not defined. MMU is + * essential for mmap_vmcore() in order to map physically + * non-contiguous objects (ELF header, ELF note segment and memory + * regions in the 1st kernel pointed to by PT_LOAD entries) into + * virtually contiguous user-space in ELF layout. + */ +#ifdef CONFIG_MMU +static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) +{ + size_t size = vma->vm_end - vma->vm_start; + u64 start, end, len, tsz; + struct vmcore *m; + + start = (u64)vma->vm_pgoff << PAGE_SHIFT; + end = start + size; + + if (size > vmcore_size || end > vmcore_size) + return -EINVAL; + + if (vma->vm_flags & (VM_WRITE | VM_EXEC)) + return -EPERM; + + vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC); + vma->vm_flags |= VM_MIXEDMAP; + + len = 0; + + if (start < elfcorebuf_sz) { + u64 pfn; + + tsz = min(elfcorebuf_sz - (size_t)start, size); + pfn = __pa(elfcorebuf + start) >> PAGE_SHIFT; + if (remap_pfn_range(vma, vma->vm_start, pfn, tsz, + vma->vm_page_prot)) + return -EAGAIN; + size -= tsz; + start += tsz; + len += tsz; + + if (size == 0) + return 0; + } + + if (start < elfcorebuf_sz + elfnotes_sz) { + void *kaddr; + + tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size); + kaddr = elfnotes_buf + start - elfcorebuf_sz; + if (remap_vmalloc_range_partial(vma, vma->vm_start + len, + kaddr, tsz)) + goto fail; + size -= tsz; + start += tsz; + len += tsz; + + if (size == 0) + return 0; + } + + list_for_each_entry(m, &vmcore_list, list) { + if (start < m->offset + m->size) { + u64 paddr = 0; + + tsz = min_t(size_t, m->offset + m->size - start, size); + paddr = m->paddr + start - m->offset; + if (remap_pfn_range(vma, vma->vm_start + len, + paddr >> PAGE_SHIFT, tsz, + vma->vm_page_prot)) + goto fail; + size -= tsz; + start += tsz; + len += tsz; + + if (size == 0) + return 0; + } + } + + return 0; +fail: + do_munmap(vma->vm_mm, vma->vm_start, len); + return -EAGAIN; +} +#else +static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) +{ + return -ENOSYS; +} +#endif + static const struct file_operations proc_vmcore_operations = { .read = read_vmcore, .llseek = default_llseek, + .mmap = mmap_vmcore, }; static struct vmcore* __init get_new_element(void) @@ -348,7 +462,6 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, Elf64_Ehdr *ehdr_ptr; Elf64_Phdr phdr; u64 phdr_sz = 0, note_off; - struct vm_struct *vm; ehdr_ptr = (Elf64_Ehdr *)elfptr; @@ -361,18 +474,10 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, return rc; *notes_sz = roundup(phdr_sz, PAGE_SIZE); - *notes_buf = vzalloc(*notes_sz); + *notes_buf = alloc_elfnotes_buf(*notes_sz); if (!*notes_buf) return -ENOMEM; - /* - * Allow users to remap ELF note segment buffer on vmalloc memory using - * remap_vmalloc_range.() - */ - vm = find_vm_area(*notes_buf); - BUG_ON(!vm); - vm->flags |= VM_USERMAP; - rc = copy_notes_elf64(ehdr_ptr, *notes_buf); if (rc < 0) return rc; @@ -536,7 +641,6 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, Elf32_Ehdr *ehdr_ptr; Elf32_Phdr phdr; u64 phdr_sz = 0, note_off; - struct vm_struct *vm; ehdr_ptr = (Elf32_Ehdr *)elfptr; @@ -549,18 +653,10 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, return rc; *notes_sz = roundup(phdr_sz, PAGE_SIZE); - *notes_buf = vzalloc(*notes_sz); + *notes_buf = alloc_elfnotes_buf(*notes_sz); if (!*notes_buf) return -ENOMEM; - /* - * Allow users to remap ELF note segment buffer on vmalloc memory using - * remap_vmalloc_range() - */ - vm = find_vm_area(*notes_buf); - BUG_ON(!vm); - vm->flags |= VM_USERMAP; - rc = copy_notes_elf32(ehdr_ptr, *notes_buf); if (rc < 0) return rc; -- cgit v0.10.2 From f968ef1c55199301e98c28fe7dfa8ace05ecdb96 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 3 Jul 2013 15:02:25 -0700 Subject: memcg: update TODO list in Documentation hugetlb cgroup has already been implemented. Signed-off-by: Li Zefan Acked-by: KAMEZAWA Hiroyuki Acked-by: Rob Landley Cc: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index ddf4f93..327acec 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -834,10 +834,9 @@ Test: 12. TODO -1. Add support for accounting huge pages (as a separate controller) -2. Make per-cgroup scanner reclaim not-shared pages first -3. Teach controller to account for shared-pages -4. Start reclamation in the background when the limit is +1. Make per-cgroup scanner reclaim not-shared pages first +2. Teach controller to account for shared-pages +3. Start reclamation in the background when the limit is not yet hit but the usage is getting closer Summary -- cgit v0.10.2 From c6286c983900c77410a951874f1589f4a41fbbae Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:02:26 -0700 Subject: mm: add tracepoints for LRU activation and insertions Andrew Perepechko reported a problem whereby pages are being prematurely evicted as the mark_page_accessed() hint is ignored for pages that are currently on a pagevec -- http://www.spinics.net/lists/linux-ext4/msg37340.html . Alexey Lyahkov and Robin Dong have also reported problems recently that could be due to hot pages reaching the end of the inactive list too quickly and be reclaimed. Rather than addressing this on a per-filesystem basis, this series aims to fix the mark_page_accessed() interface by deferring what LRU a page is added to pagevec drain time and allowing mark_page_accessed() to call SetPageActive on a pagevec page. Patch 1 adds two tracepoints for LRU page activation and insertion. Using these processes it's possible to build a model of pages in the LRU that can be processed offline. Patch 2 defers making the decision on what LRU to add a page to until when the pagevec is drained. Patch 3 searches the local pagevec for pages to mark PageActive on mark_page_accessed. The changelog explains why only the local pagevec is examined. Patches 4 and 5 tidy up the API. postmark, a dd-based test and fs-mark both single and threaded mode were run but none of them showed any performance degradation or gain as a result of the patch. Using patch 1, I built a *very* basic model of the LRU to examine offline what the average age of different page types on the LRU were in milliseconds. Of course, capturing the trace distorts the test as it's written to local disk but it does not matter for the purposes of this test. The average age of pages in milliseconds were vanilla deferdrain Average age mapped anon: 1454 1250 Average age mapped file: 127841 155552 Average age unmapped anon: 85 235 Average age unmapped file: 73633 38884 Average age unmapped buffers: 74054 116155 The LRU activity was mostly files which you'd expect for a dd-based workload. Note that the average age of buffer pages is increased by the series and it is expected this is due to the fact that the buffer pages are now getting added to the active list when drained from the pagevecs. Note that the average age of the unmapped file data is decreased as they are still added to the inactive list and are reclaimed before the buffers. There is no guarantee this is a universal win for all workloads and it would be nice if the filesystem people gave some thought as to whether this decision is generally a win or a loss. This patch: Using these tracepoints it is possible to model LRU activity and the average residency of pages of different types. This can be used to debug problems related to premature reclaim of pages of particular types. Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel Cc: Jan Kara Cc: Johannes Weiner Cc: Alexey Lyahkov Cc: Andrew Perepechko Cc: Robin Dong Cc: Theodore Tso Cc: Hugh Dickins Cc: Rik van Riel Cc: Bernd Schubert Cc: David Howells Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h new file mode 100644 index 0000000..1c9fabd --- /dev/null +++ b/include/trace/events/pagemap.h @@ -0,0 +1,89 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM pagemap + +#if !defined(_TRACE_PAGEMAP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PAGEMAP_H + +#include +#include + +#define PAGEMAP_MAPPED 0x0001u +#define PAGEMAP_ANONYMOUS 0x0002u +#define PAGEMAP_FILE 0x0004u +#define PAGEMAP_SWAPCACHE 0x0008u +#define PAGEMAP_SWAPBACKED 0x0010u +#define PAGEMAP_MAPPEDDISK 0x0020u +#define PAGEMAP_BUFFERS 0x0040u + +#define trace_pagemap_flags(page) ( \ + (PageAnon(page) ? PAGEMAP_ANONYMOUS : PAGEMAP_FILE) | \ + (page_mapped(page) ? PAGEMAP_MAPPED : 0) | \ + (PageSwapCache(page) ? PAGEMAP_SWAPCACHE : 0) | \ + (PageSwapBacked(page) ? PAGEMAP_SWAPBACKED : 0) | \ + (PageMappedToDisk(page) ? PAGEMAP_MAPPEDDISK : 0) | \ + (page_has_private(page) ? PAGEMAP_BUFFERS : 0) \ + ) + +TRACE_EVENT(mm_lru_insertion, + + TP_PROTO( + struct page *page, + unsigned long pfn, + int lru, + unsigned long flags + ), + + TP_ARGS(page, pfn, lru, flags), + + TP_STRUCT__entry( + __field(struct page *, page ) + __field(unsigned long, pfn ) + __field(int, lru ) + __field(unsigned long, flags ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->pfn = pfn; + __entry->lru = lru; + __entry->flags = flags; + ), + + /* Flag format is based on page-types.c formatting for pagemap */ + TP_printk("page=%p pfn=%lu lru=%d flags=%s%s%s%s%s%s", + __entry->page, + __entry->pfn, + __entry->lru, + __entry->flags & PAGEMAP_MAPPED ? "M" : " ", + __entry->flags & PAGEMAP_ANONYMOUS ? "a" : "f", + __entry->flags & PAGEMAP_SWAPCACHE ? "s" : " ", + __entry->flags & PAGEMAP_SWAPBACKED ? "b" : " ", + __entry->flags & PAGEMAP_MAPPEDDISK ? "d" : " ", + __entry->flags & PAGEMAP_BUFFERS ? "B" : " ") +); + +TRACE_EVENT(mm_lru_activate, + + TP_PROTO(struct page *page, unsigned long pfn), + + TP_ARGS(page, pfn), + + TP_STRUCT__entry( + __field(struct page *, page ) + __field(unsigned long, pfn ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->pfn = pfn; + ), + + /* Flag format is based on page-types.c formatting for pagemap */ + TP_printk("page=%p pfn=%lu", __entry->page, __entry->pfn) + +); + +#endif /* _TRACE_PAGEMAP_H */ + +/* This part must be outside protection */ +#include diff --git a/mm/swap.c b/mm/swap.c index dfd7d71..53c9ceb 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -34,6 +34,9 @@ #include "internal.h" +#define CREATE_TRACE_POINTS +#include + /* How many pages do we try to swap or page in/out together? */ int page_cluster; @@ -384,6 +387,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec, SetPageActive(page); lru += LRU_ACTIVE; add_page_to_lru_list(page, lruvec, lru); + trace_mm_lru_activate(page, page_to_pfn(page)); __count_vm_event(PGACTIVATE); update_page_reclaim_stat(lruvec, file, 1); @@ -808,6 +812,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, SetPageActive(page); add_page_to_lru_list(page, lruvec, lru); update_page_reclaim_stat(lruvec, file, active); + trace_mm_lru_insertion(page, page_to_pfn(page), lru, trace_pagemap_flags(page)); } /* -- cgit v0.10.2 From 13f7f78981e49f288d871bb918545ef5c952e00b Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:02:28 -0700 Subject: mm: pagevec: defer deciding which LRU to add a page to until pagevec drain time mark_page_accessed() cannot activate an inactive page that is located on an inactive LRU pagevec. Hints from filesystems may be ignored as a result. In preparation for fixing that problem, this patch removes the per-LRU pagevecs and leaves just one pagevec. The final LRU the page is added to is deferred until the pagevec is drained. This means that fewer pagevecs are available and potentially there is greater contention on the LRU lock. However, this only applies in the case where there is an almost perfect mix of file, anon, active and inactive pages being added to the LRU. In practice I expect that we are adding stream of pages of a particular time and that the changes in contention will barely be measurable. Signed-off-by: Mel Gorman Acked-by: Rik van Riel Cc: Jan Kara Acked-by: Johannes Weiner Cc: Alexey Lyahkov Cc: Andrew Perepechko Cc: Robin Dong Cc: Theodore Tso Cc: Hugh Dickins Cc: Rik van Riel Cc: Bernd Schubert Cc: David Howells Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/swap.c b/mm/swap.c index 53c9ceb..868b493 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -40,7 +40,7 @@ /* How many pages do we try to swap or page in/out together? */ int page_cluster; -static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs); +static DEFINE_PER_CPU(struct pagevec, lru_add_pvec); static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs); @@ -452,22 +452,25 @@ void mark_page_accessed(struct page *page) EXPORT_SYMBOL(mark_page_accessed); /* - * Order of operations is important: flush the pagevec when it's already - * full, not when adding the last page, to make sure that last page is - * not added to the LRU directly when passed to this function. Because - * mark_page_accessed() (called after this when writing) only activates - * pages that are on the LRU, linear writes in subpage chunks would see - * every PAGEVEC_SIZE page activated, which is unexpected. + * Queue the page for addition to the LRU via pagevec. The decision on whether + * to add the page to the [in]active [file|anon] list is deferred until the + * pagevec is drained. This gives a chance for the caller of __lru_cache_add() + * have the page added to the active list using mark_page_accessed(). */ void __lru_cache_add(struct page *page, enum lru_list lru) { - struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru]; + struct pagevec *pvec = &get_cpu_var(lru_add_pvec); + + if (is_active_lru(lru)) + SetPageActive(page); + else + ClearPageActive(page); page_cache_get(page); if (!pagevec_space(pvec)) __pagevec_lru_add(pvec, lru); pagevec_add(pvec, page); - put_cpu_var(lru_add_pvecs); + put_cpu_var(lru_add_pvec); } EXPORT_SYMBOL(__lru_cache_add); @@ -480,13 +483,11 @@ void lru_cache_add_lru(struct page *page, enum lru_list lru) { if (PageActive(page)) { VM_BUG_ON(PageUnevictable(page)); - ClearPageActive(page); } else if (PageUnevictable(page)) { VM_BUG_ON(PageActive(page)); - ClearPageUnevictable(page); } - VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page)); + VM_BUG_ON(PageLRU(page)); __lru_cache_add(page, lru); } @@ -587,15 +588,10 @@ static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec, */ void lru_add_drain_cpu(int cpu) { - struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu); - struct pagevec *pvec; - int lru; + struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu); - for_each_lru(lru) { - pvec = &pvecs[lru - LRU_BASE]; - if (pagevec_count(pvec)) - __pagevec_lru_add(pvec, lru); - } + if (pagevec_count(pvec)) + __pagevec_lru_add(pvec, NR_LRU_LISTS); pvec = &per_cpu(lru_rotate_pvecs, cpu); if (pagevec_count(pvec)) { @@ -799,17 +795,16 @@ void lru_add_page_tail(struct page *page, struct page *page_tail, static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, void *arg) { - enum lru_list lru = (enum lru_list)arg; - int file = is_file_lru(lru); - int active = is_active_lru(lru); + enum lru_list requested_lru = (enum lru_list)arg; + int file = page_is_file_cache(page); + int active = PageActive(page); + enum lru_list lru = page_lru(page); - VM_BUG_ON(PageActive(page)); + WARN_ON_ONCE(requested_lru < NR_LRU_LISTS && requested_lru != lru); VM_BUG_ON(PageUnevictable(page)); VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - if (active) - SetPageActive(page); add_page_to_lru_list(page, lruvec, lru); update_page_reclaim_stat(lruvec, file, active); trace_mm_lru_insertion(page, page_to_pfn(page), lru, trace_pagemap_flags(page)); -- cgit v0.10.2 From 059285a25f30c13ed4f5d91cecd6094b9b20bb7b Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:02:30 -0700 Subject: mm: activate !PageLRU pages on mark_page_accessed if page is on local pagevec If a page is on a pagevec then it is !PageLRU and mark_page_accessed() may fail to move a page to the active list as expected. Now that the LRU is selected at LRU drain time, mark pages PageActive if they are on the local pagevec so it gets moved to the correct list at LRU drain time. Using a debugging patch it was found that for a simple git checkout based workload that pages were never added to the active file list in practice but with this patch applied they are. before after LRU Add Active File 0 750583 LRU Add Active Anon 2640587 2702818 LRU Add Inactive File 8833662 8068353 LRU Add Inactive Anon 207 200 Note that only pages on the local pagevec are considered on purpose. A !PageLRU page could be in the process of being released, reclaimed, migrated or on a remote pagevec that is currently being drained. Marking it PageActive is vunerable to races where PageLRU and Active bits are checked at the wrong time. Page reclaim will trigger VM_BUG_ONs but depending on when the race hits, it could also free a PageActive page to the page allocator and trigger a bad_page warning. Similarly a potential race exists between a per-cpu drain on a pagevec list and an activation on a remote CPU. lru_add_drain_cpu __pagevec_lru_add lru = page_lru(page); mark_page_accessed if (PageLRU(page)) activate_page else SetPageActive SetPageLRU(page); add_page_to_lru_list(page, lruvec, lru); In this case a PageActive page is added to the inactivate list and later the inactive/active stats will get skewed. While the PageActive checks in vmscan could be removed and potentially dealt with, a skew in the statistics would be very difficult to detect. Hence this patch deals just with the common case where a page being marked accessed has just been added to the local pagevec. Signed-off-by: Mel Gorman Cc: Jan Kara Cc: Rik van Riel Acked-by: Johannes Weiner Cc: Alexey Lyahkov Cc: Andrew Perepechko Cc: Robin Dong Cc: Theodore Tso Cc: Hugh Dickins Cc: Rik van Riel Cc: Bernd Schubert Cc: David Howells Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/swap.c b/mm/swap.c index 868b493..c53d161 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -432,6 +432,33 @@ void activate_page(struct page *page) } #endif +static void __lru_cache_activate_page(struct page *page) +{ + struct pagevec *pvec = &get_cpu_var(lru_add_pvec); + int i; + + /* + * Search backwards on the optimistic assumption that the page being + * activated has just been added to this pagevec. Note that only + * the local pagevec is examined as a !PageLRU page could be in the + * process of being released, reclaimed, migrated or on a remote + * pagevec that is currently being drained. Furthermore, marking + * a remote pagevec's page PageActive potentially hits a race where + * a page is marked PageActive just after it is added to the inactive + * list causing accounting errors and BUG_ON checks to trigger. + */ + for (i = pagevec_count(pvec) - 1; i >= 0; i--) { + struct page *pagevec_page = pvec->pages[i]; + + if (pagevec_page == page) { + SetPageActive(page); + break; + } + } + + put_cpu_var(lru_add_pvec); +} + /* * Mark a page as having seen activity. * @@ -442,8 +469,18 @@ void activate_page(struct page *page) void mark_page_accessed(struct page *page) { if (!PageActive(page) && !PageUnevictable(page) && - PageReferenced(page) && PageLRU(page)) { - activate_page(page); + PageReferenced(page)) { + + /* + * If the page is on the LRU, queue it for activation via + * activate_page_pvecs. Otherwise, assume the page is on a + * pagevec, mark it active and it'll be moved to the active + * LRU on the next drain. + */ + if (PageLRU(page)) + activate_page(page); + else + __lru_cache_activate_page(page); ClearPageReferenced(page); } else if (!PageReferenced(page)) { SetPageReferenced(page); -- cgit v0.10.2 From a0b8cab3b9b2efadabdcff264c450ca515e2619c Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:02:32 -0700 Subject: mm: remove lru parameter from __pagevec_lru_add and remove parts of pagevec API Now that the LRU to add a page to is decided at LRU-add time, remove the misleading lru parameter from __pagevec_lru_add. A consequence of this is that the pagevec_lru_add_file, pagevec_lru_add_anon and similar helpers are misleading as the caller no longer has direct control over what LRU the page is added to. Unused helpers are removed by this patch and existing users of pagevec_lru_add_file() are converted to use lru_cache_add_file() directly and use the per-cpu pagevecs instead of creating their own pagevec. Signed-off-by: Mel Gorman Reviewed-by: Jan Kara Reviewed-by: Rik van Riel Acked-by: Johannes Weiner Cc: Alexey Lyahkov Cc: Andrew Perepechko Cc: Robin Dong Cc: Theodore Tso Cc: Hugh Dickins Cc: Rik van Riel Cc: Bernd Schubert Cc: David Howells Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 317f9ee..ebaff36 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "internal.h" /* @@ -227,8 +228,7 @@ static void cachefiles_read_copier(struct fscache_operation *_op) */ static int cachefiles_read_backing_file_one(struct cachefiles_object *object, struct fscache_retrieval *op, - struct page *netpage, - struct pagevec *pagevec) + struct page *netpage) { struct cachefiles_one_read *monitor; struct address_space *bmapping; @@ -237,8 +237,6 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object, _enter(""); - pagevec_reinit(pagevec); - _debug("read back %p{%lu,%d}", netpage, netpage->index, page_count(netpage)); @@ -283,9 +281,7 @@ installed_new_backing_page: backpage = newpage; newpage = NULL; - page_cache_get(backpage); - pagevec_add(pagevec, backpage); - __pagevec_lru_add_file(pagevec); + lru_cache_add_file(backpage); read_backing_page: ret = bmapping->a_ops->readpage(NULL, backpage); @@ -452,8 +448,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, if (block) { /* submit the apparently valid page to the backing fs to be * read from disk */ - ret = cachefiles_read_backing_file_one(object, op, page, - &pagevec); + ret = cachefiles_read_backing_file_one(object, op, page); } else if (cachefiles_has_space(cache, 0, 1) == 0) { /* there's space in the cache we can use */ fscache_mark_page_cached(op, page); @@ -482,14 +477,11 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, { struct cachefiles_one_read *monitor = NULL; struct address_space *bmapping = object->backer->d_inode->i_mapping; - struct pagevec lru_pvec; struct page *newpage = NULL, *netpage, *_n, *backpage = NULL; int ret = 0; _enter(""); - pagevec_init(&lru_pvec, 0); - list_for_each_entry_safe(netpage, _n, list, lru) { list_del(&netpage->lru); @@ -534,9 +526,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, backpage = newpage; newpage = NULL; - page_cache_get(backpage); - if (!pagevec_add(&lru_pvec, backpage)) - __pagevec_lru_add_file(&lru_pvec); + lru_cache_add_file(backpage); reread_backing_page: ret = bmapping->a_ops->readpage(NULL, backpage); @@ -559,9 +549,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, goto nomem; } - page_cache_get(netpage); - if (!pagevec_add(&lru_pvec, netpage)) - __pagevec_lru_add_file(&lru_pvec); + lru_cache_add_file(netpage); /* install a monitor */ page_cache_get(netpage); @@ -643,9 +631,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, fscache_mark_page_cached(op, netpage); - page_cache_get(netpage); - if (!pagevec_add(&lru_pvec, netpage)) - __pagevec_lru_add_file(&lru_pvec); + lru_cache_add_file(netpage); /* the netpage is unlocked and marked up to date here */ fscache_end_io(op, netpage, 0); @@ -661,8 +647,6 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, out: /* tidy up */ - pagevec_lru_add_file(&lru_pvec); - if (newpage) page_cache_release(newpage); if (netpage) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5d05141..d7ed697 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1758,7 +1759,6 @@ EXPORT_SYMBOL_GPL(nfs_unlink); */ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - struct pagevec lru_pvec; struct page *page; char *kaddr; struct iattr attr; @@ -1798,11 +1798,8 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) * No big deal if we can't add this page to the page cache here. * READLINK will get the missing page from the server if needed. */ - pagevec_init(&lru_pvec, 0); - if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0, + if (!add_to_page_cache_lru(page, dentry->d_inode->i_mapping, 0, GFP_KERNEL)) { - pagevec_add(&lru_pvec, page); - pagevec_lru_add_file(&lru_pvec); SetPageUptodate(page); unlock_page(page); } else diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 2aa12b8..e4dbfab 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -21,7 +21,7 @@ struct pagevec { }; void __pagevec_release(struct pagevec *pvec); -void __pagevec_lru_add(struct pagevec *pvec, enum lru_list lru); +void __pagevec_lru_add(struct pagevec *pvec); unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, pgoff_t start, unsigned nr_pages); unsigned pagevec_lookup_tag(struct pagevec *pvec, @@ -64,36 +64,4 @@ static inline void pagevec_release(struct pagevec *pvec) __pagevec_release(pvec); } -static inline void __pagevec_lru_add_anon(struct pagevec *pvec) -{ - __pagevec_lru_add(pvec, LRU_INACTIVE_ANON); -} - -static inline void __pagevec_lru_add_active_anon(struct pagevec *pvec) -{ - __pagevec_lru_add(pvec, LRU_ACTIVE_ANON); -} - -static inline void __pagevec_lru_add_file(struct pagevec *pvec) -{ - __pagevec_lru_add(pvec, LRU_INACTIVE_FILE); -} - -static inline void __pagevec_lru_add_active_file(struct pagevec *pvec) -{ - __pagevec_lru_add(pvec, LRU_ACTIVE_FILE); -} - -static inline void pagevec_lru_add_file(struct pagevec *pvec) -{ - if (pagevec_count(pvec)) - __pagevec_lru_add_file(pvec); -} - -static inline void pagevec_lru_add_anon(struct pagevec *pvec) -{ - if (pagevec_count(pvec)) - __pagevec_lru_add_anon(pvec); -} - #endif /* _LINUX_PAGEVEC_H */ diff --git a/mm/swap.c b/mm/swap.c index c53d161..6a9d0c4 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -505,7 +505,7 @@ void __lru_cache_add(struct page *page, enum lru_list lru) page_cache_get(page); if (!pagevec_space(pvec)) - __pagevec_lru_add(pvec, lru); + __pagevec_lru_add(pvec); pagevec_add(pvec, page); put_cpu_var(lru_add_pvec); } @@ -628,7 +628,7 @@ void lru_add_drain_cpu(int cpu) struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu); if (pagevec_count(pvec)) - __pagevec_lru_add(pvec, NR_LRU_LISTS); + __pagevec_lru_add(pvec); pvec = &per_cpu(lru_rotate_pvecs, cpu); if (pagevec_count(pvec)) { @@ -832,12 +832,10 @@ void lru_add_page_tail(struct page *page, struct page *page_tail, static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, void *arg) { - enum lru_list requested_lru = (enum lru_list)arg; int file = page_is_file_cache(page); int active = PageActive(page); enum lru_list lru = page_lru(page); - WARN_ON_ONCE(requested_lru < NR_LRU_LISTS && requested_lru != lru); VM_BUG_ON(PageUnevictable(page)); VM_BUG_ON(PageLRU(page)); @@ -851,11 +849,9 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, * Add the passed pages to the LRU, then drop the caller's refcount * on them. Reinitialises the caller's pagevec. */ -void __pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) +void __pagevec_lru_add(struct pagevec *pvec) { - VM_BUG_ON(is_unevictable_lru(lru)); - - pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, (void *)lru); + pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, NULL); } EXPORT_SYMBOL(__pagevec_lru_add); -- cgit v0.10.2 From c53954a092d07c5684d31ea1fc813d262cff08a5 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:02:34 -0700 Subject: mm: remove lru parameter from __lru_cache_add and lru_cache_add_lru Similar to __pagevec_lru_add, this patch removes the LRU parameter from __lru_cache_add and lru_cache_add_lru as the caller does not control the exact LRU the page gets added to. lru_cache_add_lru gets renamed to lru_cache_add the name is silly without the lru parameter. With the parameter removed, it is required that the caller indicate if they want the page added to the active or inactive list by setting or clearing PageActive respectively. [akpm@linux-foundation.org: Suggested the patch] [gang.chen@asianux.com: fix used-unintialized warning] Signed-off-by: Mel Gorman Signed-off-by: Chen Gang Cc: Jan Kara Cc: Rik van Riel Acked-by: Johannes Weiner Cc: Alexey Lyahkov Cc: Andrew Perepechko Cc: Robin Dong Cc: Theodore Tso Cc: Hugh Dickins Cc: Rik van Riel Cc: Bernd Schubert Cc: David Howells Cc: Trond Myklebust Cc: Mel Gorman Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/swap.h b/include/linux/swap.h index 1701ce4..85d7437 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -10,6 +10,7 @@ #include #include #include +#include #include struct notifier_block; @@ -233,8 +234,8 @@ extern unsigned long nr_free_pagecache_pages(void); /* linux/mm/swap.c */ -extern void __lru_cache_add(struct page *, enum lru_list lru); -extern void lru_cache_add_lru(struct page *, enum lru_list lru); +extern void __lru_cache_add(struct page *); +extern void lru_cache_add(struct page *); extern void lru_add_page_tail(struct page *page, struct page *page_tail, struct lruvec *lruvec, struct list_head *head); extern void activate_page(struct page *); @@ -254,12 +255,14 @@ extern void add_page_to_unevictable_list(struct page *page); */ static inline void lru_cache_add_anon(struct page *page) { - __lru_cache_add(page, LRU_INACTIVE_ANON); + ClearPageActive(page); + __lru_cache_add(page); } static inline void lru_cache_add_file(struct page *page) { - __lru_cache_add(page, LRU_INACTIVE_FILE); + ClearPageActive(page); + __lru_cache_add(page); } /* linux/mm/vmscan.c */ diff --git a/mm/rmap.c b/mm/rmap.c index 6280da8..e22ceeb 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1093,9 +1093,10 @@ void page_add_new_anon_rmap(struct page *page, else __inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); __page_set_anon_rmap(page, vma, address, 1); - if (!mlocked_vma_newpage(vma, page)) - lru_cache_add_lru(page, LRU_ACTIVE_ANON); - else + if (!mlocked_vma_newpage(vma, page)) { + SetPageActive(page); + lru_cache_add(page); + } else add_page_to_unevictable_list(page); } diff --git a/mm/swap.c b/mm/swap.c index 6a9d0c4..4a1d0d2 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -494,15 +494,10 @@ EXPORT_SYMBOL(mark_page_accessed); * pagevec is drained. This gives a chance for the caller of __lru_cache_add() * have the page added to the active list using mark_page_accessed(). */ -void __lru_cache_add(struct page *page, enum lru_list lru) +void __lru_cache_add(struct page *page) { struct pagevec *pvec = &get_cpu_var(lru_add_pvec); - if (is_active_lru(lru)) - SetPageActive(page); - else - ClearPageActive(page); - page_cache_get(page); if (!pagevec_space(pvec)) __pagevec_lru_add(pvec); @@ -512,11 +507,10 @@ void __lru_cache_add(struct page *page, enum lru_list lru) EXPORT_SYMBOL(__lru_cache_add); /** - * lru_cache_add_lru - add a page to a page list + * lru_cache_add - add a page to a page list * @page: the page to be added to the LRU. - * @lru: the LRU list to which the page is added. */ -void lru_cache_add_lru(struct page *page, enum lru_list lru) +void lru_cache_add(struct page *page) { if (PageActive(page)) { VM_BUG_ON(PageUnevictable(page)); @@ -525,7 +519,7 @@ void lru_cache_add_lru(struct page *page, enum lru_list lru) } VM_BUG_ON(PageLRU(page)); - __lru_cache_add(page, lru); + __lru_cache_add(page); } /** @@ -745,6 +739,9 @@ void release_pages(struct page **pages, int nr, int cold) del_page_from_lru_list(page, lruvec, page_off_lru(page)); } + /* Clear Active bit in case of parallel mark_page_accessed */ + ClearPageActive(page); + list_add(&page->lru, &pages_to_free); } if (zone) diff --git a/mm/vmscan.c b/mm/vmscan.c index c857943..99b3ac7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -546,7 +546,6 @@ int remove_mapping(struct address_space *mapping, struct page *page) void putback_lru_page(struct page *page) { int lru; - int active = !!TestClearPageActive(page); int was_unevictable = PageUnevictable(page); VM_BUG_ON(PageLRU(page)); @@ -561,8 +560,8 @@ redo: * unevictable page on [in]active list. * We know how to handle that. */ - lru = active + page_lru_base_type(page); - lru_cache_add_lru(page, lru); + lru = page_lru_base_type(page); + lru_cache_add(page); } else { /* * Put unevictable pages directly on zone's unevictable -- cgit v0.10.2 From dacbde0963d62a4962d5e8a5cc38dfd1f016124b Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Wed, 3 Jul 2013 15:02:35 -0700 Subject: mm/page_alloc.c: add additional checking and return value for the 'table->data' - check the length of the procfs data before copying it into a fixed size array. - when __parse_numa_zonelist_order() fails, save the error code for return. - 'char*' --> 'char *' coding style fix Signed-off-by: Chen Gang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fab9506..a662c74 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3256,18 +3256,25 @@ int numa_zonelist_order_handler(ctl_table *table, int write, static DEFINE_MUTEX(zl_order_mutex); mutex_lock(&zl_order_mutex); - if (write) - strcpy(saved_string, (char*)table->data); + if (write) { + if (strlen((char *)table->data) >= NUMA_ZONELIST_ORDER_LEN) { + ret = -EINVAL; + goto out; + } + strcpy(saved_string, (char *)table->data); + } ret = proc_dostring(table, write, buffer, length, ppos); if (ret) goto out; if (write) { int oldval = user_zonelist_order; - if (__parse_numa_zonelist_order((char*)table->data)) { + + ret = __parse_numa_zonelist_order((char *)table->data); + if (ret) { /* * bogus value. restore saved string */ - strncpy((char*)table->data, saved_string, + strncpy((char *)table->data, saved_string, NUMA_ZONELIST_ORDER_LEN); user_zonelist_order = oldval; } else if (oldval != user_zonelist_order) { -- cgit v0.10.2 From 9bde916bc73255dcee3d8aded990443675daa707 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Wed, 3 Jul 2013 15:02:36 -0700 Subject: mm/nommu.c: add additional check for vread() just like vwrite() has done vwrite() checks for overflow. vread() should do the same thing. Since vwrite() checks the source buffer address, vread() should check the destination buffer address. Signed-off-by: Chen Gang Cc: Al Viro Cc: Michel Lespinasse Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/nommu.c b/mm/nommu.c index 298884d..1898b2fe 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -282,6 +282,10 @@ EXPORT_SYMBOL(vmalloc_to_pfn); long vread(char *buf, char *addr, unsigned long count) { + /* Don't allow overflow */ + if ((unsigned long) buf + count < count) + count = -(unsigned long) buf; + memcpy(buf, addr, count); return count; } -- cgit v0.10.2 From f15bdfa802bfa5eb6b4b5a241b97ec9fa1204a35 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 3 Jul 2013 15:02:37 -0700 Subject: mm/memory-failure.c: fix memory leak in successful soft offlining After a successful page migration by soft offlining, the source page is not properly freed and it's never reusable even if we unpoison it afterward. This is caused by the race between freeing page and setting PG_hwpoison. In successful soft offlining, the source page is put (and the refcount becomes 0) by putback_lru_page() in unmap_and_move(), where it's linked to pagevec and actual freeing back to buddy is delayed. So if PG_hwpoison is set for the page before freeing, the freeing does not functions as expected (in such case freeing aborts in free_pages_prepare() check.) This patch tries to make sure to free the source page before setting PG_hwpoison on it. To avoid reallocating, the page keeps MIGRATE_ISOLATE until after setting PG_hwpoison. This patch also removes obsolete comments about "keeping elevated refcount" because what they say is not true. Unlike memory_failure(), soft_offline_page() uses no special page isolation code, and the soft-offlined pages have no elevated. Signed-off-by: Naoya Horiguchi Cc: Andi Kleen Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memory-failure.c b/mm/memory-failure.c index ceb0c7f..2c13aa7 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1410,7 +1410,8 @@ static int __get_any_page(struct page *p, unsigned long pfn, int flags) /* * Isolate the page, so that it doesn't get reallocated if it - * was free. + * was free. This flag should be kept set until the source page + * is freed and PG_hwpoison on it is set. */ set_migratetype_isolate(p, true); /* @@ -1433,7 +1434,6 @@ static int __get_any_page(struct page *p, unsigned long pfn, int flags) /* Not a free page */ ret = 1; } - unset_migratetype_isolate(p, MIGRATE_MOVABLE); unlock_memory_hotplug(); return ret; } @@ -1494,7 +1494,6 @@ static int soft_offline_huge_page(struct page *page, int flags) atomic_long_add(1 << compound_trans_order(hpage), &num_poisoned_pages); } - /* keep elevated page count for bad page */ return ret; } @@ -1559,7 +1558,7 @@ int soft_offline_page(struct page *page, int flags) atomic_long_inc(&num_poisoned_pages); } } - /* keep elevated page count for bad page */ + unset_migratetype_isolate(page, MIGRATE_MOVABLE); return ret; } @@ -1625,7 +1624,22 @@ static int __soft_offline_page(struct page *page, int flags) if (ret > 0) ret = -EIO; } else { + /* + * After page migration succeeds, the source page can + * be trapped in pagevec and actual freeing is delayed. + * Freeing code works differently based on PG_hwpoison, + * so there's a race. We need to make sure that the + * source page should be freed back to buddy before + * setting PG_hwpoison. + */ + if (!is_free_buddy_page(page)) + lru_add_drain_all(); + if (!is_free_buddy_page(page)) + drain_all_pages(); SetPageHWPoison(page); + if (!is_free_buddy_page(page)) + pr_info("soft offline: %#lx: page leaked\n", + pfn); atomic_long_inc(&num_poisoned_pages); } } else { -- cgit v0.10.2 From 4996eed867a7215958267252fafddc41d5f26140 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 3 Jul 2013 15:02:39 -0700 Subject: mm/memory_hotplug.c: change normal message to use pr_debug During early boot-up, iomem_resource is set up from the boot descriptor table, such as EFI Memory Table and e820. Later, acpi_memory_device_add() calls add_memory() for each ACPI memory device object as it enumerates ACPI namespace. This add_memory() call is expected to fail in register_memory_resource() at boot since iomem_resource has been set up from EFI/e820. As a result, add_memory() returns -EEXIST, which acpi_memory_device_add() handles as the normal case. This scheme works fine, but the following error message is logged for every ACPI memory device object during boot-up. "System RAM resource %pR cannot be added\n" This patch changes register_memory_resource() to use pr_debug() for the message as it shows up under the normal case. Signed-off-by: Toshi Kani Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a66d002..e3097f2 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -75,7 +75,7 @@ static struct resource *register_memory_resource(u64 start, u64 size) res->end = start + size - 1; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; if (request_resource(&iomem_resource, res) < 0) { - printk("System RAM resource %pR cannot be added\n", res); + pr_debug("System RAM resource %pR cannot be added\n", res); kfree(res); res = NULL; } -- cgit v0.10.2 From cea27eb2a202959783f81254c48c250ddd80e129 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 3 Jul 2013 15:02:40 -0700 Subject: mm/memory-hotplug: fix lowmem count overflow when offline pages The logic for the memory-remove code fails to correctly account the Total High Memory when a memory block which contains High Memory is offlined as shown in the example below. The following patch fixes it. Before logic memory remove: MemTotal: 7603740 kB MemFree: 6329612 kB Buffers: 94352 kB Cached: 872008 kB SwapCached: 0 kB Active: 626932 kB Inactive: 519216 kB Active(anon): 180776 kB Inactive(anon): 222944 kB Active(file): 446156 kB Inactive(file): 296272 kB Unevictable: 0 kB Mlocked: 0 kB HighTotal: 7294672 kB HighFree: 5704696 kB LowTotal: 309068 kB LowFree: 624916 kB After logic memory remove: MemTotal: 7079452 kB MemFree: 5805976 kB Buffers: 94372 kB Cached: 872000 kB SwapCached: 0 kB Active: 626936 kB Inactive: 519236 kB Active(anon): 180780 kB Inactive(anon): 222944 kB Active(file): 446156 kB Inactive(file): 296292 kB Unevictable: 0 kB Mlocked: 0 kB HighTotal: 7294672 kB HighFree: 5181024 kB LowTotal: 4294752076 kB LowFree: 624952 kB [mhocko@suse.cz: fix CONFIG_HIGHMEM=n build] Signed-off-by: Wanpeng Li Reviewed-by: Michal Hocko Cc: KAMEZAWA Hiroyuki Cc: David Rientjes Cc: [2.6.24+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a662c74..d711dcd 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6185,6 +6185,10 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) list_del(&page->lru); rmv_page_order(page); zone->free_area[order].nr_free--; +#ifdef CONFIG_HIGHMEM + if (PageHighMem(page)) + totalhigh_pages -= 1 << order; +#endif for (i = 0; i < (1 << order); i++) SetPageReserved((page+i)); pfn += (1 << order); -- cgit v0.10.2 From 4c42efa266030f32227f35fabbdd986fcf8f2ec4 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 3 Jul 2013 15:02:41 -0700 Subject: mm/pageblock: remove get/set_pageblock_flags get_pageblock_flags and set_pageblock_flags are not used any more, this patch removes them. Signed-off-by: Wanpeng Li Reviewed-by: Michal Hocko Cc: KAMEZAWA Hiroyuki Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index be655e4..2ee8cd2 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -80,10 +80,4 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags, PB_migrate_skip) #endif /* CONFIG_COMPACTION */ -#define get_pageblock_flags(page) \ - get_pageblock_flags_group(page, 0, PB_migrate_end) -#define set_pageblock_flags(page, flags) \ - set_pageblock_flags_group(page, flags, \ - 0, PB_migrate_end) - #endif /* PAGEBLOCK_FLAGS_H */ -- cgit v0.10.2 From 5f1e31d2f5977d910d0b2f5018173e99241d1940 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 3 Jul 2013 15:02:42 -0700 Subject: mm/hugetlb: remove hugetlb_prefault hugetlb_prefault() is not used any more, this patch removes it. Signed-off-by: Wanpeng Li Reviewed-by: Michal Hocko Cc: KAMEZAWA Hiroyuki Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 89d4fbf..c2b1801 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -55,7 +55,6 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb, void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page); -int hugetlb_prefault(struct address_space *, struct vm_area_struct *); void hugetlb_report_meminfo(struct seq_file *); int hugetlb_report_node_meminfo(int, char *); void hugetlb_show_meminfo(void); @@ -114,7 +113,6 @@ static inline unsigned long hugetlb_total_pages(void) #define follow_hugetlb_page(m,v,p,vs,a,b,i,w) ({ BUG(); 0; }) #define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL) #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) -#define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) static inline void hugetlb_report_meminfo(struct seq_file *m) { } -- cgit v0.10.2 From 2415cf12e04d415b16d9c2f2a705bcd6cd9a0474 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 3 Jul 2013 15:02:43 -0700 Subject: mm/hugetlb: use already existing interface huge_page_shift Use the already existing interface huge_page_shift instead of h->order + PAGE_SHIFT. Signed-off-by: Wanpeng Li Cc: KAMEZAWA Hiroyuki Cc: David Rientjes Cc: Benjamin Herrenschmidt Reviewed-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 77fdd2c..4210549 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -357,7 +357,7 @@ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) int alloc_bootmem_huge_page(struct hstate *hstate) { struct huge_bootmem_page *m; - int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT); + int idx = shift_to_mmu_psize(huge_page_shift(hstate)); int nr_gpages = gpage_freearray[idx].nr_gpages; if (nr_gpages == 0) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index aed085a..fe09515 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -319,7 +319,7 @@ unsigned long vma_kernel_pagesize(struct vm_area_struct *vma) hstate = hstate_vma(vma); - return 1UL << (hstate->order + PAGE_SHIFT); + return 1UL << huge_page_shift(hstate); } EXPORT_SYMBOL_GPL(vma_kernel_pagesize); -- cgit v0.10.2 From 917d9290af749fac9c4d90bacf18699c9d8ba28d Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Wed, 3 Jul 2013 15:02:44 -0700 Subject: mm: tune vm_committed_as percpu_counter batching size Currently the per cpu counter's batch size for memory accounting is configured as twice the number of cpus in the system. However, for system with very large memory, it is more appropriate to make it proportional to the memory size per cpu in the system. For example, for a x86_64 system with 64 cpus and 128 GB of memory, the batch size is only 2*64 pages (0.5 MB). So any memory accounting changes of more than 0.5MB will overflow the per cpu counter into the global counter. Instead, for the new scheme, the batch size is configured to be 0.4% of the memory/cpu = 8MB (128 GB/64 /256), which is more inline with the memory size. I've done a repeated brk test of 800KB (from will-it-scale test suite) with 80 concurrent processes on a 4 socket Westmere machine with a total of 40 cores. Without the patch, about 80% of cpu is spent on spin-lock contention within the vm_committed_as counter. With the patch, there's a 73x speedup on the benchmark and the lock contention drops off almost entirely. [akpm@linux-foundation.org: fix section mismatch] Signed-off-by: Tim Chen Cc: Tejun Heo Cc: Eric Dumazet Cc: Dave Hansen Cc: Andi Kleen Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mman.h b/include/linux/mman.h index 9aa863d..92dc257 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -11,11 +11,17 @@ extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; extern struct percpu_counter vm_committed_as; +#ifdef CONFIG_SMP +extern s32 vm_committed_as_batch; +#else +#define vm_committed_as_batch 0 +#endif + unsigned long vm_memory_committed(void); static inline void vm_acct_memory(long pages) { - percpu_counter_add(&vm_committed_as, pages); + __percpu_counter_add(&vm_committed_as, pages, vm_committed_as_batch); } static inline void vm_unacct_memory(long pages) diff --git a/mm/mm_init.c b/mm/mm_init.c index c280a02..633c088 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include "internal.h" #ifdef CONFIG_DEBUG_MEMORY_INIT @@ -147,6 +149,51 @@ early_param("mminit_loglevel", set_mminit_loglevel); struct kobject *mm_kobj; EXPORT_SYMBOL_GPL(mm_kobj); +#ifdef CONFIG_SMP +s32 vm_committed_as_batch = 32; + +static void __meminit mm_compute_batch(void) +{ + u64 memsized_batch; + s32 nr = num_present_cpus(); + s32 batch = max_t(s32, nr*2, 32); + + /* batch size set to 0.4% of (total memory/#cpus), or max int32 */ + memsized_batch = min_t(u64, (totalram_pages/nr)/256, 0x7fffffff); + + vm_committed_as_batch = max_t(s32, memsized_batch, batch); +} + +static int __meminit mm_compute_batch_notifier(struct notifier_block *self, + unsigned long action, void *arg) +{ + switch (action) { + case MEM_ONLINE: + case MEM_OFFLINE: + mm_compute_batch(); + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block compute_batch_nb __meminitdata = { + .notifier_call = mm_compute_batch_notifier, + .priority = IPC_CALLBACK_PRI, /* use lowest priority */ +}; + +static int __init mm_compute_batch_init(void) +{ + mm_compute_batch(); + register_hotmemory_notifier(&compute_batch_nb); + + return 0; +} + +__initcall(mm_compute_batch_init); + +#endif + static int __init mm_sysfs_init(void) { mm_kobj = kobject_create_and_add("mm", kernel_kobj); -- cgit v0.10.2 From dcf6b7ddd7df8965727746f89c59229b23180e5a Mon Sep 17 00:00:00 2001 From: Rafael Aquini Date: Wed, 3 Jul 2013 15:02:46 -0700 Subject: swap: discard while swapping only if SWAP_FLAG_DISCARD_PAGES Considering the use cases where the swap device supports discard: a) and can do it quickly; b) but it's slow to do in small granularities (or concurrent with other I/O); c) but the implementation is so horrendous that you don't even want to send one down; And assuming that the sysadmin considers it useful to send the discards down at all, we would (probably) want the following solutions: i. do the fine-grained discards for freed swap pages, if device is capable of doing so optimally; ii. do single-time (batched) swap area discards, either at swapon or via something like fstrim (not implemented yet); iii. allow doing both single-time and fine-grained discards; or iv. turn it off completely (default behavior) As implemented today, one can only enable/disable discards for swap, but one cannot select, for instance, solution (ii) on a swap device like (b) even though the single-time discard is regarded to be interesting, or necessary to the workload because it would imply (1), and the device is not capable of performing it optimally. This patch addresses the scenario depicted above by introducing a way to ensure the (probably) wanted solutions (i, ii, iii and iv) can be flexibly flagged through swapon(8) to allow a sysadmin to select the best suitable swap discard policy accordingly to system constraints. This patch introduces SWAP_FLAG_DISCARD_PAGES and SWAP_FLAG_DISCARD_ONCE new flags to allow more flexibe swap discard policies being flagged through swapon(8). The default behavior is to keep both single-time, or batched, area discards (SWAP_FLAG_DISCARD_ONCE) and fine-grained discards for page-clusters (SWAP_FLAG_DISCARD_PAGES) enabled, in order to keep consistentcy with older kernel behavior, as well as maintain compatibility with older swapon(8). However, through the new introduced flags the best suitable discard policy can be selected accordingly to any given swap device constraint. [akpm@linux-foundation.org: tweak comments] Signed-off-by: Rafael Aquini Acked-by: KOSAKI Motohiro Cc: Hugh Dickins Cc: Shaohua Li Cc: Karel Zak Cc: Jeff Moyer Cc: Rik van Riel Cc: Larry Woodman Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/swap.h b/include/linux/swap.h index 85d7437..d95cde5 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -20,10 +20,13 @@ struct bio; #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_PRIO_SHIFT 0 -#define SWAP_FLAG_DISCARD 0x10000 /* discard swap cluster after use */ +#define SWAP_FLAG_DISCARD 0x10000 /* enable discard for swap */ +#define SWAP_FLAG_DISCARD_ONCE 0x20000 /* discard swap area at swapon-time */ +#define SWAP_FLAG_DISCARD_PAGES 0x40000 /* discard page-clusters after use */ #define SWAP_FLAGS_VALID (SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \ - SWAP_FLAG_DISCARD) + SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \ + SWAP_FLAG_DISCARD_PAGES) static inline int current_is_kswapd(void) { @@ -147,14 +150,16 @@ struct swap_extent { enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ - SWP_DISCARDABLE = (1 << 2), /* swapon+blkdev support discard */ + SWP_DISCARDABLE = (1 << 2), /* blkdev support discard */ SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */ SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */ SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */ SWP_BLKDEV = (1 << 6), /* its a block device */ SWP_FILE = (1 << 7), /* set after swap_activate success */ + SWP_AREA_DISCARD = (1 << 8), /* single-time swap area discards */ + SWP_PAGE_DISCARD = (1 << 9), /* freed swap page-cluster discards */ /* add others here before... */ - SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */ + SWP_SCANNING = (1 << 10), /* refcount in scan_swap_map */ }; #define SWAP_CLUSTER_MAX 32UL diff --git a/mm/swapfile.c b/mm/swapfile.c index 746af55b..36af6ee 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -212,7 +212,7 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, si->cluster_nr = SWAPFILE_CLUSTER - 1; goto checks; } - if (si->flags & SWP_DISCARDABLE) { + if (si->flags & SWP_PAGE_DISCARD) { /* * Start range check on racing allocations, in case * they overlap the cluster we eventually decide on @@ -322,7 +322,7 @@ checks: if (si->lowest_alloc) { /* - * Only set when SWP_DISCARDABLE, and there's a scan + * Only set when SWP_PAGE_DISCARD, and there's a scan * for a free cluster in progress or just completed. */ if (found_free_cluster) { @@ -2016,6 +2016,20 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p, return nr_extents; } +/* + * Helper to sys_swapon determining if a given swap + * backing device queue supports DISCARD operations. + */ +static bool swap_discardable(struct swap_info_struct *si) +{ + struct request_queue *q = bdev_get_queue(si->bdev); + + if (!q || !blk_queue_discard(q)) + return false; + + return true; +} + SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) { struct swap_info_struct *p; @@ -2123,8 +2137,37 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) p->flags |= SWP_SOLIDSTATE; p->cluster_next = 1 + (prandom_u32() % p->highest_bit); } - if ((swap_flags & SWAP_FLAG_DISCARD) && discard_swap(p) == 0) - p->flags |= SWP_DISCARDABLE; + + if ((swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) { + /* + * When discard is enabled for swap with no particular + * policy flagged, we set all swap discard flags here in + * order to sustain backward compatibility with older + * swapon(8) releases. + */ + p->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD | + SWP_PAGE_DISCARD); + + /* + * By flagging sys_swapon, a sysadmin can tell us to + * either do single-time area discards only, or to just + * perform discards for released swap page-clusters. + * Now it's time to adjust the p->flags accordingly. + */ + if (swap_flags & SWAP_FLAG_DISCARD_ONCE) + p->flags &= ~SWP_PAGE_DISCARD; + else if (swap_flags & SWAP_FLAG_DISCARD_PAGES) + p->flags &= ~SWP_AREA_DISCARD; + + /* issue a swapon-time discard if it's still required */ + if (p->flags & SWP_AREA_DISCARD) { + int err = discard_swap(p); + if (unlikely(err)) + printk(KERN_ERR + "swapon: discard_swap(%p): %d\n", + p, err); + } + } } mutex_lock(&swapon_mutex); @@ -2135,11 +2178,13 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) enable_swap_info(p, prio, swap_map, frontswap_map); printk(KERN_INFO "Adding %uk swap on %s. " - "Priority:%d extents:%d across:%lluk %s%s%s\n", + "Priority:%d extents:%d across:%lluk %s%s%s%s%s\n", p->pages<<(PAGE_SHIFT-10), name->name, p->prio, nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), (p->flags & SWP_SOLIDSTATE) ? "SS" : "", (p->flags & SWP_DISCARDABLE) ? "D" : "", + (p->flags & SWP_AREA_DISCARD) ? "s" : "", + (p->flags & SWP_PAGE_DISCARD) ? "c" : "", (frontswap_map) ? "FS" : ""); mutex_unlock(&swapon_mutex); -- cgit v0.10.2 From 11199692d83dd3fe1511203024fb9853d176ec4c Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:02:48 -0700 Subject: mm: change signature of free_reserved_area() to fix building warnings Change signature of free_reserved_area() according to Russell King's suggestion to fix following build warnings: arch/arm/mm/init.c: In function 'mem_init': arch/arm/mm/init.c:603:2: warning: passing argument 1 of 'free_reserved_area' makes integer from pointer without a cast [enabled by default] free_reserved_area(__va(PHYS_PFN_OFFSET), swapper_pg_dir, 0, NULL); ^ In file included from include/linux/mman.h:4:0, from arch/arm/mm/init.c:15: include/linux/mm.h:1301:22: note: expected 'long unsigned int' but argument is of type 'void *' extern unsigned long free_reserved_area(unsigned long start, unsigned long end, mm/page_alloc.c: In function 'free_reserved_area': >> mm/page_alloc.c:5134:3: warning: passing argument 1 of 'virt_to_phys' makes pointer from integer without a cast [enabled by default] In file included from arch/mips/include/asm/page.h:49:0, from include/linux/mmzone.h:20, from include/linux/gfp.h:4, from include/linux/mm.h:8, from mm/page_alloc.c:18: arch/mips/include/asm/io.h:119:29: note: expected 'const volatile void *' but argument is of type 'long unsigned int' mm/page_alloc.c: In function 'free_area_init_nodes': mm/page_alloc.c:5030:34: warning: array subscript is below array bounds [-Warray-bounds] Also address some minor code review comments. Signed-off-by: Jiang Liu Reported-by: Arnd Bergmann Cc: "H. Peter Anvin" Cc: "Michael S. Tsirkin" Cc: Cc: Catalin Marinas Cc: Chris Metcalf Cc: David Howells Cc: Geert Uytterhoeven Cc: Ingo Molnar Cc: Jeremy Fitzhardinge Cc: Jianguo Wu Cc: Joonsoo Kim Cc: Kamezawa Hiroyuki Cc: Konrad Rzeszutek Wilk Cc: Marek Szyprowski Cc: Mel Gorman Cc: Michel Lespinasse Cc: Minchan Kim Cc: Rik van Riel Cc: Rusty Russell Cc: Tang Chen Cc: Tejun Heo Cc: Thomas Gleixner Cc: Wen Congyang Cc: Will Deacon Cc: Yasuaki Ishimatsu Cc: Yinghai Lu Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index 1d4aabf..891bd27 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -238,8 +238,8 @@ nautilus_init_pci(void) if (pci_mem < memtop) memtop = pci_mem; if (memtop > alpha_mv.min_mem_address) { - free_reserved_area((unsigned long)__va(alpha_mv.min_mem_address), - (unsigned long)__va(memtop), 0, NULL); + free_reserved_area(__va(alpha_mv.min_mem_address), + __va(memtop), 0, NULL); printk("nautilus_init_pci: %ldk freed\n", (memtop - alpha_mv.min_mem_address) >> 10); } diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 0ba85ee..d54848d 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -326,6 +326,6 @@ free_initmem(void) void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, 0, "initrd"); } #endif diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 4a17736..dce02e4 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -152,7 +152,7 @@ void __init_refok free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, 0, "initrd"); } #endif diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 2ffee02..7fae391 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -745,7 +745,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) { if (!keep_initrd) { poison_init_mem((void *)start, PAGE_ALIGN(end) - start); - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, 0, "initrd"); } } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f497ca7..6041e40 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -398,7 +398,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) { if (!keep_initrd) { poison_init_mem((void *)start, PAGE_ALIGN(end) - start); - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, 0, "initrd"); } } diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c index e66e840..5a79fa0 100644 --- a/arch/avr32/mm/init.c +++ b/arch/avr32/mm/init.c @@ -154,6 +154,6 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, 0, "initrd"); } #endif diff --git a/arch/blackfin/mm/init.c b/arch/blackfin/mm/init.c index 82d01a7..8e9eab2 100644 --- a/arch/blackfin/mm/init.c +++ b/arch/blackfin/mm/init.c @@ -133,7 +133,7 @@ void __init mem_init(void) void __init free_initrd_mem(unsigned long start, unsigned long end) { #ifndef CONFIG_MPU - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, 0, "initrd"); #endif } #endif diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c index b74ccb5..07bfcc9 100644 --- a/arch/c6x/mm/init.c +++ b/arch/c6x/mm/init.c @@ -78,7 +78,7 @@ void __init mem_init(void) #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, 0, "initrd"); } #endif diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c index dee354f..a67f3a5 100644 --- a/arch/frv/mm/init.c +++ b/arch/frv/mm/init.c @@ -173,6 +173,6 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, 0, "initrd"); } /* end free_initrd_mem() */ #endif diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c index ff349d7..57e03c5 100644 --- a/arch/h8300/mm/init.c +++ b/arch/h8300/mm/init.c @@ -161,7 +161,7 @@ void __init mem_init(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, 0, "initrd"); } #endif diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index d1fe4b4..da568c2 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -154,8 +154,7 @@ ia64_init_addr_space (void) void free_initmem (void) { - free_reserved_area((unsigned long)ia64_imva(__init_begin), - (unsigned long)ia64_imva(__init_end), + free_reserved_area(ia64_imva(__init_begin), ia64_imva(__init_end), 0, "unused kernel"); } diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index ab4cbce..d80412d 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c @@ -191,6 +191,6 @@ void free_initmem(void) *======================================================================*/ void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, 0, "initrd"); } #endif diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index 1af2ca3..95de725 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -202,6 +202,6 @@ void __init mem_init(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, 0, "initrd"); } #endif diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c index d05b845..5e2238d 100644 --- a/arch/metag/mm/init.c +++ b/arch/metag/mm/init.c @@ -414,7 +414,8 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index b38ae3a..d7b8ada 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -235,7 +235,7 @@ void __init setup_memory(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, 0, "initrd"); } #endif diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 9b973e0..268f2a9 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -440,7 +440,8 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c index 5a8ace6..e19049d 100644 --- a/arch/mn10300/mm/init.c +++ b/arch/mn10300/mm/init.c @@ -152,6 +152,7 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index b3cbc67..ab11332 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -261,7 +261,7 @@ void __init mem_init(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, 0, "initrd"); } #endif diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 505b56c..3223d5e 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -1101,6 +1101,7 @@ void flush_tlb_all(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - num_physpages += free_reserved_area(start, end, 0, "initrd"); + num_physpages += free_reserved_area((void *)start, (void *)end, 0, + "initrd"); } #endif diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 6782221..5e4830a 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -756,7 +756,7 @@ static __init void kvm_free_tmp(void) end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK; /* Free the tmp space we don't need */ - free_reserved_area(start, end, 0, NULL); + free_reserved_area((void *)start, (void *)end, 0, NULL); } static int __init kvm_guest_init(void) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 0988a26..347c5b1 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -407,7 +407,7 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, 0, "initrd"); } #endif diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 89ebae4..0878c89 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -172,7 +172,8 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c index 0940682..f5dd61e 100644 --- a/arch/score/mm/init.c +++ b/arch/score/mm/init.c @@ -108,7 +108,8 @@ void __init mem_init(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 20f9ead..b892a9b 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -505,7 +505,7 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, 0, "initrd"); } #endif diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index af472cf..d5f9c02 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -372,8 +372,8 @@ void free_initmem (void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - num_physpages += free_reserved_area(start, end, POISON_FREE_INITMEM, - "initrd"); + num_physpages += free_reserved_area((void *)start, (void *)end, + POISON_FREE_INITMEM, "initrd"); } #endif diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 04fd55a..8269deb 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2131,8 +2131,8 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - num_physpages += free_reserved_area(start, end, POISON_FREE_INITMEM, - "initrd"); + num_physpages += free_reserved_area((void *)start, (void *)end, + POISON_FREE_INITMEM, "initrd"); } #endif diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 9df292b..2aa7a24 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -244,7 +244,7 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, 0, "initrd"); } #endif diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index 63df12d..220755c 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -486,7 +486,7 @@ static int keep_initrd; void free_initrd_mem(unsigned long start, unsigned long end) { if (!keep_initrd) - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, 0, "initrd"); } static int __init keepinitrd_setup(char *__unused) diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index bba125b..4d658ef 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -214,7 +214,7 @@ extern int initrd_is_mapped; void free_initrd_mem(unsigned long start, unsigned long end) { if (initrd_is_mapped) - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, 0, "initrd"); } #endif diff --git a/include/linux/mm.h b/include/linux/mm.h index 949bd70..be1b96c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1311,7 +1311,7 @@ extern void free_initmem(void); * "poison" if it's non-zero. * Return pages freed into the buddy system. */ -extern unsigned long free_reserved_area(unsigned long start, unsigned long end, +extern unsigned long free_reserved_area(void *start, void *end, int poison, char *s); #ifdef CONFIG_HIGHMEM /* @@ -1355,8 +1355,7 @@ static inline unsigned long free_initmem_default(int poison) { extern char __init_begin[], __init_end[]; - return free_reserved_area(PAGE_ALIGN((unsigned long)&__init_begin) , - ((unsigned long)&__init_end) & PAGE_MASK, + return free_reserved_area(&__init_begin, &__init_end, poison, "unused kernel"); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d711dcd..be18ccd 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5206,25 +5206,26 @@ early_param("movablecore", cmdline_parse_movablecore); #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ -unsigned long free_reserved_area(unsigned long start, unsigned long end, - int poison, char *s) +unsigned long free_reserved_area(void *start, void *end, int poison, char *s) { - unsigned long pages, pos; + void *pos; + unsigned long pages = 0; - pos = start = PAGE_ALIGN(start); - end &= PAGE_MASK; - for (pages = 0; pos < end; pos += PAGE_SIZE, pages++) { + start = (void *)PAGE_ALIGN((unsigned long)start); + end = (void *)((unsigned long)end & PAGE_MASK); + for (pos = start; pos < end; pos += PAGE_SIZE, pages++) { if (poison) - memset((void *)pos, poison, PAGE_SIZE); - free_reserved_page(virt_to_page((void *)pos)); + memset(pos, poison, PAGE_SIZE); + free_reserved_page(virt_to_page(pos)); } if (pages && s) - pr_info("Freeing %s memory: %ldK (%lx - %lx)\n", + pr_info("Freeing %s memory: %ldK (%p - %p)\n", s, pages << (PAGE_SHIFT - 10), start, end); return pages; } +EXPORT_SYMBOL(free_reserved_area); #ifdef CONFIG_HIGHMEM void free_highmem_page(struct page *page) -- cgit v0.10.2 From dbe67df4ba78c79db547c7864e1120981c144c97 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:02:51 -0700 Subject: mm: enhance free_reserved_area() to support poisoning memory with zero Address more review comments from last round of code review. 1) Enhance free_reserved_area() to support poisoning freed memory with pattern '0'. This could be used to get rid of poison_init_mem() on ARM64. 2) A previous patch has disabled memory poison for initmem on s390 by mistake, so restore to the original behavior. 3) Remove redundant PAGE_ALIGN() when calling free_reserved_area(). Signed-off-by: Jiang Liu Cc: Geert Uytterhoeven Cc: "H. Peter Anvin" Cc: "Michael S. Tsirkin" Cc: Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Chris Metcalf Cc: David Howells Cc: Ingo Molnar Cc: Jeremy Fitzhardinge Cc: Jianguo Wu Cc: Joonsoo Kim Cc: Kamezawa Hiroyuki Cc: Konrad Rzeszutek Wilk Cc: Marek Szyprowski Cc: Mel Gorman Cc: Michel Lespinasse Cc: Minchan Kim Cc: Rik van Riel Cc: Rusty Russell Cc: Tang Chen Cc: Tejun Heo Cc: Thomas Gleixner Cc: Wen Congyang Cc: Will Deacon Cc: Yasuaki Ishimatsu Cc: Yinghai Lu Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index 891bd27..837c0fa 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -239,7 +239,7 @@ nautilus_init_pci(void) memtop = pci_mem; if (memtop > alpha_mv.min_mem_address) { free_reserved_area(__va(alpha_mv.min_mem_address), - __va(memtop), 0, NULL); + __va(memtop), -1, NULL); printk("nautilus_init_pci: %ldk freed\n", (memtop - alpha_mv.min_mem_address) >> 10); } diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index d54848d..218c29c 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -319,13 +319,13 @@ mem_init(void) void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area((void *)start, (void *)end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index dce02e4..f9c7077 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -146,13 +146,13 @@ void __init mem_init(void) */ void __init_refok free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area((void *)start, (void *)end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 7fae391..2070651 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -601,7 +601,7 @@ void __init mem_init(void) #ifdef CONFIG_SA1111 /* now that our DMA memory is actually so designated, we can free it */ - free_reserved_area(__va(PHYS_PFN_OFFSET), swapper_pg_dir, 0, NULL); + free_reserved_area(__va(PHYS_PFN_OFFSET), swapper_pg_dir, -1, NULL); #endif free_highpages(); @@ -729,12 +729,12 @@ void free_initmem(void) extern char __tcm_start, __tcm_end; poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start); - free_reserved_area(&__tcm_start, &__tcm_end, 0, "TCM link"); + free_reserved_area(&__tcm_start, &__tcm_end, -1, "TCM link"); #endif poison_init_mem(__init_begin, __init_end - __init_begin); if (!machine_is_integrator() && !machine_is_cintegrator()) - free_initmem_default(0); + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD @@ -745,7 +745,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) { if (!keep_initrd) { poison_init_mem((void *)start, PAGE_ALIGN(end) - start); - free_reserved_area((void *)start, (void *)end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 6041e40..997c634 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -387,7 +387,7 @@ void __init mem_init(void) void free_initmem(void) { poison_init_mem(__init_begin, __init_end - __init_begin); - free_initmem_default(0); + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD @@ -398,7 +398,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) { if (!keep_initrd) { poison_init_mem((void *)start, PAGE_ALIGN(end) - start); - free_reserved_area((void *)start, (void *)end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } } diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c index 5a79fa0..b079e04 100644 --- a/arch/avr32/mm/init.c +++ b/arch/avr32/mm/init.c @@ -148,12 +148,12 @@ void __init mem_init(void) void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area((void *)start, (void *)end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif diff --git a/arch/blackfin/mm/init.c b/arch/blackfin/mm/init.c index 8e9eab2..fa241f5 100644 --- a/arch/blackfin/mm/init.c +++ b/arch/blackfin/mm/init.c @@ -133,7 +133,7 @@ void __init mem_init(void) void __init free_initrd_mem(unsigned long start, unsigned long end) { #ifndef CONFIG_MPU - free_reserved_area((void *)start, (void *)end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); #endif } #endif @@ -141,7 +141,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) void __init_refok free_initmem(void) { #if defined CONFIG_RAMKERNEL && !defined CONFIG_MPU - free_initmem_default(0); + free_initmem_default(-1); if (memory_start == (unsigned long)(&__init_end)) memory_start = (unsigned long)(&__init_begin); #endif diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c index 07bfcc9..3987a20 100644 --- a/arch/c6x/mm/init.c +++ b/arch/c6x/mm/init.c @@ -78,11 +78,11 @@ void __init mem_init(void) #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area((void *)start, (void *)end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif void __init free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c index 9ac8094..8fec263 100644 --- a/arch/cris/mm/init.c +++ b/arch/cris/mm/init.c @@ -65,5 +65,5 @@ mem_init(void) void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c index a67f3a5..8ba9d22 100644 --- a/arch/frv/mm/init.c +++ b/arch/frv/mm/init.c @@ -162,7 +162,7 @@ void __init mem_init(void) void free_initmem(void) { #if defined(CONFIG_RAMKERNEL) && !defined(CONFIG_PROTECT_KERNEL) - free_initmem_default(0); + free_initmem_default(-1); #endif } /* end free_initmem() */ @@ -173,6 +173,6 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area((void *)start, (void *)end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } /* end free_initrd_mem() */ #endif diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c index 57e03c5..c831f1d 100644 --- a/arch/h8300/mm/init.c +++ b/arch/h8300/mm/init.c @@ -161,7 +161,7 @@ void __init mem_init(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area((void *)start, (void *)end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif @@ -169,7 +169,7 @@ void free_initmem(void) { #ifdef CONFIG_RAMKERNEL - free_initmem_default(0); + free_initmem_default(-1); #endif } diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index da568c2..f8a4f38 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -155,7 +155,7 @@ void free_initmem (void) { free_reserved_area(ia64_imva(__init_begin), ia64_imva(__init_end), - 0, "unused kernel"); + -1, "unused kernel"); } void __init diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index d80412d..cca87d9 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c @@ -181,7 +181,7 @@ void __init mem_init(void) *======================================================================*/ void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD @@ -191,6 +191,6 @@ void free_initmem(void) *======================================================================*/ void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area((void *)start, (void *)end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index 95de725..ab0b54c 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -110,7 +110,7 @@ void __init paging_init(void) void free_initmem(void) { #ifndef CONFIG_MMU_SUN3 - free_initmem_default(0); + free_initmem_default(-1); #endif /* CONFIG_MMU_SUN3 */ } @@ -202,6 +202,6 @@ void __init mem_init(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area((void *)start, (void *)end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index d7b8ada..d149e0e 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -235,13 +235,13 @@ void __init setup_memory(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area((void *)start, (void *)end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } void __init mem_init(void) diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index ab11332..c371e4a 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -261,11 +261,11 @@ void __init mem_init(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area((void *)start, (void *)end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 3223d5e..ebac7bd 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -532,7 +532,7 @@ void free_initmem(void) * pages are no-longer executable */ flush_icache_range(init_begin, init_end); - num_physpages += free_initmem_default(0); + num_physpages += free_initmem_default(-1); /* set up a new led state on systems shipped LED State panel */ pdc_chassis_send_status(PDC_CHASSIS_DIRECT_BCOMPLETE); @@ -1101,7 +1101,7 @@ void flush_tlb_all(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - num_physpages += free_reserved_area((void *)start, (void *)end, 0, + num_physpages += free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 5e4830a..db28032 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -750,13 +750,8 @@ EXPORT_SYMBOL_GPL(kvm_hypercall); static __init void kvm_free_tmp(void) { - unsigned long start, end; - - start = (ulong)&kvm_tmp[kvm_tmp_index + (PAGE_SIZE - 1)] & PAGE_MASK; - end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK; - - /* Free the tmp space we don't need */ - free_reserved_area((void *)start, (void *)end, 0, NULL); + free_reserved_area(&kvm_tmp[kvm_tmp_index], + &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL); } static int __init kvm_guest_init(void) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 347c5b1..7f47a05 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -407,7 +407,7 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area((void *)start, (void *)end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 0878c89..bf01d18 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -166,7 +166,7 @@ void __init mem_init(void) void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(POISON_FREE_INITMEM); } #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index b892a9b..d3af56b 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -499,13 +499,13 @@ void __init mem_init(void) void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area((void *)start, (void *)end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 2aa7a24..8ff0b7a 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -244,7 +244,7 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area((void *)start, (void *)end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index 220755c..df9b8ab 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -476,7 +476,7 @@ void __init mem_init(void) void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD @@ -486,7 +486,7 @@ static int keep_initrd; void free_initrd_mem(unsigned long start, unsigned long end) { if (!keep_initrd) - free_reserved_area((void *)start, (void *)end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } static int __init keepinitrd_setup(char *__unused) diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 4d658ef..026d29b 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -214,11 +214,11 @@ extern int initrd_is_mapped; void free_initrd_mem(unsigned long start, unsigned long end) { if (initrd_is_mapped) - free_reserved_area((void *)start, (void *)end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } diff --git a/include/linux/mm.h b/include/linux/mm.h index be1b96c..083cc0b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1308,7 +1308,7 @@ extern void free_initmem(void); /* * Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK) * into the buddy system. The freed pages will be poisoned with pattern - * "poison" if it's non-zero. + * "poison" if it's within range [0, UCHAR_MAX]. * Return pages freed into the buddy system. */ extern unsigned long free_reserved_area(void *start, void *end, @@ -1348,8 +1348,9 @@ static inline void mark_page_reserved(struct page *page) /* * Default method to free all the __init memory into the buddy system. - * The freed pages will be poisoned with pattern "poison" if it is - * non-zero. Return pages freed into the buddy system. + * The freed pages will be poisoned with pattern "poison" if it's within + * range [0, UCHAR_MAX]. + * Return pages freed into the buddy system. */ static inline unsigned long free_initmem_default(int poison) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index be18ccd..6780b2e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5214,7 +5214,7 @@ unsigned long free_reserved_area(void *start, void *end, int poison, char *s) start = (void *)PAGE_ALIGN((unsigned long)start); end = (void *)((unsigned long)end & PAGE_MASK); for (pos = start; pos < end; pos += PAGE_SIZE, pages++) { - if (poison) + if ((unsigned int)poison <= 0xFF) memset(pos, poison, PAGE_SIZE); free_reserved_page(virt_to_page(pos)); } -- cgit v0.10.2 From 9af5b80765e4c30b8eba8218724459226fa5e81f Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:02:54 -0700 Subject: mm/ARM64: kill poison_init_mem() Use free_reserved_area() to poison initmem memory pages and kill poison_init_mem() on ARM64. Signed-off-by: Jiang Liu Acked-by: Catalin Marinas Cc: Will Deacon Cc: "H. Peter Anvin" Cc: "Michael S. Tsirkin" Cc: Cc: Arnd Bergmann Cc: Chris Metcalf Cc: David Howells Cc: Geert Uytterhoeven Cc: Ingo Molnar Cc: Jeremy Fitzhardinge Cc: Jianguo Wu Cc: Joonsoo Kim Cc: Kamezawa Hiroyuki Cc: Konrad Rzeszutek Wilk Cc: Marek Szyprowski Cc: Mel Gorman Cc: Michel Lespinasse Cc: Minchan Kim Cc: Rik van Riel Cc: Rusty Russell Cc: Tang Chen Cc: Tejun Heo Cc: Thomas Gleixner Cc: Wen Congyang Cc: Yasuaki Ishimatsu Cc: Yinghai Lu Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 997c634..a398eb9 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -197,14 +197,6 @@ void __init bootmem_init(void) max_pfn = max_low_pfn = max; } -/* - * Poison init memory with an undefined instruction (0x0). - */ -static inline void poison_init_mem(void *s, size_t count) -{ - memset(s, 0, count); -} - #ifndef CONFIG_SPARSEMEM_VMEMMAP static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn) { @@ -386,8 +378,7 @@ void __init mem_init(void) void free_initmem(void) { - poison_init_mem(__init_begin, __init_end - __init_begin); - free_initmem_default(-1); + free_initmem_default(0); } #ifdef CONFIG_BLK_DEV_INITRD @@ -396,10 +387,8 @@ static int keep_initrd; void free_initrd_mem(unsigned long start, unsigned long end) { - if (!keep_initrd) { - poison_init_mem((void *)start, PAGE_ALIGN(end) - start); - free_reserved_area((void *)start, (void *)end, -1, "initrd"); - } + if (!keep_initrd) + free_reserved_area((void *)start, (void *)end, 0, "initrd"); } static int __init keepinitrd_setup(char *__unused) -- cgit v0.10.2 From c88442ec45f30d587b38b935a14acde4e217a926 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:02:58 -0700 Subject: mm/x86: use free_reserved_area() to simplify code Use common help function free_reserved_area() to simplify code. Signed-off-by: Jiang Liu Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Yinghai Lu Cc: Tang Chen Cc: Wen Congyang Cc: Jianguo Wu Cc: "Michael S. Tsirkin" Cc: Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Chris Metcalf Cc: David Howells Cc: Geert Uytterhoeven Cc: Jeremy Fitzhardinge Cc: Joonsoo Kim Cc: Kamezawa Hiroyuki Cc: Konrad Rzeszutek Wilk Cc: Marek Szyprowski Cc: Mel Gorman Cc: Michel Lespinasse Cc: Minchan Kim Cc: Rik van Riel Cc: Rusty Russell Cc: Tejun Heo Cc: Will Deacon Cc: Yasuaki Ishimatsu Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 1f34e92..2ec29ac 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -494,7 +494,6 @@ int devmem_is_allowed(unsigned long pagenr) void free_init_pages(char *what, unsigned long begin, unsigned long end) { - unsigned long addr; unsigned long begin_aligned, end_aligned; /* Make sure boundaries are page aligned */ @@ -509,8 +508,6 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) if (begin >= end) return; - addr = begin; - /* * If debugging page accesses then do not free this memory but * mark them not present - any buggy init-section access will @@ -529,18 +526,13 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) set_memory_nx(begin, (end - begin) >> PAGE_SHIFT); set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); - printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); - - for (; addr < end; addr += PAGE_SIZE) { - memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - free_reserved_page(virt_to_page(addr)); - } + free_reserved_area((void *)begin, (void *)end, POISON_FREE_INITMEM, what); #endif } void free_initmem(void) { - free_init_pages("unused kernel memory", + free_init_pages("unused kernel", (unsigned long)(&__init_begin), (unsigned long)(&__init_end)); } @@ -566,7 +558,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) * - relocate_initrd() * So here We can do PAGE_ALIGN() safely to get partial page to be freed */ - free_init_pages("initrd memory", start, PAGE_ALIGN(end)); + free_init_pages("initrd", start, PAGE_ALIGN(end)); } #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index b3940b6..b7bdf7b 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1166,11 +1166,10 @@ void mark_rodata_ro(void) set_memory_ro(start, (end-start) >> PAGE_SHIFT); #endif - free_init_pages("unused kernel memory", + free_init_pages("unused kernel", (unsigned long) __va(__pa_symbol(text_end)), (unsigned long) __va(__pa_symbol(rodata_start))); - - free_init_pages("unused kernel memory", + free_init_pages("unused kernel", (unsigned long) __va(__pa_symbol(rodata_end)), (unsigned long) __va(__pa_symbol(_sdata))); } -- cgit v0.10.2 From abd1b6d65fc49b7b95724c94a4e5892a3b3fc618 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:01 -0700 Subject: mm/tile: use common help functions to free reserved pages Use common help functions to free reserved pages. Signed-off-by: Jiang Liu Cc: Chris Metcalf Cc: Wen Congyang Cc: "H. Peter Anvin" Cc: "Michael S. Tsirkin" Cc: Cc: Arnd Bergmann Cc: Catalin Marinas Cc: David Howells Cc: Geert Uytterhoeven Cc: Ingo Molnar Cc: Jeremy Fitzhardinge Cc: Jianguo Wu Cc: Joonsoo Kim Cc: Kamezawa Hiroyuki Cc: Konrad Rzeszutek Wilk Cc: Marek Szyprowski Cc: Mel Gorman Cc: Michel Lespinasse Cc: Minchan Kim Cc: Rik van Riel Cc: Rusty Russell Cc: Tang Chen Cc: Tejun Heo Cc: Thomas Gleixner Cc: Will Deacon Cc: Yasuaki Ishimatsu Cc: Yinghai Lu Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 2749515..ccfeb3f 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -720,7 +720,7 @@ static void __init init_free_pfn_range(unsigned long start, unsigned long end) } init_page_count(page); __free_pages(page, order); - totalram_pages += count; + adjust_managed_page_count(page, count); page += count; pfn += count; @@ -1024,16 +1024,13 @@ static void free_init_pages(char *what, unsigned long begin, unsigned long end) pte_clear(&init_mm, addr, ptep); continue; } - __ClearPageReserved(page); - init_page_count(page); if (pte_huge(*ptep)) BUG_ON(!kdata_huge); else set_pte_at(&init_mm, addr, ptep, pfn_pte(pfn, PAGE_KERNEL)); memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - free_page(addr); - totalram_pages++; + free_reserved_page(page); } pr_info("Freeing %s: %ldk freed\n", what, (end - begin) >> 10); } -- cgit v0.10.2 From 834405c3b6aebf6853663796401cdfe11aac6275 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:04 -0700 Subject: mm: fix some trivial typos in comments Fix some trivial typos in comments. Signed-off-by: Jiang Liu Cc: Wen Congyang Cc: Tang Chen Cc: Yasuaki Ishimatsu Cc: Mel Gorman Cc: Minchan Kim Cc: Marek Szyprowski Cc: "H. Peter Anvin" Cc: "Michael S. Tsirkin" Cc: Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Chris Metcalf Cc: David Howells Cc: Geert Uytterhoeven Cc: Ingo Molnar Cc: Jeremy Fitzhardinge Cc: Jianguo Wu Cc: Joonsoo Kim Cc: Kamezawa Hiroyuki Cc: Konrad Rzeszutek Wilk Cc: Michel Lespinasse Cc: Rik van Riel Cc: Rusty Russell Cc: Tejun Heo Cc: Thomas Gleixner Cc: Will Deacon Cc: Yinghai Lu Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e3097f2..6096cb9 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -309,7 +309,7 @@ static int __meminit move_pfn_range_left(struct zone *z1, struct zone *z2, /* can't move pfns which are higher than @z2 */ if (end_pfn > zone_end_pfn(z2)) goto out_fail; - /* the move out part mast at the left most of @z2 */ + /* the move out part must be at the left most of @z2 */ if (start_pfn > z2->zone_start_pfn) goto out_fail; /* must included/overlap */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6780b2e..657daea 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2845,7 +2845,7 @@ EXPORT_SYMBOL(free_pages_exact); * nr_free_zone_pages() counts the number of counts pages which are beyond the * high watermark within all zones at or below a given zone index. For each * zone, the number of pages is calculated as: - * present_pages - high_pages + * managed_pages - high_pages */ static unsigned long nr_free_zone_pages(int offset) { -- cgit v0.10.2 From 4f9f47745e948eca18bb97c82dbb4d53f2380086 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:07 -0700 Subject: mm: use managed_pages to calculate default zonelist order Use zone->managed_pages instead of zone->present_pages to calculate default zonelist order because managed_pages means allocatable pages. Signed-off-by: Jiang Liu Cc: Mel Gorman Cc: Minchan Kim Cc: Marek Szyprowski Cc: "H. Peter Anvin" Cc: "Michael S. Tsirkin" Cc: Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Chris Metcalf Cc: David Howells Cc: Geert Uytterhoeven Cc: Ingo Molnar Cc: Jeremy Fitzhardinge Cc: Jianguo Wu Cc: Joonsoo Kim Cc: Kamezawa Hiroyuki Cc: Konrad Rzeszutek Wilk Cc: Michel Lespinasse Cc: Rik van Riel Cc: Rusty Russell Cc: Tang Chen Cc: Tejun Heo Cc: Thomas Gleixner Cc: Wen Congyang Cc: Will Deacon Cc: Yasuaki Ishimatsu Cc: Yinghai Lu Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 657daea..f22542f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3438,8 +3438,8 @@ static int default_zonelist_order(void) z = &NODE_DATA(nid)->node_zones[zone_type]; if (populated_zone(z)) { if (zone_type < ZONE_NORMAL) - low_kmem_size += z->present_pages; - total_size += z->present_pages; + low_kmem_size += z->managed_pages; + total_size += z->managed_pages; } else if (zone_type == ZONE_NORMAL) { /* * If any node has only lowmem, then node order -- cgit v0.10.2 From 7b4b2a0d6c8500350784beb83a6a55e60ea3bea3 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:11 -0700 Subject: mm: accurately calculate zone->managed_pages for highmem zones Commit "mm: introduce new field 'managed_pages' to struct zone" assumes that all highmem pages will be freed into the buddy system by function mem_init(). But that's not always true, some architectures may reserve some highmem pages during boot. For example PPC may allocate highmem pages for giagant HugeTLB pages, and several architectures have code to check PageReserved flag to exclude highmem pages allocated during boot when freeing highmem pages into the buddy system. So treat highmem pages in the same way as normal pages, that is to: 1) reset zone->managed_pages to zero in mem_init(). 2) recalculate managed_pages when freeing pages into the buddy system. Signed-off-by: Jiang Liu Cc: "H. Peter Anvin" Cc: Tejun Heo Cc: Joonsoo Kim Cc: Yinghai Lu Cc: Mel Gorman Cc: Minchan Kim Cc: Kamezawa Hiroyuki Cc: Marek Szyprowski Cc: "Michael S. Tsirkin" Cc: Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Chris Metcalf Cc: David Howells Cc: Geert Uytterhoeven Cc: Ingo Molnar Cc: Jeremy Fitzhardinge Cc: Jianguo Wu Cc: Konrad Rzeszutek Wilk Cc: Michel Lespinasse Cc: Rik van Riel Cc: Rusty Russell Cc: Tang Chen Cc: Thomas Gleixner Cc: Wen Congyang Cc: Will Deacon Cc: Yasuaki Ishimatsu Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c index 5e2238d..d7595f5 100644 --- a/arch/metag/mm/init.c +++ b/arch/metag/mm/init.c @@ -380,6 +380,12 @@ void __init mem_init(void) #ifdef CONFIG_HIGHMEM unsigned long tmp; + + /* + * Explicitly reset zone->managed_pages because highmem pages are + * freed before calling free_all_bootmem_node(); + */ + reset_all_zones_managed_pages(); for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) free_highmem_page(pfn_to_page(tmp)); num_physpages += totalhigh_pages; diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 252b8f5..4500142 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -1,6 +1,7 @@ #include #include #include /* for totalram_pages */ +#include void *kmap(struct page *page) { @@ -121,6 +122,11 @@ void __init set_highmem_pages_init(void) struct zone *zone; int nid; + /* + * Explicitly reset zone->managed_pages because set_highmem_pages_init() + * is invoked before free_all_bootmem() + */ + reset_all_zones_managed_pages(); for_each_zone(zone) { unsigned long zone_start_pfn, zone_end_pfn; diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 5f0b0e1..0e48c32 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -46,6 +46,7 @@ extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); extern unsigned long free_all_bootmem(void); +extern void reset_all_zones_managed_pages(void); extern void free_bootmem_node(pg_data_t *pgdat, unsigned long addr, diff --git a/mm/bootmem.c b/mm/bootmem.c index 2b0bcb0..eb792323 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -241,20 +241,26 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) return count; } -static void reset_node_lowmem_managed_pages(pg_data_t *pgdat) +static int reset_managed_pages_done __initdata; + +static inline void __init reset_node_managed_pages(pg_data_t *pgdat) { struct zone *z; - /* - * In free_area_init_core(), highmem zone's managed_pages is set to - * present_pages, and bootmem allocator doesn't allocate from highmem - * zones. So there's no need to recalculate managed_pages because all - * highmem pages will be managed by the buddy system. Here highmem - * zone also includes highmem movable zone. - */ + if (reset_managed_pages_done) + return; + for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) - if (!is_highmem(z)) - z->managed_pages = 0; + z->managed_pages = 0; +} + +void __init reset_all_zones_managed_pages(void) +{ + struct pglist_data *pgdat; + + for_each_online_pgdat(pgdat) + reset_node_managed_pages(pgdat); + reset_managed_pages_done = 1; } /** @@ -266,7 +272,7 @@ static void reset_node_lowmem_managed_pages(pg_data_t *pgdat) unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) { register_page_bootmem_info_node(pgdat); - reset_node_lowmem_managed_pages(pgdat); + reset_node_managed_pages(pgdat); return free_all_bootmem_core(pgdat->bdata); } @@ -279,10 +285,8 @@ unsigned long __init free_all_bootmem(void) { unsigned long total_pages = 0; bootmem_data_t *bdata; - struct pglist_data *pgdat; - for_each_online_pgdat(pgdat) - reset_node_lowmem_managed_pages(pgdat); + reset_all_zones_managed_pages(); list_for_each_entry(bdata, &bdata_list, list) total_pages += free_all_bootmem_core(bdata); diff --git a/mm/nobootmem.c b/mm/nobootmem.c index bdd3fa2..0ae8d91 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -137,20 +137,25 @@ static unsigned long __init free_low_memory_core_early(void) return count; } -static void reset_node_lowmem_managed_pages(pg_data_t *pgdat) +static int reset_managed_pages_done __initdata; + +static inline void __init reset_node_managed_pages(pg_data_t *pgdat) { struct zone *z; - /* - * In free_area_init_core(), highmem zone's managed_pages is set to - * present_pages, and bootmem allocator doesn't allocate from highmem - * zones. So there's no need to recalculate managed_pages because all - * highmem pages will be managed by the buddy system. Here highmem - * zone also includes highmem movable zone. - */ + if (reset_managed_pages_done) + return; for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) - if (!is_highmem(z)) - z->managed_pages = 0; + z->managed_pages = 0; +} + +void __init reset_all_zones_managed_pages(void) +{ + struct pglist_data *pgdat; + + for_each_online_pgdat(pgdat) + reset_node_managed_pages(pgdat); + reset_managed_pages_done = 1; } /** @@ -160,10 +165,7 @@ static void reset_node_lowmem_managed_pages(pg_data_t *pgdat) */ unsigned long __init free_all_bootmem(void) { - struct pglist_data *pgdat; - - for_each_online_pgdat(pgdat) - reset_node_lowmem_managed_pages(pgdat); + reset_all_zones_managed_pages(); /* * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f22542f..22438eb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5232,6 +5232,7 @@ void free_highmem_page(struct page *page) { __free_reserved_page(page); totalram_pages++; + page_zone(page)->managed_pages++; totalhigh_pages++; } #endif -- cgit v0.10.2 From c3d5f5f0c2bc4eabeaf49f1a21e1aeb965246cd2 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:14 -0700 Subject: mm: use a dedicated lock to protect totalram_pages and zone->managed_pages Currently lock_memory_hotplug()/unlock_memory_hotplug() are used to protect totalram_pages and zone->managed_pages. Other than the memory hotplug driver, totalram_pages and zone->managed_pages may also be modified at runtime by other drivers, such as Xen balloon, virtio_balloon etc. For those cases, memory hotplug lock is a little too heavy, so introduce a dedicated lock to protect totalram_pages and zone->managed_pages. Now we have a simplified locking rules totalram_pages and zone->managed_pages as: 1) no locking for read accesses because they are unsigned long. 2) no locking for write accesses at boot time in single-threaded context. 3) serialize write accesses at runtime by acquiring the dedicated managed_page_count_lock. Also adjust zone->managed_pages when freeing reserved pages into the buddy system, to keep totalram_pages and zone->managed_pages in consistence. [akpm@linux-foundation.org: don't export adjust_managed_page_count to modules (for now)] Signed-off-by: Jiang Liu Cc: Mel Gorman Cc: Michel Lespinasse Cc: Rik van Riel Cc: Minchan Kim Cc: "H. Peter Anvin" Cc: "Michael S. Tsirkin" Cc: Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Chris Metcalf Cc: David Howells Cc: Geert Uytterhoeven Cc: Ingo Molnar Cc: Jeremy Fitzhardinge Cc: Jianguo Wu Cc: Joonsoo Kim Cc: Kamezawa Hiroyuki Cc: Konrad Rzeszutek Wilk Cc: Marek Szyprowski Cc: Rusty Russell Cc: Tang Chen Cc: Tejun Heo Cc: Thomas Gleixner Cc: Wen Congyang Cc: Will Deacon Cc: Yasuaki Ishimatsu Cc: Yinghai Lu Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mm.h b/include/linux/mm.h index 083cc0b..4310f80 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1313,6 +1313,7 @@ extern void free_initmem(void); */ extern unsigned long free_reserved_area(void *start, void *end, int poison, char *s); + #ifdef CONFIG_HIGHMEM /* * Free a highmem page into the buddy system, adjusting totalhigh_pages @@ -1321,10 +1322,7 @@ extern unsigned long free_reserved_area(void *start, void *end, extern void free_highmem_page(struct page *page); #endif -static inline void adjust_managed_page_count(struct page *page, long count) -{ - totalram_pages += count; -} +extern void adjust_managed_page_count(struct page *page, long count); /* Free the reserved page into the buddy system, so it gets managed. */ static inline void __free_reserved_page(struct page *page) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e511f94..09d381b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -474,10 +474,16 @@ struct zone { * frequently read in proximity to zone->lock. It's good to * give them a chance of being in the same cacheline. * - * Write access to present_pages and managed_pages at runtime should - * be protected by lock_memory_hotplug()/unlock_memory_hotplug(). - * Any reader who can't tolerant drift of present_pages and - * managed_pages should hold memory hotplug lock to get a stable value. + * Write access to present_pages at runtime should be protected by + * lock_memory_hotplug()/unlock_memory_hotplug(). Any reader who can't + * tolerant drift of present_pages should hold memory hotplug lock to + * get a stable value. + * + * Read access to managed_pages should be safe because it's unsigned + * long. Write access to zone->managed_pages and totalram_pages are + * protected by managed_page_count_lock at runtime. Idealy only + * adjust_managed_page_count() should be used instead of directly + * touching zone->managed_pages and totalram_pages. */ unsigned long spanned_pages; unsigned long present_pages; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 22438eb..93f292a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -103,6 +103,9 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = { }; EXPORT_SYMBOL(node_states); +/* Protect totalram_pages and zone->managed_pages */ +static DEFINE_SPINLOCK(managed_page_count_lock); + unsigned long totalram_pages __read_mostly; unsigned long totalreserve_pages __read_mostly; /* @@ -5206,6 +5209,14 @@ early_param("movablecore", cmdline_parse_movablecore); #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ +void adjust_managed_page_count(struct page *page, long count) +{ + spin_lock(&managed_page_count_lock); + page_zone(page)->managed_pages += count; + totalram_pages += count; + spin_unlock(&managed_page_count_lock); +} + unsigned long free_reserved_area(void *start, void *end, int poison, char *s) { void *pos; -- cgit v0.10.2 From 170a5a7eb2bf10161197e5490fbc29ca4561aedb Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:17 -0700 Subject: mm: make __free_pages_bootmem() only available at boot time In order to simpilify management of totalram_pages and zone->managed_pages, make __free_pages_bootmem() only available at boot time. With this change applied, __free_pages_bootmem() will only be used by bootmem.c and nobootmem.c at boot time, so mark it as __init. Other callers of __free_pages_bootmem() have been converted to use free_reserved_page(), which handles totalram_pages and zone->managed_pages in a safer way. This patch also fix a bug in free_pagetable() for x86_64, which should increase zone->managed_pages instead of zone->present_pages when freeing reserved pages. And now we have managed_pages_count_lock to protect totalram_pages and zone->managed_pages, so remove the redundant ppb_lock lock in put_page_bootmem(). This greatly simplifies the locking rules. Signed-off-by: Jiang Liu Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Yinghai Lu Cc: Wen Congyang Cc: Tang Chen Cc: Yasuaki Ishimatsu Cc: Mel Gorman Cc: Minchan Kim Cc: "Michael S. Tsirkin" Cc: Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Chris Metcalf Cc: David Howells Cc: Geert Uytterhoeven Cc: Jeremy Fitzhardinge Cc: Jianguo Wu Cc: Joonsoo Kim Cc: Kamezawa Hiroyuki Cc: Konrad Rzeszutek Wilk Cc: Marek Szyprowski Cc: Michel Lespinasse Cc: Rik van Riel Cc: Rusty Russell Cc: Tejun Heo Cc: Will Deacon Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index b7bdf7b..ec312a9 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -712,36 +712,22 @@ EXPORT_SYMBOL_GPL(arch_add_memory); static void __meminit free_pagetable(struct page *page, int order) { - struct zone *zone; - bool bootmem = false; unsigned long magic; unsigned int nr_pages = 1 << order; /* bootmem page has reserved flag */ if (PageReserved(page)) { __ClearPageReserved(page); - bootmem = true; magic = (unsigned long)page->lru.next; if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) { while (nr_pages--) put_page_bootmem(page++); } else - __free_pages_bootmem(page, order); + while (nr_pages--) + free_reserved_page(page++); } else free_pages((unsigned long)page_address(page), order); - - /* - * SECTION_INFO pages and MIX_SECTION_INFO pages - * are all allocated by bootmem. - */ - if (bootmem) { - zone = page_zone(page); - zone_span_writelock(zone); - zone->present_pages += nr_pages; - zone_span_writeunlock(zone); - totalram_pages += nr_pages; - } } static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6096cb9..814ecb2 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -101,12 +101,9 @@ void get_page_bootmem(unsigned long info, struct page *page, atomic_inc(&page->_count); } -/* reference to __meminit __free_pages_bootmem is valid - * so use __ref to tell modpost not to generate a warning */ -void __ref put_page_bootmem(struct page *page) +void put_page_bootmem(struct page *page) { unsigned long type; - static DEFINE_MUTEX(ppb_lock); type = (unsigned long) page->lru.next; BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE || @@ -116,17 +113,8 @@ void __ref put_page_bootmem(struct page *page) ClearPagePrivate(page); set_page_private(page, 0); INIT_LIST_HEAD(&page->lru); - - /* - * Please refer to comment for __free_pages_bootmem() - * for why we serialize here. - */ - mutex_lock(&ppb_lock); - __free_pages_bootmem(page, 0); - mutex_unlock(&ppb_lock); - totalram_pages++; + free_reserved_page(page); } - } #ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 93f292a..2437a7e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -745,14 +745,7 @@ static void __free_pages_ok(struct page *page, unsigned int order) local_irq_restore(flags); } -/* - * Read access to zone->managed_pages is safe because it's unsigned long, - * but we still need to serialize writers. Currently all callers of - * __free_pages_bootmem() except put_page_bootmem() should only be used - * at boot time. So for shorter boot time, we shift the burden to - * put_page_bootmem() to serialize writers. - */ -void __meminit __free_pages_bootmem(struct page *page, unsigned int order) +void __init __free_pages_bootmem(struct page *page, unsigned int order) { unsigned int nr_pages = 1 << order; unsigned int loop; -- cgit v0.10.2 From 3dcc0571cd64816309765b7c7e4691a4cadf2ee7 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:21 -0700 Subject: mm: correctly update zone->managed_pages Enhance adjust_managed_page_count() to adjust totalhigh_pages for highmem pages. And change code which directly adjusts totalram_pages to use adjust_managed_page_count() because it adjusts totalram_pages, totalhigh_pages and zone->managed_pages altogether in a safe way. Remove inc_totalhigh_pages() and dec_totalhigh_pages() from xen/balloon driver bacause adjust_managed_page_count() has already adjusted totalhigh_pages. This patch also fixes two bugs: 1) enhances virtio_balloon driver to adjust totalhigh_pages when reserve/unreserve pages. 2) enhance memory_hotplug.c to adjust totalhigh_pages when hot-removing memory. We still need to deal with modifications of totalram_pages in file arch/powerpc/platforms/pseries/cmm.c, but need help from PPC experts. [akpm@linux-foundation.org: remove ifdef, per Wanpeng Li, virtio_balloon.c cleanup, per Sergei] [akpm@linux-foundation.org: export adjust_managed_page_count() to modules, for drivers/virtio/virtio_balloon.c] Signed-off-by: Jiang Liu Cc: Chris Metcalf Cc: Rusty Russell Cc: "Michael S. Tsirkin" Cc: Konrad Rzeszutek Wilk Cc: Jeremy Fitzhardinge Cc: Wen Congyang Cc: Tang Chen Cc: Yasuaki Ishimatsu Cc: Mel Gorman Cc: Minchan Kim Cc: "H. Peter Anvin" Cc: Cc: Arnd Bergmann Cc: Catalin Marinas Cc: David Howells Cc: Geert Uytterhoeven Cc: Ingo Molnar Cc: Jianguo Wu Cc: Joonsoo Kim Cc: Kamezawa Hiroyuki Cc: Marek Szyprowski Cc: Michel Lespinasse Cc: Rik van Riel Cc: Tejun Heo Cc: Thomas Gleixner Cc: Will Deacon Cc: Yinghai Lu Cc: Russell King Cc: Sergei Shtylyov Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index bd3ae32..0098810 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -148,7 +148,7 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num) } set_page_pfns(vb->pfns + vb->num_pfns, page); vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE; - totalram_pages--; + adjust_managed_page_count(page, -1); } /* Did we get any? */ @@ -163,8 +163,9 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num) /* Find pfns pointing at start of each page, get pages and free them. */ for (i = 0; i < num; i += VIRTIO_BALLOON_PAGES_PER_PAGE) { - balloon_page_free(balloon_pfn_to_page(pfns[i])); - totalram_pages++; + struct page *page = balloon_pfn_to_page(pfns[i]); + balloon_page_free(page); + adjust_managed_page_count(page, 1); } } diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 930fb68..c8aab4e 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -89,14 +89,6 @@ EXPORT_SYMBOL_GPL(balloon_stats); /* We increase/decrease in batches which fit in a page */ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)]; -#ifdef CONFIG_HIGHMEM -#define inc_totalhigh_pages() (totalhigh_pages++) -#define dec_totalhigh_pages() (totalhigh_pages--) -#else -#define inc_totalhigh_pages() do {} while (0) -#define dec_totalhigh_pages() do {} while (0) -#endif - /* List of ballooned pages, threaded through the mem_map array. */ static LIST_HEAD(ballooned_pages); @@ -132,9 +124,7 @@ static void __balloon_append(struct page *page) static void balloon_append(struct page *page) { __balloon_append(page); - if (PageHighMem(page)) - dec_totalhigh_pages(); - totalram_pages--; + adjust_managed_page_count(page, -1); } /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ @@ -151,13 +141,12 @@ static struct page *balloon_retrieve(bool prefer_highmem) page = list_entry(ballooned_pages.next, struct page, lru); list_del(&page->lru); - if (PageHighMem(page)) { + if (PageHighMem(page)) balloon_stats.balloon_high--; - inc_totalhigh_pages(); - } else + else balloon_stats.balloon_low--; - totalram_pages++; + adjust_managed_page_count(page, 1); return page; } @@ -372,9 +361,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages) #endif /* Relinquish the page back to the allocator. */ - ClearPageReserved(page); - init_page_count(page); - __free_page(page); + __free_reserved_page(page); } balloon_stats.current_pages += rc; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index fe09515..83aff0a 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1263,7 +1263,7 @@ static void __init gather_bootmem_prealloc(void) * side-effects, like CommitLimit going negative. */ if (h->order > (MAX_ORDER - 1)) - totalram_pages += 1 << h->order; + adjust_managed_page_count(page, 1 << h->order); } } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 814ecb2..5e34922 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -772,20 +772,13 @@ EXPORT_SYMBOL_GPL(__online_page_set_limits); void __online_page_increment_counters(struct page *page) { - totalram_pages++; - -#ifdef CONFIG_HIGHMEM - if (PageHighMem(page)) - totalhigh_pages++; -#endif + adjust_managed_page_count(page, 1); } EXPORT_SYMBOL_GPL(__online_page_increment_counters); void __online_page_free(struct page *page) { - ClearPageReserved(page); - init_page_count(page); - __free_page(page); + __free_reserved_page(page); } EXPORT_SYMBOL_GPL(__online_page_free); @@ -983,7 +976,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ return ret; } - zone->managed_pages += onlined_pages; zone->present_pages += onlined_pages; pgdat_resize_lock(zone->zone_pgdat, &flags); @@ -1572,15 +1564,13 @@ repeat: /* reset pagetype flags and makes migrate type to be MOVABLE */ undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); /* removal success */ - zone->managed_pages -= offlined_pages; + adjust_managed_page_count(pfn_to_page(start_pfn), -offlined_pages); zone->present_pages -= offlined_pages; pgdat_resize_lock(zone->zone_pgdat, &flags); zone->zone_pgdat->node_present_pages -= offlined_pages; pgdat_resize_unlock(zone->zone_pgdat, &flags); - totalram_pages -= offlined_pages; - init_per_zone_wmark_min(); if (!populated_zone(zone)) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2437a7e..1481439 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -780,11 +780,7 @@ void __init init_cma_reserved_pageblock(struct page *page) set_page_refcounted(page); set_pageblock_migratetype(page, MIGRATE_CMA); __free_pages(page, pageblock_order); - totalram_pages += pageblock_nr_pages; -#ifdef CONFIG_HIGHMEM - if (PageHighMem(page)) - totalhigh_pages += pageblock_nr_pages; -#endif + adjust_managed_page_count(page, pageblock_nr_pages); } #endif @@ -5207,8 +5203,13 @@ void adjust_managed_page_count(struct page *page, long count) spin_lock(&managed_page_count_lock); page_zone(page)->managed_pages += count; totalram_pages += count; +#ifdef CONFIG_HIGHMEM + if (PageHighMem(page)) + totalhigh_pages += count; +#endif spin_unlock(&managed_page_count_lock); } +EXPORT_SYMBOL(adjust_managed_page_count); unsigned long free_reserved_area(void *start, void *end, int poison, char *s) { -- cgit v0.10.2 From 0c988534737a358fdff42fcce78f0ff1a12dbfc5 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:24 -0700 Subject: mm: concentrate modification of totalram_pages into the mm core Concentrate code to modify totalram_pages into the mm core, so the arch memory initialized code doesn't need to take care of it. With these changes applied, only following functions from mm core modify global variable totalram_pages: free_bootmem_late(), free_all_bootmem(), free_all_bootmem_node(), adjust_managed_page_count(). With this patch applied, it will be much more easier for us to keep totalram_pages and zone->managed_pages in consistence. Signed-off-by: Jiang Liu Acked-by: David Howells Cc: "H. Peter Anvin" Cc: "Michael S. Tsirkin" Cc: Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Chris Metcalf Cc: Geert Uytterhoeven Cc: Ingo Molnar Cc: Jeremy Fitzhardinge Cc: Jianguo Wu Cc: Joonsoo Kim Cc: Kamezawa Hiroyuki Cc: Konrad Rzeszutek Wilk Cc: Marek Szyprowski Cc: Mel Gorman Cc: Michel Lespinasse Cc: Minchan Kim Cc: Rik van Riel Cc: Rusty Russell Cc: Tang Chen Cc: Tejun Heo Cc: Thomas Gleixner Cc: Wen Congyang Cc: Will Deacon Cc: Yasuaki Ishimatsu Cc: Yinghai Lu Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 218c29c..eee47a4 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -309,7 +309,7 @@ void __init mem_init(void) { max_mapnr = num_physpages = max_low_pfn; - totalram_pages += free_all_bootmem(); + free_all_bootmem(); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); printk_memory_info(); diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index 3388504..857452c 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -334,7 +334,7 @@ void __init mem_init(void) /* * This will free up the bootmem, ie, slot 0 memory */ - totalram_pages += free_all_bootmem_node(NODE_DATA(nid)); + free_all_bootmem_node(NODE_DATA(nid)); pfn = NODE_DATA(nid)->node_start_pfn; for (i = 0; i < node_spanned_pages(nid); i++, pfn++) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index f9c7077..c668a60 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -111,7 +111,7 @@ void __init mem_init(void) high_memory = (void *)(CONFIG_LINUX_LINK_BASE + arc_mem_sz); - totalram_pages = free_all_bootmem(); + free_all_bootmem(); /* count all reserved pages [kernel code/data/mem_map..] */ reserved_pages = 0; diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 2070651..06e9ce1 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -596,8 +596,7 @@ void __init mem_init(void) /* this will put all unused low memory onto the freelists */ free_unused_memmap(&meminfo); - - totalram_pages += free_all_bootmem(); + free_all_bootmem(); #ifdef CONFIG_SA1111 /* now that our DMA memory is actually so designated, we can free it */ diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index a398eb9..93de98a 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -284,7 +284,7 @@ void __init mem_init(void) free_unused_memmap(); #endif - totalram_pages += free_all_bootmem(); + free_all_bootmem(); reserved_pages = free_pages = 0; diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c index b079e04..af6890f 100644 --- a/arch/avr32/mm/init.c +++ b/arch/avr32/mm/init.c @@ -117,8 +117,6 @@ void __init mem_init(void) if (pgdat->node_spanned_pages != 0) node_pages = free_all_bootmem_node(pgdat); - totalram_pages += node_pages; - for (i = 0; i < node_pages; i++) if (PageReserved(pgdat->node_mem_map + i)) reservedpages++; diff --git a/arch/blackfin/mm/init.c b/arch/blackfin/mm/init.c index fa241f5..c73d80e 100644 --- a/arch/blackfin/mm/init.c +++ b/arch/blackfin/mm/init.c @@ -104,7 +104,7 @@ void __init mem_init(void) printk(KERN_DEBUG "Kernel managed physical pages: %lu\n", num_physpages); /* This will put all low memory onto the freelists. */ - totalram_pages = free_all_bootmem(); + free_all_bootmem(); reservedpages = 0; for (tmp = ARCH_PFN_OFFSET; tmp < max_mapnr; tmp++) diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c index 3987a20..c9ae8ce 100644 --- a/arch/c6x/mm/init.c +++ b/arch/c6x/mm/init.c @@ -65,7 +65,7 @@ void __init mem_init(void) high_memory = (void *)(memory_end & PAGE_MASK); /* this will put all memory onto the freelists */ - totalram_pages = free_all_bootmem(); + free_all_bootmem(); codek = (_etext - _stext) >> 10; datak = (_end - _sdata) >> 10; diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c index 8fec263..52b8b56 100644 --- a/arch/cris/mm/init.c +++ b/arch/cris/mm/init.c @@ -33,7 +33,7 @@ mem_init(void) max_mapnr = num_physpages = max_low_pfn - min_low_pfn; /* this will put all memory onto the freelists */ - totalram_pages = free_all_bootmem(); + free_all_bootmem(); reservedpages = 0; for (tmp = 0; tmp < max_mapnr; tmp++) { diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c index 8ba9d22..3dcc888 100644 --- a/arch/frv/mm/init.c +++ b/arch/frv/mm/init.c @@ -123,7 +123,7 @@ void __init mem_init(void) int codek = 0, datak = 0; /* this will put all low memory onto the freelists */ - totalram_pages = free_all_bootmem(); + free_all_bootmem(); #ifdef CONFIG_MMU for (loop = 0 ; loop < npages ; loop++) diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c index c831f1d..a506dd4 100644 --- a/arch/h8300/mm/init.c +++ b/arch/h8300/mm/init.c @@ -140,7 +140,7 @@ void __init mem_init(void) max_mapnr = num_physpages = MAP_NR(high_memory); /* this will put all low memory onto the freelists */ - totalram_pages = free_all_bootmem(); + free_all_bootmem(); codek = (_etext - _stext) >> 10; datak = (__bss_stop - _sdata) >> 10; diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c index 2561d25..0ab5b43 100644 --- a/arch/hexagon/mm/init.c +++ b/arch/hexagon/mm/init.c @@ -70,7 +70,7 @@ unsigned long long kmap_generation; void __init mem_init(void) { /* No idea where this is actually declared. Seems to evade LXR. */ - totalram_pages += free_all_bootmem(); + free_all_bootmem(); num_physpages = bootmem_lastpg-ARCH_PFN_OFFSET; printk(KERN_INFO "totalram_pages = %ld\n", totalram_pages); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index f8a4f38..d141f7e 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -622,7 +622,7 @@ mem_init (void) for_each_online_pgdat(pgdat) if (pgdat->bdata->node_bootmem_map) - totalram_pages += free_all_bootmem_node(pgdat); + free_all_bootmem_node(pgdat); reserved_pages = 0; efi_memmap_walk(count_reserved_pages, &reserved_pages); diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index cca87d9..a501838 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c @@ -158,7 +158,7 @@ void __init mem_init(void) /* this will put all low memory onto the freelists */ for_each_online_node(nid) - totalram_pages += free_all_bootmem_node(NODE_DATA(nid)); + free_all_bootmem_node(NODE_DATA(nid)); reservedpages = reservedpages_count() - hole_pages; codesize = (unsigned long) &_etext - (unsigned long)&_text; diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index ab0b54c..614c60a 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -155,11 +155,11 @@ void __init mem_init(void) int i; /* this will put all memory onto the freelists */ - totalram_pages = num_physpages = 0; + num_physpages = 0; for_each_online_pgdat(pgdat) { num_physpages += pgdat->node_present_pages; - totalram_pages += free_all_bootmem_node(pgdat); + free_all_bootmem_node(pgdat); for (i = 0; i < pgdat->node_spanned_pages; i++) { struct page *page = pgdat->node_mem_map + i; char *addr = page_to_virt(page); diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c index d7595f5..ce81d7c 100644 --- a/arch/metag/mm/init.c +++ b/arch/metag/mm/init.c @@ -393,14 +393,11 @@ void __init mem_init(void) for_each_online_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); - unsigned long node_pages = 0; num_physpages += pgdat->node_present_pages; if (pgdat->node_spanned_pages) - node_pages = free_all_bootmem_node(pgdat); - - totalram_pages += node_pages; + free_all_bootmem_node(pgdat); } pr_info("Memory: %luk/%luk available\n", diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index d149e0e..b384cbc 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -252,7 +252,7 @@ void __init mem_init(void) high_memory = (void *)__va(memory_start + lowmem_size - 1); /* this will put all memory onto the freelists */ - totalram_pages += free_all_bootmem(); + free_all_bootmem(); for_each_online_pgdat(pgdat) { unsigned long i; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 268f2a9..e7333f1 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -374,7 +374,7 @@ void __init mem_init(void) #endif high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); - totalram_pages += free_all_bootmem(); + free_all_bootmem(); setup_zero_pages(); /* Setup zeroed pages. */ reservedpages = ram = 0; diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 1230f56..aecac4a 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -489,7 +489,7 @@ void __init mem_init(void) /* * This will free up the bootmem, ie, slot 0 memory. */ - totalram_pages += free_all_bootmem_node(NODE_DATA(node)); + free_all_bootmem_node(NODE_DATA(node)); } setup_zero_pages(); /* This comes from node 0 */ diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c index e19049d..7590d91 100644 --- a/arch/mn10300/mm/init.c +++ b/arch/mn10300/mm/init.c @@ -114,7 +114,7 @@ void __init mem_init(void) memset(empty_zero_page, 0, PAGE_SIZE); /* this will put all low memory onto the freelists */ - totalram_pages += free_all_bootmem(); + free_all_bootmem(); reservedpages = 0; for (tmp = 0; tmp < num_physpages; tmp++) diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index c371e4a..16c1e13 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -207,7 +207,7 @@ static int __init free_pages_init(void) int reservedpages, pfn; /* this will put all low memory onto the freelists */ - totalram_pages = free_all_bootmem(); + free_all_bootmem(); reservedpages = 0; for (pfn = 0; pfn < max_low_pfn; pfn++) { diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index ebac7bd..d8aaaf0 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -593,13 +593,13 @@ void __init mem_init(void) #ifndef CONFIG_DISCONTIGMEM max_mapnr = page_to_pfn(virt_to_page(high_memory - 1)) + 1; - totalram_pages += free_all_bootmem(); + free_all_bootmem(); #else { int i; for (i = 0; i < npmem_ranges; i++) - totalram_pages += free_all_bootmem_node(NODE_DATA(i)); + free_all_bootmem_node(NODE_DATA(i)); } #endif diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 7f47a05..3bcfc0d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -318,13 +318,12 @@ void __init mem_init(void) for_each_online_node(nid) { if (NODE_DATA(nid)->node_spanned_pages != 0) { printk("freeing bootmem node %d\n", nid); - totalram_pages += - free_all_bootmem_node(NODE_DATA(nid)); + free_all_bootmem_node(NODE_DATA(nid)); } } #else max_mapnr = max_pfn; - totalram_pages += free_all_bootmem(); + free_all_bootmem(); #endif for_each_online_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; i++) { diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index bf01d18..a2aafe1 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -144,7 +144,7 @@ void __init mem_init(void) cmma_init(); /* this will put all low memory onto the freelists */ - totalram_pages += free_all_bootmem(); + free_all_bootmem(); setup_zero_pages(); /* Setup zeroed pages. */ reservedpages = 0; diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c index f5dd61e..a8b9177 100644 --- a/arch/score/mm/init.c +++ b/arch/score/mm/init.c @@ -79,7 +79,7 @@ void __init mem_init(void) unsigned long tmp, ram = 0; high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); - totalram_pages += free_all_bootmem(); + free_all_bootmem(); setup_zero_page(); /* Setup zeroed pages. */ reservedpages = 0; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index d3af56b..fc0c8e1 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -422,7 +422,7 @@ void __init mem_init(void) num_physpages += pgdat->node_present_pages; if (pgdat->node_spanned_pages) - totalram_pages += free_all_bootmem_node(pgdat); + free_all_bootmem_node(pgdat); node_high_memory = (void *)__va((pgdat->node_start_pfn + diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index d5f9c02..a438abb 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -323,8 +323,7 @@ void __init mem_init(void) max_mapnr = last_valid_pfn - pfn_base; high_memory = __va(max_low_pfn << PAGE_SHIFT); - - totalram_pages = free_all_bootmem(); + free_all_bootmem(); for (i = 0; sp_banks[i].num_bytes != 0; i++) { unsigned long start_pfn = sp_banks[i].base_addr >> PAGE_SHIFT; diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 8269deb..752d738 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2061,7 +2061,7 @@ void __init mem_init(void) high_memory = __va(last_valid_pfn << PAGE_SHIFT); register_page_bootmem_info(); - totalram_pages = free_all_bootmem(); + free_all_bootmem(); /* We subtract one to account for the mem_map_zero page * allocated below. diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index ccfeb3f..45ce26d 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -846,7 +846,7 @@ void __init mem_init(void) set_max_mapnr_init(); /* this will put all bootmem onto the freelists */ - totalram_pages += free_all_bootmem(); + free_all_bootmem(); #ifndef CONFIG_64BIT /* count all remaining LOWMEM and give all HIGHMEM to page allocator */ diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 8ff0b7a..b0c7630 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -65,7 +65,7 @@ void __init mem_init(void) uml_reserved = brk_end; /* this will put all low memory onto the freelists */ - totalram_pages = free_all_bootmem(); + free_all_bootmem(); max_low_pfn = totalram_pages; #ifdef CONFIG_HIGHMEM setup_highmem(end_iomem, highmem); diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index df9b8ab..7d1356c 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -392,7 +392,7 @@ void __init mem_init(void) free_unused_memmap(&meminfo); /* this will put all unused low memory onto the freelists */ - totalram_pages += free_all_bootmem(); + free_all_bootmem(); reserved_pages = free_pages = 0; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 3ac7e31..9fa46ba 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -759,7 +759,7 @@ void __init mem_init(void) set_highmem_pages_init(); /* this will put all low memory onto the freelists */ - totalram_pages += free_all_bootmem(); + free_all_bootmem(); reservedpages = 0; for (tmp = 0; tmp < max_low_pfn; tmp++) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ec312a9..9577638 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1054,7 +1054,7 @@ void __init mem_init(void) register_page_bootmem_info(); /* this will put all memory onto the freelists */ - totalram_pages = free_all_bootmem(); + free_all_bootmem(); absent_pages = absent_pages_in_range(0, max_pfn); reservedpages = max_pfn - totalram_pages - absent_pages; diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 026d29b..663c161 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -184,7 +184,7 @@ void __init mem_init(void) #error HIGHGMEM not implemented in init.c #endif - totalram_pages += free_all_bootmem(); + free_all_bootmem(); reservedpages = ram = 0; for (tmp = 0; tmp < max_mapnr; tmp++) { diff --git a/mm/bootmem.c b/mm/bootmem.c index eb792323..58609bb 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -271,9 +271,14 @@ void __init reset_all_zones_managed_pages(void) */ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) { + unsigned long pages; + register_page_bootmem_info_node(pgdat); reset_node_managed_pages(pgdat); - return free_all_bootmem_core(pgdat->bdata); + pages = free_all_bootmem_core(pgdat->bdata); + totalram_pages += pages; + + return pages; } /** @@ -291,6 +296,8 @@ unsigned long __init free_all_bootmem(void) list_for_each_entry(bdata, &bdata_list, list) total_pages += free_all_bootmem_core(bdata); + totalram_pages += total_pages; + return total_pages; } diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 0ae8d91..61107cf 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -165,6 +165,8 @@ void __init reset_all_zones_managed_pages(void) */ unsigned long __init free_all_bootmem(void) { + unsigned long pages; + reset_all_zones_managed_pages(); /* @@ -172,7 +174,10 @@ unsigned long __init free_all_bootmem(void) * because in some case like Node0 doesn't have RAM installed * low ram will be on Node1 */ - return free_low_memory_core_early(); + pages = free_low_memory_core_early(); + totalram_pages += pages; + + return pages; } /** -- cgit v0.10.2 From cdd91a77043ba81585236ef61f65c18222b212e6 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:27 -0700 Subject: mm: report available pages as "MemTotal" for each NUMA node As reported by https://bugzilla.kernel.org/show_bug.cgi?id=53501, "MemTotal" from /proc/meminfo means memory pages managed by the buddy system (managed_pages), but "MemTotal" from /sys/.../node/nodex/meminfo means physical pages present (present_pages) within the NUMA node. There's a difference between managed_pages and present_pages due to bootmem allocator and reserved pages. And Documentation/filesystems/proc.txt says MemTotal: Total usable ram (i.e. physical ram minus a few reserved bits and the kernel binary code) So change /sys/.../node/nodex/meminfo to report available pages within the node as "MemTotal". Signed-off-by: Jiang Liu Reported-by: Cc: Mel Gorman Cc: Minchan Kim Cc: "H. Peter Anvin" Cc: "Michael S. Tsirkin" Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Chris Metcalf Cc: David Howells Cc: Geert Uytterhoeven Cc: Ingo Molnar Cc: Jeremy Fitzhardinge Cc: Jianguo Wu Cc: Joonsoo Kim Cc: Kamezawa Hiroyuki Cc: Konrad Rzeszutek Wilk Cc: Marek Szyprowski Cc: Michel Lespinasse Cc: Rik van Riel Cc: Rusty Russell Cc: Tang Chen Cc: Tejun Heo Cc: Thomas Gleixner Cc: Wen Congyang Cc: Will Deacon Cc: Yasuaki Ishimatsu Cc: Yinghai Lu Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1481439..d9445c4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2904,9 +2904,13 @@ EXPORT_SYMBOL(si_meminfo); #ifdef CONFIG_NUMA void si_meminfo_node(struct sysinfo *val, int nid) { + int zone_type; /* needs to be signed */ + unsigned long managed_pages = 0; pg_data_t *pgdat = NODE_DATA(nid); - val->totalram = pgdat->node_present_pages; + for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) + managed_pages += pgdat->node_zones[zone_type].managed_pages; + val->totalram = managed_pages; val->freeram = node_page_state(nid, NR_FREE_PAGES); #ifdef CONFIG_HIGHMEM val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages; -- cgit v0.10.2 From f60e2a968e2bebe34986f49251017f72b725d8c0 Mon Sep 17 00:00:00 2001 From: Sergey Dyasly Date: Wed, 3 Jul 2013 15:03:30 -0700 Subject: memcg: Kconfig info update Now there are only 2 members in struct page_cgroup. Update config MEMCG description accordingly. Signed-off-by: Sergey Dyasly Acked-by: Michal Hocko Acked-by: KOSAKI Motohiro Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/init/Kconfig b/init/Kconfig index 118895c..ef10d83 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -899,7 +899,7 @@ config MEMCG Note that setting this option increases fixed memory overhead associated with each page of memory in the system. By this, - 20(40)bytes/PAGE_SIZE on 32(64)bit system will be occupied by memory + 8(16)bytes/PAGE_SIZE on 32(64)bit system will be occupied by memory usage tracking struct at boot. Total amount of this is printed out at boot. -- cgit v0.10.2 From e6c495a96ce02574e765d5140039a64c8d4e8c9e Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 3 Jul 2013 15:03:31 -0700 Subject: mm: fix the TLB range flushed when __tlb_remove_page() runs out of slots zap_pte_range loops from @addr to @end. In the middle, if it runs out of batching slots, TLB entries needs to be flushed for @start to @interim, NOT @interim to @end. Since ARC port doesn't use page free batching I can't test it myself but this seems like the right thing to do. Observed this when working on a fix for the issue at thread: http://www.spinics.net/lists/linux-arch/msg21736.html Signed-off-by: Vineet Gupta Cc: Mel Gorman Cc: Hugh Dickins Cc: Rik van Riel Cc: David Rientjes Cc: Peter Zijlstra Acked-by: Catalin Marinas Cc: Max Filippov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memory.c b/mm/memory.c index a101bbc..4075332 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1101,6 +1101,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, spinlock_t *ptl; pte_t *start_pte; pte_t *pte; + unsigned long range_start = addr; again: init_rss_vec(rss); @@ -1206,12 +1207,14 @@ again: force_flush = 0; #ifdef HAVE_GENERIC_MMU_GATHER - tlb->start = addr; - tlb->end = end; + tlb->start = range_start; + tlb->end = addr; #endif tlb_flush_mmu(tlb); - if (addr != end) + if (addr != end) { + range_start = addr; goto again; + } } return addr; -- cgit v0.10.2 From 1622d1abdf7b554eb7cc40c0d08d989176732242 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:33 -0700 Subject: vmlinux.lds: add comments for global variables and clean up useless declarations The original goal of this patchset is to fix the bug reported by https://bugzilla.kernel.org/show_bug.cgi?id=53501 Now it has also been expanded to reduce common code used by memory initializion. Patch 1-7: 1) add comments for global variables exported by vmlinux.lds 2) normalize global variables exported by vmlinux.lds Patch 8: Introduce helper functions mem_init_print_info() and get_num_physpages() Patch 9: Avoid using global variable num_physpages at runtime Patch 10: Don't update num_physpages in memory_hotplug.c Patch 11-40: Modify arch mm initialization code to: 1) Simplify mem_init() by using mem_init_print_info() 2) Prepare for killing global variable num_physpages Patch 41: Kill the global variable num_physpages With all patches applied, mem_init(), free_initmem(), free_initrd_mem() could be as simple as below. This patch series has reduced about 1.2K lines of code in total. #ifndef CONFIG_DISCONTIGMEM void __init mem_init(void) { max_mapnr = max_low_pfn; free_all_bootmem(); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); mem_init_print_info(NULL); } #endif /* CONFIG_DISCONTIGMEM */ void free_initmem(void) { free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { free_reserved_area(start, end, -1, "initrd"); } #endif Due to hardware resource limitations, I have only tested this on x86_64. And the messages reported on an x86_64 system are: Log message before applying patches: Memory: 7745676k/8910848k available (6934k kernel code, 836024k absent, 329148k reserved, 6343k data, 1012k init) Log message after applying patches: Memory: 7744624K/8074824K available (6969K kernel code, 1011K data, 2828K rodata, 1016K init, 9640K bss, 330200K reserved) Great thanks to Vineet Gupta for testing on ARC. This patch: Document global variables exported from vmlinux.lds. 1) Add comments about usage guidelines for global variables exported from vmlinux.lds.S. 2) Remove unused __initdata_begin[] and __initdata_end[]. Signed-off-by: Jiang Liu Acked-by: Arnd Bergmann Cc: Arnd Bergmann Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index c1a1216..f1a24b5 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -3,6 +3,26 @@ /* References to section boundaries */ +/* + * Usage guidelines: + * _text, _data: architecture specific, don't use them in arch-independent code + * [_stext, _etext]: contains .text.* sections, may also contain .rodata.* + * and/or .init.* sections + * [_sdata, _edata]: contains .data.* sections, may also contain .rodata.* + * and/or .init.* sections. + * [__start_rodata, __end_rodata]: contains .rodata.* sections + * [__init_begin, __init_end]: contains .init.* sections, but .init.text.* + * may be out of this range on some architectures. + * [_sinittext, _einittext]: contains .init.text.* sections + * [__bss_start, __bss_stop]: contains BSS sections + * + * Following global variables are optional and may be unavailable on some + * architectures and/or kernel configurations. + * _text, _data + * __kprobes_text_start, __kprobes_text_end + * __entry_text_start, __entry_text_end + * __ctors_start, __ctors_end + */ extern char _text[], _stext[], _etext[]; extern char _data[], _sdata[], _edata[]; extern char __bss_start[], __bss_stop[]; @@ -12,7 +32,6 @@ extern char _end[]; extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; extern char __entry_text_start[], __entry_text_end[]; -extern char __initdata_begin[], __initdata_end[]; extern char __start_rodata[], __end_rodata[]; /* Start and end of .ctors section - used for constructor calls. */ -- cgit v0.10.2 From 2e555f8d0f81d8e28a63fe432ec7bcc26e95006b Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:34 -0700 Subject: avr32: normalize global variables exported by vmlinux.lds Normalize global variables exported by vmlinux.lds to conform usage guidelines from include/asm-generic/sections.h. Use _text to mark the start of the kernel image including the head text, and _stext to mark the start of the .text section. Signed-off-by: Jiang Liu Acked-by: Hans-Christian Egtvedt Cc: Haavard Skinnemoen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c index b4247f4..209ae5a 100644 --- a/arch/avr32/kernel/setup.c +++ b/arch/avr32/kernel/setup.c @@ -555,7 +555,7 @@ void __init setup_arch (char **cmdline_p) { struct clk *cpu_clk; - init_mm.start_code = (unsigned long)_text; + init_mm.start_code = (unsigned long)_stext; init_mm.end_code = (unsigned long)_etext; init_mm.end_data = (unsigned long)_edata; init_mm.brk = (unsigned long)_end; diff --git a/arch/avr32/kernel/vmlinux.lds.S b/arch/avr32/kernel/vmlinux.lds.S index 9cd2bd9..a458917 100644 --- a/arch/avr32/kernel/vmlinux.lds.S +++ b/arch/avr32/kernel/vmlinux.lds.S @@ -23,7 +23,7 @@ SECTIONS { . = CONFIG_ENTRY_ADDRESS; .init : AT(ADDR(.init) - LOAD_OFFSET) { - _stext = .; + _text = .; __init_begin = .; _sinittext = .; *(.text.reset) @@ -46,7 +46,7 @@ SECTIONS .text : AT(ADDR(.text) - LOAD_OFFSET) { _evba = .; - _text = .; + _stext = .; *(.ex.text) *(.irq.text) KPROBES_TEXT -- cgit v0.10.2 From 06256f8f719917581a221221bb851fcf88de564b Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:35 -0700 Subject: c6x: normalize global variables exported by vmlinux.lds Normalize global variables exported by vmlinux.lds to conform usage guidelines from include/asm-generic/sections.h. Use _text to mark the start of the kernel image including the head text, and _stext to mark the start of the .text section. This patch also fixes possible bugs due to current address layout that [__init_begin, __init_end] is a sub-range of [_stext, _etext] and pages within range [__init_begin, __init_end] will be freed by free_initmem(). Signed-off-by: Jiang Liu Cc: Mark Salter Cc: Aurelien Jacquiot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/c6x/kernel/vmlinux.lds.S b/arch/c6x/kernel/vmlinux.lds.S index 1d81c4c..279d807 100644 --- a/arch/c6x/kernel/vmlinux.lds.S +++ b/arch/c6x/kernel/vmlinux.lds.S @@ -54,16 +54,15 @@ SECTIONS } . = ALIGN(PAGE_SIZE); + __init_begin = .; .init : { - _stext = .; _sinittext = .; HEAD_TEXT INIT_TEXT _einittext = .; } - __init_begin = _stext; INIT_DATA_SECTION(16) PERCPU_SECTION(128) @@ -74,6 +73,7 @@ SECTIONS .text : { _text = .; + _stext = .; TEXT_TEXT SCHED_TEXT LOCK_TEXT -- cgit v0.10.2 From 5dd7cd11a0dde589e3532cfdc06372a1af9fdba7 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:36 -0700 Subject: h8300: normalize global variables exported by vmlinux.lds Generate mandatory global variables __bss_start/__bss_stop in file vmlinux.lds. Also remove one unused declaration of _text. Signed-off-by: Jiang Liu Cc: Yoshinori Sato Cc: Jiang Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/h8300/boot/compressed/misc.c b/arch/h8300/boot/compressed/misc.c index 51ab6cb..4a1e3dd 100644 --- a/arch/h8300/boot/compressed/misc.c +++ b/arch/h8300/boot/compressed/misc.c @@ -79,7 +79,6 @@ static void error(char *m); int puts(const char *); -extern int _text; /* Defined in vmlinux.lds.S */ extern int _end; static unsigned long free_mem_ptr; static unsigned long free_mem_end_ptr; diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index 03d356d..3253fed 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -132,10 +132,12 @@ SECTIONS { . = ALIGN(0x4) ; __sbss = . ; + ___bss_start = . ; *(.bss*) . = ALIGN(0x4) ; *(COMMON) . = ALIGN(0x4) ; + ___bss_stop = . ; __ebss = . ; __end = . ; __ramstart = .; -- cgit v0.10.2 From ae49b83dcacfb69e22092cab688c415c2f2d870c Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:37 -0700 Subject: score: normalize global variables exported by vmlinux.lds Generate mandatory global variables _sdata in file vmlinux.lds. Signed-off-by: Jiang Liu Cc: Chen Liqin Cc: Lennox Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/score/kernel/vmlinux.lds.S b/arch/score/kernel/vmlinux.lds.S index eebcbaa..7274b5c 100644 --- a/arch/score/kernel/vmlinux.lds.S +++ b/arch/score/kernel/vmlinux.lds.S @@ -49,6 +49,7 @@ SECTIONS } . = ALIGN(16); + _sdata = .; /* Start of data section */ RODATA EXCEPTION_TABLE(16) -- cgit v0.10.2 From 40a3b8df7be3b813cef7e32f74dea398274c2d47 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:39 -0700 Subject: tile: normalize global variables exported by vmlinux.lds Normalize global variables exported by vmlinux.lds to conform usage guidelines from include/asm-generic/sections.h. 1) Use _text to mark the start of the kernel image including the head text, and _stext to mark the start of the .text section. 2) Export mandatory global variables __init_begin and __init_end. Signed-off-by: Jiang Liu Acked-by: Chris Metcalf Cc: Rusty Russell Cc: Bjorn Helgaas Cc: "David S. Miller" Cc: Wen Congyang Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/tile/include/asm/sections.h b/arch/tile/include/asm/sections.h index d062d46..7d8a935 100644 --- a/arch/tile/include/asm/sections.h +++ b/arch/tile/include/asm/sections.h @@ -34,7 +34,7 @@ extern char __sys_cmpxchg_grab_lock[]; extern char __start_atomic_asm_code[], __end_atomic_asm_code[]; #endif -/* Handle the discontiguity between _sdata and _stext. */ +/* Handle the discontiguity between _sdata and _text. */ static inline int arch_is_kernel_data(unsigned long addr) { return addr >= (unsigned long)_sdata && diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 7a5aa1a..41818e3 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -307,8 +307,8 @@ static void __cpuinit store_permanent_mappings(void) hv_store_mapping(addr, pages << PAGE_SHIFT, pa); } - hv_store_mapping((HV_VirtAddr)_stext, - (uint32_t)(_einittext - _stext), 0); + hv_store_mapping((HV_VirtAddr)_text, + (uint32_t)(_einittext - _text), 0); } /* diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index 631f10d..a13ed90 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S @@ -27,7 +27,6 @@ SECTIONS .intrpt1 (LOAD_OFFSET) : AT ( 0 ) /* put at the start of physical memory */ { _text = .; - _stext = .; *(.intrpt1) } :intrpt1 =0 @@ -36,6 +35,7 @@ SECTIONS /* Now the real code */ . = ALIGN(0x20000); + _stext = .; .text : AT (ADDR(.text) - LOAD_OFFSET) { HEAD_TEXT SCHED_TEXT @@ -58,11 +58,13 @@ SECTIONS #define LOAD_OFFSET PAGE_OFFSET . = ALIGN(PAGE_SIZE); + __init_begin = .; VMLINUX_SYMBOL(_sinitdata) = .; INIT_DATA_SECTION(16) :data =0 PERCPU_SECTION(L2_CACHE_BYTES) . = ALIGN(PAGE_SIZE); VMLINUX_SYMBOL(_einitdata) = .; + __init_end = .; _sdata = .; /* Start of data section */ diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 45ce26d..f2ac2f4 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -562,7 +562,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) prot = ktext_set_nocache(prot); } - BUG_ON(address != (unsigned long)_stext); + BUG_ON(address != (unsigned long)_text); pte = NULL; for (; address < (unsigned long)_einittext; pfn++, address += PAGE_SIZE) { -- cgit v0.10.2 From a214a8c68bcdef2fb0803425f7fe36fe41030d3f Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:40 -0700 Subject: UML: normalize global variables exported by vmlinux.lds Normalize global variables exported by vmlinux.lds to conform usage guidelines from include/asm-generic/sections.h. 1) Use _text to mark the start of the kernel image including the head text, and _stext to mark the start of the .text section. 2) Export mandatory global variables __bss_stop. 3) Adjust __init_begin and __init_end to avoid acrossing .text and .data sections. Signed-off-by: Jiang Liu Cc: Jeff Dike Cc: Richard Weinberger Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S index 4938de5..1dd5bd8 100644 --- a/arch/um/include/asm/common.lds.S +++ b/arch/um/include/asm/common.lds.S @@ -57,7 +57,6 @@ *(.uml.initcall.init) __uml_initcall_end = .; } - __init_end = .; SECURITY_INIT diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index fb8fd6f..adde088 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -14,8 +14,6 @@ SECTIONS __binary_start = .; . = ALIGN(4096); /* Init code and data */ _text = .; - _stext = .; - __init_begin = .; INIT_TEXT_SECTION(PAGE_SIZE) . = ALIGN(PAGE_SIZE); @@ -67,6 +65,7 @@ SECTIONS } =0x90909090 .plt : { *(.plt) } .text : { + _stext = .; TEXT_TEXT SCHED_TEXT LOCK_TEXT @@ -91,7 +90,9 @@ SECTIONS #include + __init_begin = .; init.data : { INIT_DATA } + __init_end = .; /* Ensure the __preinit_array_start label is properly aligned. We could instead move the label definition inside the section, but @@ -155,6 +156,7 @@ SECTIONS . = ALIGN(32 / 8); . = ALIGN(32 / 8); } + __bss_stop = .; _end = .; PROVIDE (end = .); diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index ff65fb4..6899195 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -20,13 +20,12 @@ SECTIONS . = START + SIZEOF_HEADERS; _text = .; - _stext = .; - __init_begin = .; INIT_TEXT_SECTION(0) . = ALIGN(PAGE_SIZE); .text : { + _stext = .; TEXT_TEXT SCHED_TEXT LOCK_TEXT @@ -62,7 +61,10 @@ SECTIONS #include + __init_begin = .; init.data : { INIT_DATA } + __init_end = .; + .data : { INIT_TASK_DATA(KERNEL_STACK_SIZE) @@ -97,6 +99,7 @@ SECTIONS PROVIDE(_bss_start = .); SBSS(0) BSS(0) + __bss_stop = .; _end = .; PROVIDE (end = .); -- cgit v0.10.2 From 7ee3d4e8cd560500192d80ca84d7f15d6dee0807 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:41 -0700 Subject: mm: introduce helper function mem_init_print_info() to simplify mem_init() Introduce helper function mem_init_print_info() to simplify mem_init() across different architectures, which also unifies the format and information printed. Function mem_init_print_info() calculates memory statistics information without walking each page, so it should be a little faster on some architectures. Also introduce another helper get_num_physpages() to kill the global variable num_physpages. Signed-off-by: Jiang Liu Cc: Mel Gorman Cc: Michel Lespinasse Cc: Rik van Riel Cc: Minchan Kim Cc: Marek Szyprowski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mm.h b/include/linux/mm.h index 4310f80..09c2353 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1323,6 +1323,7 @@ extern void free_highmem_page(struct page *page); #endif extern void adjust_managed_page_count(struct page *page, long count); +extern void mem_init_print_info(const char *str); /* Free the reserved page into the buddy system, so it gets managed. */ static inline void __free_reserved_page(struct page *page) @@ -1358,6 +1359,17 @@ static inline unsigned long free_initmem_default(int poison) poison, "unused kernel"); } +static inline unsigned long get_num_physpages(void) +{ + int nid; + unsigned long phys_pages = 0; + + for_each_online_node(nid) + phys_pages += node_present_pages(nid); + + return phys_pages; +} + #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP /* * With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d9445c4..327516b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -61,6 +61,7 @@ #include #include +#include #include #include #include "internal.h" @@ -5246,6 +5247,57 @@ void free_highmem_page(struct page *page) } #endif + +void __init mem_init_print_info(const char *str) +{ + unsigned long physpages, codesize, datasize, rosize, bss_size; + unsigned long init_code_size, init_data_size; + + physpages = get_num_physpages(); + codesize = _etext - _stext; + datasize = _edata - _sdata; + rosize = __end_rodata - __start_rodata; + bss_size = __bss_stop - __bss_start; + init_data_size = __init_end - __init_begin; + init_code_size = _einittext - _sinittext; + + /* + * Detect special cases and adjust section sizes accordingly: + * 1) .init.* may be embedded into .data sections + * 2) .init.text.* may be out of [__init_begin, __init_end], + * please refer to arch/tile/kernel/vmlinux.lds.S. + * 3) .rodata.* may be embedded into .text or .data sections. + */ +#define adj_init_size(start, end, size, pos, adj) \ + if (start <= pos && pos < end && size > adj) \ + size -= adj; + + adj_init_size(__init_begin, __init_end, init_data_size, + _sinittext, init_code_size); + adj_init_size(_stext, _etext, codesize, _sinittext, init_code_size); + adj_init_size(_sdata, _edata, datasize, __init_begin, init_data_size); + adj_init_size(_stext, _etext, codesize, __start_rodata, rosize); + adj_init_size(_sdata, _edata, datasize, __start_rodata, rosize); + +#undef adj_init_size + + printk("Memory: %luK/%luK available " + "(%luK kernel code, %luK rwdata, %luK rodata, " + "%luK init, %luK bss, %luK reserved" +#ifdef CONFIG_HIGHMEM + ", %luK highmem" +#endif + "%s%s)\n", + nr_free_pages() << (PAGE_SHIFT-10), physpages << (PAGE_SHIFT-10), + codesize >> 10, datasize >> 10, rosize >> 10, + (init_data_size + init_code_size) >> 10, bss_size >> 10, + (physpages - totalram_pages) << (PAGE_SHIFT-10), +#ifdef CONFIG_HIGHMEM + totalhigh_pages << (PAGE_SHIFT-10), +#endif + str ? ", " : "", str ? str : ""); +} + /** * set_dma_reserve - set the specified number of pages reserved in the first zone * @new_dma_reserve: The number of pages to mark reserved -- cgit v0.10.2 From 0ed5fd138539940a493dc69359cb2f49de70ad89 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:43 -0700 Subject: mm: use totalram_pages instead of num_physpages at runtime The global variable num_physpages is scheduled to be removed, so use totalram_pages instead of num_physpages at runtime. Signed-off-by: Jiang Liu Cc: Miklos Szeredi Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9a0cdde..0b57859 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -785,7 +785,7 @@ static const struct super_operations fuse_super_operations = { static void sanitize_global_limit(unsigned *limit) { if (*limit == 0) - *limit = ((num_physpages << PAGE_SHIFT) >> 13) / + *limit = ((totalram_pages << PAGE_SHIFT) >> 13) / sizeof(struct fuse_req); if (*limit >= 1 << 16) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 0de2857..8b5d1cd 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1651,7 +1651,7 @@ unsigned long snapshot_get_image_size(void) static int init_header(struct swsusp_info *info) { memset(info, 0, sizeof(struct swsusp_info)); - info->num_physpages = num_physpages; + info->num_physpages = get_num_physpages(); info->image_pages = nr_copy_pages; info->pages = snapshot_get_image_size(); info->size = info->pages; @@ -1795,7 +1795,7 @@ static int check_header(struct swsusp_info *info) char *reason; reason = check_image_kernel(info); - if (!reason && info->num_physpages != num_physpages) + if (!reason && info->num_physpages != get_num_physpages()) reason = "memory size"; if (reason) { printk(KERN_ERR "PM: Image mismatch: %s\n", reason); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 7e06641..cec5394 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -93,7 +93,7 @@ void inet_frags_init(struct inet_frags *f) } rwlock_init(&f->lock); - f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ + f->rnd = (u32) ((totalram_pages ^ (totalram_pages >> 7)) ^ (jiffies ^ (jiffies >> 6))); setup_timer(&f->secret_timer, inet_frag_secret_rebuild, -- cgit v0.10.2 From e461d627d5c0957457eb354843f3c29b50646d63 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:44 -0700 Subject: mm/hotplug: prepare for removing num_physpages Prepare for removing num_physpages. Signed-off-by: Jiang Liu Cc: Wen Congyang Cc: Tang Chen Cc: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 5e34922..106602e 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -763,10 +763,6 @@ EXPORT_SYMBOL_GPL(restore_online_page_callback); void __online_page_set_limits(struct page *page) { - unsigned long pfn = page_to_pfn(page); - - if (pfn >= num_physpages) - num_physpages = pfn + 1; } EXPORT_SYMBOL_GPL(__online_page_set_limits); -- cgit v0.10.2 From d385d9ee7add81254cb7094d616138089ea500f5 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:45 -0700 Subject: mm/alpha: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index eee47a4..af91010 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -277,42 +277,14 @@ srm_paging_stop (void) #endif #ifndef CONFIG_DISCONTIGMEM -static void __init -printk_memory_info(void) -{ - unsigned long codesize, reservedpages, datasize, initsize, tmp; - extern int page_is_ram(unsigned long) __init; - - /* printk all informations */ - reservedpages = 0; - for (tmp = 0; tmp < max_low_pfn; tmp++) - /* - * Only count reserved RAM pages - */ - if (page_is_ram(tmp) && PageReserved(mem_map+tmp)) - reservedpages++; - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_data; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk("Memory: %luk/%luk available (%luk kernel code, %luk reserved, %luk data, %luk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - max_mapnr << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10); -} - void __init mem_init(void) { - max_mapnr = num_physpages = max_low_pfn; + max_mapnr = max_low_pfn; free_all_bootmem(); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - printk_memory_info(); + mem_init_print_info(NULL); } #endif /* CONFIG_DISCONTIGMEM */ diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index 857452c..0894b3a8 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -129,8 +129,6 @@ setup_memory_node(int nid, void *kernel_end) if (node_max_pfn > max_low_pfn) max_pfn = max_low_pfn = node_max_pfn; - num_physpages += node_max_pfn - node_min_pfn; - #if 0 /* we'll try this one again in a little while */ /* Cute trick to make sure our local node data is on local memory */ node_data[nid] = (pg_data_t *)(__va(node_min_pfn << PAGE_SHIFT)); @@ -324,37 +322,9 @@ void __init paging_init(void) void __init mem_init(void) { - unsigned long codesize, reservedpages, datasize, initsize, pfn; - extern int page_is_ram(unsigned long) __init; - unsigned long nid, i; high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); - - reservedpages = 0; - for_each_online_node(nid) { - /* - * This will free up the bootmem, ie, slot 0 memory - */ - free_all_bootmem_node(NODE_DATA(nid)); - - pfn = NODE_DATA(nid)->node_start_pfn; - for (i = 0; i < node_spanned_pages(nid); i++, pfn++) - if (page_is_ram(pfn) && - PageReserved(nid_page_nr(nid, i))) - reservedpages++; - } - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_data; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk("Memory: %luk/%luk available (%luk kernel code, %luk reserved, " - "%luk data, %luk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10); + free_all_bootmem(); + mem_init_print_info(NULL); #if 0 mem_stress(); #endif -- cgit v0.10.2 From de35e1b828cb21d027265847ca32e5b9cff1c1e9 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:47 -0700 Subject: mm/ARC: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Acked-by: Vineet Gupta # for arch/arc Cc: James Hogan Cc: Rob Herring Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index c668a60..a08ce71 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -74,7 +74,7 @@ void __init setup_arch_memory(void) /* Last usable page of low mem (no HIGHMEM yet for ARC port) */ max_low_pfn = max_pfn = PFN_DOWN(end_mem); - max_mapnr = num_physpages = max_low_pfn - min_low_pfn; + max_mapnr = max_low_pfn - min_low_pfn; /*------------- reserve kernel image -----------------------*/ memblock_reserve(CONFIG_LINUX_LINK_BASE, @@ -84,7 +84,7 @@ void __init setup_arch_memory(void) /*-------------- node setup --------------------------------*/ memset(zones_size, 0, sizeof(zones_size)); - zones_size[ZONE_NORMAL] = num_physpages; + zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn; /* * We can't use the helper free_area_init(zones[]) because it uses @@ -106,39 +106,9 @@ void __init setup_arch_memory(void) */ void __init mem_init(void) { - int codesize, datasize, initsize, reserved_pages, free_pages; - int tmp; - high_memory = (void *)(CONFIG_LINUX_LINK_BASE + arc_mem_sz); - free_all_bootmem(); - - /* count all reserved pages [kernel code/data/mem_map..] */ - reserved_pages = 0; - for (tmp = 0; tmp < max_mapnr; tmp++) - if (PageReserved(mem_map + tmp)) - reserved_pages++; - - /* XXX: nr_free_pages() is equivalent */ - free_pages = max_mapnr - reserved_pages; - - /* - * For the purpose of display below, split the "reserve mem" - * kernel code/data is already shown explicitly, - * Show any other reservations (mem_map[ ] et al) - */ - reserved_pages -= (((unsigned int)_end - CONFIG_LINUX_LINK_BASE) >> - PAGE_SHIFT); - - codesize = _etext - _text; - datasize = _end - _etext; - initsize = __init_end - __init_begin; - - pr_info("Memory Available: %dM / %ldM (%dK code, %dK data, %dK init, %dK reserv)\n", - PAGES_TO_MB(free_pages), - TO_MB(arc_mem_sz), - TO_KB(codesize), TO_KB(datasize), TO_KB(initsize), - PAGES_TO_KB(reserved_pages)); + mem_init_print_info(NULL); } /* -- cgit v0.10.2 From 2450c97323e635a04f7b2f4b68680ab2c151bbbf Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:48 -0700 Subject: mm/ARM: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 06e9ce1..6833cbe 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -583,9 +583,6 @@ static void __init free_highpages(void) */ void __init mem_init(void) { - unsigned long reserved_pages, free_pages; - struct memblock_region *reg; - int i; #ifdef CONFIG_HAVE_TCM /* These pointers are filled in on TCM detection */ extern u32 dtcm_end; @@ -605,47 +602,7 @@ void __init mem_init(void) free_highpages(); - reserved_pages = free_pages = 0; - - for_each_bank(i, &meminfo) { - struct membank *bank = &meminfo.bank[i]; - unsigned int pfn1, pfn2; - struct page *page, *end; - - pfn1 = bank_pfn_start(bank); - pfn2 = bank_pfn_end(bank); - - page = pfn_to_page(pfn1); - end = pfn_to_page(pfn2 - 1) + 1; - - do { - if (PageReserved(page)) - reserved_pages++; - else if (!page_count(page)) - free_pages++; - page++; - } while (page < end); - } - - /* - * Since our memory may not be contiguous, calculate the - * real number of pages we have in this system - */ - printk(KERN_INFO "Memory:"); - num_physpages = 0; - for_each_memblock(memory, reg) { - unsigned long pages = memblock_region_memory_end_pfn(reg) - - memblock_region_memory_base_pfn(reg); - num_physpages += pages; - printk(" %ldMB", pages >> (20 - PAGE_SHIFT)); - } - printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT)); - - printk(KERN_NOTICE "Memory: %luk/%luk available, %luk reserved, %luK highmem\n", - nr_free_pages() << (PAGE_SHIFT-10), - free_pages << (PAGE_SHIFT-10), - reserved_pages << (PAGE_SHIFT-10), - totalhigh_pages << (PAGE_SHIFT-10)); + mem_init_print_info(NULL); #define MLK(b, t) b, t, ((t) - (b)) >> 10 #define MLM(b, t) b, t, ((t) - (b)) >> 20 @@ -711,7 +668,7 @@ void __init mem_init(void) BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET); #endif - if (PAGE_SIZE >= 16384 && num_physpages <= 128) { + if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) { extern int sysctl_overcommit_memory; /* * On a machine this small we won't get -- cgit v0.10.2 From bee4ebd117ac943308dd57bc0a5a3cc539f0eaac Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:49 -0700 Subject: mm/ARM64: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 93de98a..b16c778 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -272,59 +272,17 @@ static void __init free_unused_memmap(void) */ void __init mem_init(void) { - unsigned long reserved_pages, free_pages; - struct memblock_region *reg; - arm64_swiotlb_init(); max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map; #ifndef CONFIG_SPARSEMEM_VMEMMAP - /* this will put all unused low memory onto the freelists */ free_unused_memmap(); #endif - + /* this will put all unused low memory onto the freelists */ free_all_bootmem(); - reserved_pages = free_pages = 0; - - for_each_memblock(memory, reg) { - unsigned int pfn1, pfn2; - struct page *page, *end; - - pfn1 = __phys_to_pfn(reg->base); - pfn2 = pfn1 + __phys_to_pfn(reg->size); - - page = pfn_to_page(pfn1); - end = pfn_to_page(pfn2 - 1) + 1; - - do { - if (PageReserved(page)) - reserved_pages++; - else if (!page_count(page)) - free_pages++; - page++; - } while (page < end); - } - - /* - * Since our memory may not be contiguous, calculate the real number - * of pages we have in this system. - */ - pr_info("Memory:"); - num_physpages = 0; - for_each_memblock(memory, reg) { - unsigned long pages = memblock_region_memory_end_pfn(reg) - - memblock_region_memory_base_pfn(reg); - num_physpages += pages; - printk(" %ldMB", pages >> (20 - PAGE_SHIFT)); - } - printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT)); - - pr_notice("Memory: %luk/%luk available, %luk reserved\n", - nr_free_pages() << (PAGE_SHIFT-10), - free_pages << (PAGE_SHIFT-10), - reserved_pages << (PAGE_SHIFT-10)); + mem_init_print_info(); #define MLK(b, t) b, t, ((t) - (b)) >> 10 #define MLM(b, t) b, t, ((t) - (b)) >> 20 @@ -366,7 +324,7 @@ void __init mem_init(void) BUILD_BUG_ON(TASK_SIZE_64 > MODULES_VADDR); BUG_ON(TASK_SIZE_64 > MODULES_VADDR); - if (PAGE_SIZE >= 16384 && num_physpages <= 128) { + if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) { extern int sysctl_overcommit_memory; /* * On a machine this small we won't get anywhere without -- cgit v0.10.2 From 6703bdf669f0e4bbeb8f9c1afb87c54bdd60e852 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:50 -0700 Subject: mm/AVR32: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Acked-by: Hans-Christian Egtvedt Cc: Haavard Skinnemoen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c index af6890f..0fc04b9 100644 --- a/arch/avr32/mm/init.c +++ b/arch/avr32/mm/init.c @@ -100,26 +100,16 @@ void __init paging_init(void) void __init mem_init(void) { - int codesize, reservedpages, datasize, initsize; - int nid, i; + pg_data_t *pgdat; - reservedpages = 0; high_memory = NULL; /* this will put all low memory onto the freelists */ - for_each_online_node(nid) { - pg_data_t *pgdat = NODE_DATA(nid); - unsigned long node_pages = 0; + for_each_online_pgdat(pgdat) { void *node_high_memory; - num_physpages += pgdat->node_present_pages; - if (pgdat->node_spanned_pages != 0) - node_pages = free_all_bootmem_node(pgdat); - - for (i = 0; i < node_pages; i++) - if (PageReserved(pgdat->node_mem_map + i)) - reservedpages++; + free_all_bootmem_node(pgdat); node_high_memory = (void *)((pgdat->node_start_pfn + pgdat->node_spanned_pages) @@ -130,18 +120,7 @@ void __init mem_init(void) max_mapnr = MAP_NR(high_memory); - codesize = (unsigned long)_etext - (unsigned long)_text; - datasize = (unsigned long)_edata - (unsigned long)_data; - initsize = (unsigned long)__init_end - (unsigned long)__init_begin; - - printk ("Memory: %luk/%luk available (%dk kernel code, " - "%dk reserved, %dk data, %dk init)\n", - nr_free_pages() << (PAGE_SHIFT - 10), - totalram_pages << (PAGE_SHIFT - 10), - codesize >> 10, - reservedpages << (PAGE_SHIFT - 10), - datasize >> 10, - initsize >> 10); + mem_init_print_info(NULL); } void free_initmem(void) -- cgit v0.10.2 From d9d7e769815c9cb66c8a4b144f066bb957ebd98e Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:51 -0700 Subject: mm/blackfin: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: Mike Frysinger Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/blackfin/mm/init.c b/arch/blackfin/mm/init.c index c73d80e..166842d 100644 --- a/arch/blackfin/mm/init.c +++ b/arch/blackfin/mm/init.c @@ -90,43 +90,17 @@ asmlinkage void __init init_pda(void) void __init mem_init(void) { - unsigned int codek = 0, datak = 0, initk = 0; - unsigned int reservedpages = 0, freepages = 0; - unsigned long tmp; - unsigned long start_mem = memory_start; - unsigned long end_mem = memory_end; + char buf[64]; - end_mem &= PAGE_MASK; - high_memory = (void *)end_mem; - - start_mem = PAGE_ALIGN(start_mem); - max_mapnr = num_physpages = MAP_NR(high_memory); - printk(KERN_DEBUG "Kernel managed physical pages: %lu\n", num_physpages); + high_memory = (void *)(memory_end & PAGE_MASK); + max_mapnr = MAP_NR(high_memory); + printk(KERN_DEBUG "Kernel managed physical pages: %lu\n", max_mapnr); /* This will put all low memory onto the freelists. */ free_all_bootmem(); - reservedpages = 0; - for (tmp = ARCH_PFN_OFFSET; tmp < max_mapnr; tmp++) - if (PageReserved(pfn_to_page(tmp))) - reservedpages++; - freepages = max_mapnr - ARCH_PFN_OFFSET - reservedpages; - - /* do not count in kernel image between _rambase and _ramstart */ - reservedpages -= (_ramstart - _rambase) >> PAGE_SHIFT; -#if (defined(CONFIG_BFIN_EXTMEM_ICACHEABLE) && ANOMALY_05000263) - reservedpages += (_ramend - memory_end - DMA_UNCACHED_REGION) >> PAGE_SHIFT; -#endif - - codek = (_etext - _stext) >> 10; - initk = (__init_end - __init_begin) >> 10; - datak = ((_ramstart - _rambase) >> 10) - codek - initk; - - printk(KERN_INFO - "Memory available: %luk/%luk RAM, " - "(%uk init code, %uk kernel code, %uk data, %uk dma, %uk reserved)\n", - (unsigned long) freepages << (PAGE_SHIFT-10), (_ramend - CONFIG_PHY_RAM_BASE_ADDRESS) >> 10, - initk, codek, datak, DMA_UNCACHED_REGION >> 10, (reservedpages << (PAGE_SHIFT-10))); + snprintf(buf, sizeof(buf) - 1, "%uK DMA", DMA_UNCACHED_REGION >> 10); + mem_init_print_info(buf); } #ifdef CONFIG_BLK_DEV_INITRD -- cgit v0.10.2 From 02f5532445c4aed57697b612e72b2a334eeb10ce Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:52 -0700 Subject: mm/c6x: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: Mark Salter Cc: Aurelien Jacquiot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c index c9ae8ce..63f5560 100644 --- a/arch/c6x/mm/init.c +++ b/arch/c6x/mm/init.c @@ -58,21 +58,12 @@ void __init paging_init(void) void __init mem_init(void) { - int codek, datak; - unsigned long tmp; - unsigned long len = memory_end - memory_start; - high_memory = (void *)(memory_end & PAGE_MASK); /* this will put all memory onto the freelists */ free_all_bootmem(); - codek = (_etext - _stext) >> 10; - datak = (_end - _sdata) >> 10; - - tmp = nr_free_pages() << PAGE_SHIFT; - printk(KERN_INFO "Memory: %luk/%luk RAM (%dk kernel code, %dk data)\n", - tmp >> 10, len >> 10, codek, datak); + mem_init_print_info(NULL); } #ifdef CONFIG_BLK_DEV_INITRD -- cgit v0.10.2 From 4e422de996da62e933dcc0fd3c2d7fe513cf32a2 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:54 -0700 Subject: mm/cris: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Acked-by: Jesper Nilsson Cc: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c index 52b8b56..c81af5b 100644 --- a/arch/cris/mm/init.c +++ b/arch/cris/mm/init.c @@ -19,9 +19,6 @@ unsigned long empty_zero_page; void __init mem_init(void) { - int codesize, reservedpages, datasize, initsize; - unsigned long tmp; - BUG_ON(!mem_map); /* max/min_low_pfn was set by setup.c @@ -29,35 +26,9 @@ mem_init(void) * * high_memory was also set in setup.c */ - - max_mapnr = num_physpages = max_low_pfn - min_low_pfn; - - /* this will put all memory onto the freelists */ + max_mapnr = max_low_pfn - min_low_pfn; free_all_bootmem(); - - reservedpages = 0; - for (tmp = 0; tmp < max_mapnr; tmp++) { - /* - * Only count reserved RAM pages - */ - if (PageReserved(mem_map + tmp)) - reservedpages++; - } - - codesize = (unsigned long) &_etext - (unsigned long) &_stext; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk(KERN_INFO - "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, " - "%dk init)\n" , - nr_free_pages() << (PAGE_SHIFT-10), - max_mapnr << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10 - ); + mem_init_print_info(NULL); } /* free the pages occupied by initialization code */ -- cgit v0.10.2 From 3f2b73c3c3e59cb9b94490d664b2439cd9c540e2 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:55 -0700 Subject: mm/frv: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: David Howells Cc: Andi Kleen Cc: Geert Uytterhoeven Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c index a513647..f78f8cb 100644 --- a/arch/frv/kernel/setup.c +++ b/arch/frv/kernel/setup.c @@ -876,6 +876,7 @@ late_initcall(setup_arch_serial); static void __init setup_linux_memory(void) { unsigned long bootmap_size, low_top_pfn, kstart, kend, high_mem; + unsigned long physpages; kstart = (unsigned long) &__kernel_image_start - PAGE_OFFSET; kend = (unsigned long) &__kernel_image_end - PAGE_OFFSET; @@ -893,19 +894,19 @@ static void __init setup_linux_memory(void) ); /* pass the memory that the kernel can immediately use over to the bootmem allocator */ - max_mapnr = num_physpages = (memory_end - memory_start) >> PAGE_SHIFT; + max_mapnr = physpages = (memory_end - memory_start) >> PAGE_SHIFT; low_top_pfn = (KERNEL_LOWMEM_END - KERNEL_LOWMEM_START) >> PAGE_SHIFT; high_mem = 0; - if (num_physpages > low_top_pfn) { + if (physpages > low_top_pfn) { #ifdef CONFIG_HIGHMEM - high_mem = num_physpages - low_top_pfn; + high_mem = physpages - low_top_pfn; #else - max_mapnr = num_physpages = low_top_pfn; + max_mapnr = physpages = low_top_pfn; #endif } else { - low_top_pfn = num_physpages; + low_top_pfn = physpages; } min_low_pfn = memory_start >> PAGE_SHIFT; @@ -979,7 +980,7 @@ static void __init setup_uclinux_memory(void) free_bootmem(memory_start, memory_end - memory_start); high_memory = (void *) (memory_end & PAGE_MASK); - max_mapnr = num_physpages = ((unsigned long) high_memory - PAGE_OFFSET) >> PAGE_SHIFT; + max_mapnr = ((unsigned long) high_memory - PAGE_OFFSET) >> PAGE_SHIFT; min_low_pfn = memory_start >> PAGE_SHIFT; max_low_pfn = memory_end >> PAGE_SHIFT; diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c index 3dcc888..88a1597 100644 --- a/arch/frv/mm/init.c +++ b/arch/frv/mm/init.c @@ -78,7 +78,7 @@ void __init paging_init(void) memset((void *) empty_zero_page, 0, PAGE_SIZE); #ifdef CONFIG_HIGHMEM - if (num_physpages - num_mappedpages) { + if (get_num_physpages() - num_mappedpages) { pgd_t *pge; pud_t *pue; pmd_t *pme; @@ -96,7 +96,7 @@ void __init paging_init(void) */ zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn; #ifdef CONFIG_HIGHMEM - zones_size[ZONE_HIGHMEM] = num_physpages - num_mappedpages; + zones_size[ZONE_HIGHMEM] = get_num_physpages() - num_mappedpages; #endif free_area_init(zones_size); @@ -114,45 +114,24 @@ void __init paging_init(void) */ void __init mem_init(void) { - unsigned long npages = (memory_end - memory_start) >> PAGE_SHIFT; - unsigned long tmp; -#ifdef CONFIG_MMU - unsigned long loop, pfn; - int datapages = 0; -#endif - int codek = 0, datak = 0; + unsigned long code_size = _etext - _stext; /* this will put all low memory onto the freelists */ free_all_bootmem(); +#if defined(CONFIG_MMU) && defined(CONFIG_HIGHMEM) + { + unsigned long pfn; -#ifdef CONFIG_MMU - for (loop = 0 ; loop < npages ; loop++) - if (PageReserved(&mem_map[loop])) - datapages++; - -#ifdef CONFIG_HIGHMEM - for (pfn = num_physpages - 1; pfn >= num_mappedpages; pfn--) - free_highmem_page(&mem_map[pfn]); -#endif - - codek = ((unsigned long) &_etext - (unsigned long) &_stext) >> 10; - datak = datapages << (PAGE_SHIFT - 10); - -#else - codek = (_etext - _stext) >> 10; - datak = 0; //(__bss_stop - _sdata) >> 10; + for (pfn = get_num_physpages() - 1; + pfn >= num_mappedpages; pfn--) + free_highmem_page(&mem_map[pfn]); + } #endif - tmp = nr_free_pages() << PAGE_SHIFT; - printk("Memory available: %luKiB/%luKiB RAM, %luKiB/%luKiB ROM (%dKiB kernel code, %dKiB data)\n", - tmp >> 10, - npages << (PAGE_SHIFT - 10), - (rom_length > 0) ? ((rom_length >> 10) - codek) : 0, - rom_length >> 10, - codek, - datak - ); - + mem_init_print_info(NULL); + if (rom_length > 0 && rom_length >= code_size) + printk("Memory available: %luKiB/%luKiB ROM\n", + (rom_length - code_size) >> 10, rom_length >> 10); } /* end mem_init() */ /*****************************************************************************/ -- cgit v0.10.2 From 27a59706e4df3d1ea7aa1803b1ee0c94f1a4f3cf Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:56 -0700 Subject: mm/h8300: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: Yoshinori Sato Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c index a506dd4..6c1251e 100644 --- a/arch/h8300/mm/init.c +++ b/arch/h8300/mm/init.c @@ -121,40 +121,20 @@ void __init paging_init(void) void __init mem_init(void) { - int codek = 0, datak = 0, initk = 0; - /* DAVIDM look at setup memory map generically with reserved area */ - unsigned long tmp; - extern unsigned long _ramend, _ramstart; - unsigned long len = &_ramend - &_ramstart; - unsigned long start_mem = memory_start; /* DAVIDM - these must start at end of kernel */ - unsigned long end_mem = memory_end; /* DAVIDM - this must not include kernel stack at top */ + unsigned long codesize = _etext - _stext; -#ifdef DEBUG - printk(KERN_DEBUG "Mem_init: start=%lx, end=%lx\n", start_mem, end_mem); -#endif - - end_mem &= PAGE_MASK; - high_memory = (void *) end_mem; + pr_devel("Mem_init: start=%lx, end=%lx\n", memory_start, memory_end); - start_mem = PAGE_ALIGN(start_mem); - max_mapnr = num_physpages = MAP_NR(high_memory); + high_memory = (void *) (memory_end & PAGE_MASK); + max_mapnr = MAP_NR(high_memory); /* this will put all low memory onto the freelists */ free_all_bootmem(); - codek = (_etext - _stext) >> 10; - datak = (__bss_stop - _sdata) >> 10; - initk = (__init_begin - __init_end) >> 10; - - tmp = nr_free_pages() << PAGE_SHIFT; - printk(KERN_INFO "Memory available: %luk/%luk RAM, %luk/%luk ROM (%dk kernel code, %dk data)\n", - tmp >> 10, - len >> 10, - (rom_length > 0) ? ((rom_length >> 10) - codek) : 0, - rom_length >> 10, - codek, - datak - ); + mem_init_print_info(NULL); + if (rom_length > 0 && rom_length > codesize) + pr_info("Memory available: %luK/%luK ROM\n", + (rom_length - codesize) >> 10, rom_length >> 10); } -- cgit v0.10.2 From 5dc355c146689ac5a9d57dff349958585da21fe3 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:57 -0700 Subject: mm/hexagon: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: Richard Kuo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c index 0ab5b43..88977e4 100644 --- a/arch/hexagon/mm/init.c +++ b/arch/hexagon/mm/init.c @@ -71,9 +71,7 @@ void __init mem_init(void) { /* No idea where this is actually declared. Seems to evade LXR. */ free_all_bootmem(); - num_physpages = bootmem_lastpg-ARCH_PFN_OFFSET; - - printk(KERN_INFO "totalram_pages = %ld\n", totalram_pages); + mem_init_print_info(NULL); /* * To-Do: someone somewhere should wipe out the bootmem map -- cgit v0.10.2 From de4bcddc13be31c669fc74cd2b400e1e7a1fdbcf Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:58 -0700 Subject: mm/IA64: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: Tony Luck Cc: Fenghua Yu Cc: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 67c59eb..e4a6a536 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -295,14 +295,6 @@ find_memory (void) alloc_per_cpu_data(); } -static int count_pages(u64 start, u64 end, void *arg) -{ - unsigned long *count = arg; - - *count += (end - start) >> PAGE_SHIFT; - return 0; -} - /* * Set up the page tables. */ @@ -313,9 +305,6 @@ paging_init (void) unsigned long max_dma; unsigned long max_zone_pfns[MAX_NR_ZONES]; - num_physpages = 0; - efi_memmap_walk(count_pages, &num_physpages); - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); #ifdef CONFIG_ZONE_DMA max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index ae4db4b..58550b8 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -37,7 +37,6 @@ struct early_node_data { struct ia64_node_data *node_data; unsigned long pernode_addr; unsigned long pernode_size; - unsigned long num_physpages; #ifdef CONFIG_ZONE_DMA unsigned long num_dma_physpages; #endif @@ -732,7 +731,6 @@ static __init int count_node_pages(unsigned long start, unsigned long len, int n { unsigned long end = start + len; - mem_data[node].num_physpages += len >> PAGE_SHIFT; #ifdef CONFIG_ZONE_DMA if (start <= __pa(MAX_DMA_ADDRESS)) mem_data[node].num_dma_physpages += @@ -778,7 +776,6 @@ void __init paging_init(void) #endif for_each_online_node(node) { - num_physpages += mem_data[node].num_physpages; pfn_offset = mem_data[node].min_pfn; #ifdef CONFIG_VIRTUAL_MEM_MAP diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index d141f7e..2d372b4 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -545,19 +545,6 @@ int __init register_active_ranges(u64 start, u64 len, int nid) return 0; } -static int __init -count_reserved_pages(u64 start, u64 end, void *arg) -{ - unsigned long num_reserved = 0; - unsigned long *count = arg; - - for (; start < end; start += PAGE_SIZE) - if (PageReserved(virt_to_page(start))) - ++num_reserved; - *count += num_reserved; - return 0; -} - int find_max_min_low_pfn (u64 start, u64 end, void *arg) { @@ -596,7 +583,6 @@ __setup("nolwsys", nolwsys_setup); void __init mem_init (void) { - long reserved_pages, codesize, datasize, initsize; pg_data_t *pgdat; int i; @@ -624,18 +610,7 @@ mem_init (void) if (pgdat->bdata->node_bootmem_map) free_all_bootmem_node(pgdat); - reserved_pages = 0; - efi_memmap_walk(count_reserved_pages, &reserved_pages); - - codesize = (unsigned long) _etext - (unsigned long) _stext; - datasize = (unsigned long) _edata - (unsigned long) _etext; - initsize = (unsigned long) __init_end - (unsigned long) __init_begin; - - printk(KERN_INFO "Memory: %luk/%luk available (%luk code, %luk reserved, " - "%luk data, %luk init)\n", nr_free_pages() << (PAGE_SHIFT - 10), - num_physpages << (PAGE_SHIFT - 10), codesize >> 10, - reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10); - + mem_init_print_info(NULL); /* * For fsyscall entrpoints with no light-weight handler, use the ordinary -- cgit v0.10.2 From a0e7b805cd337d387373d197aa15d61a853e5ed7 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:03:59 -0700 Subject: mm/m32r: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c index 2c468e8..2719630 100644 --- a/arch/m32r/mm/discontig.c +++ b/arch/m32r/mm/discontig.c @@ -129,11 +129,10 @@ unsigned long __init setup_memory(void) #define START_PFN(nid) (NODE_DATA(nid)->bdata->node_min_pfn) #define MAX_LOW_PFN(nid) (NODE_DATA(nid)->bdata->node_low_pfn) -unsigned long __init zone_sizes_init(void) +void __init zone_sizes_init(void) { unsigned long zones_size[MAX_NR_ZONES], zholes_size[MAX_NR_ZONES]; unsigned long low, start_pfn; - unsigned long holes = 0; int nid, i; mem_prof_t *mp; @@ -147,7 +146,6 @@ unsigned long __init zone_sizes_init(void) low = MAX_LOW_PFN(nid); zones_size[ZONE_DMA] = low - start_pfn; zholes_size[ZONE_DMA] = mp->holes; - holes += zholes_size[ZONE_DMA]; node_set_state(nid, N_NORMAL_MEMORY); free_area_init_node(nid, zones_size, start_pfn, zholes_size); @@ -161,6 +159,4 @@ unsigned long __init zone_sizes_init(void) NODE_DATA(1)->node_zones->watermark[WMARK_MIN] = 0; NODE_DATA(1)->node_zones->watermark[WMARK_LOW] = 0; NODE_DATA(1)->node_zones->watermark[WMARK_HIGH] = 0; - - return holes; } diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index a501838..a4f8d93 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c @@ -40,7 +40,6 @@ unsigned long mmu_context_cache_dat; #else unsigned long mmu_context_cache_dat[NR_CPUS]; #endif -static unsigned long hole_pages; /* * function prototype @@ -57,7 +56,7 @@ void free_initrd_mem(unsigned long, unsigned long); #define MAX_LOW_PFN(nid) (NODE_DATA(nid)->bdata->node_low_pfn) #ifndef CONFIG_DISCONTIGMEM -unsigned long __init zone_sizes_init(void) +void __init zone_sizes_init(void) { unsigned long zones_size[MAX_NR_ZONES] = {0, }; unsigned long max_dma; @@ -83,11 +82,9 @@ unsigned long __init zone_sizes_init(void) #endif /* CONFIG_MMU */ free_area_init_node(0, zones_size, start_pfn, 0); - - return 0; } #else /* CONFIG_DISCONTIGMEM */ -extern unsigned long zone_sizes_init(void); +extern void zone_sizes_init(void); #endif /* CONFIG_DISCONTIGMEM */ /*======================================================================* @@ -105,24 +102,7 @@ void __init paging_init(void) for (i = 0 ; i < USER_PTRS_PER_PGD * 2 ; i++) pgd_val(pg_dir[i]) = 0; #endif /* CONFIG_MMU */ - hole_pages = zone_sizes_init(); -} - -int __init reservedpages_count(void) -{ - int reservedpages, nid, i; - - reservedpages = 0; - for_each_online_node(nid) { - unsigned long flags; - pgdat_resize_lock(NODE_DATA(nid), &flags); - for (i = 0 ; i < MAX_LOW_PFN(nid) - START_PFN(nid) ; i++) - if (PageReserved(nid_page_nr(nid, i))) - reservedpages++; - pgdat_resize_unlock(NODE_DATA(nid), &flags); - } - - return reservedpages; + zone_sizes_init(); } /*======================================================================* @@ -131,20 +111,13 @@ int __init reservedpages_count(void) *======================================================================*/ void __init mem_init(void) { - int codesize, reservedpages, datasize, initsize; int nid; #ifndef CONFIG_MMU extern unsigned long memory_end; #endif - num_physpages = 0; - for_each_online_node(nid) - num_physpages += MAX_LOW_PFN(nid) - START_PFN(nid) + 1; - - num_physpages -= hole_pages; - #ifndef CONFIG_DISCONTIGMEM - max_mapnr = num_physpages; + max_mapnr = get_num_physpages(); #endif /* CONFIG_DISCONTIGMEM */ #ifdef CONFIG_MMU @@ -160,19 +133,7 @@ void __init mem_init(void) for_each_online_node(nid) free_all_bootmem_node(NODE_DATA(nid)); - reservedpages = reservedpages_count() - hole_pages; - codesize = (unsigned long) &_etext - (unsigned long)&_text; - datasize = (unsigned long) &_edata - (unsigned long)&_etext; - initsize = (unsigned long) &__init_end - (unsigned long)&__init_begin; - - printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, " - "%dk reserved, %dk data, %dk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10); + mem_init_print_info(NULL); } /*======================================================================* -- cgit v0.10.2 From 9671468f1e1e7ef67169f7fb518a9905f44b0dd6 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:00 -0700 Subject: mm/m68k: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Acked-by: Greg Ungerer Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index 614c60a..397a884 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -149,33 +149,11 @@ void __init print_memmap(void) void __init mem_init(void) { pg_data_t *pgdat; - int codepages = 0; - int datapages = 0; - int initpages = 0; int i; /* this will put all memory onto the freelists */ - num_physpages = 0; - for_each_online_pgdat(pgdat) { - num_physpages += pgdat->node_present_pages; - + for_each_online_pgdat(pgdat) free_all_bootmem_node(pgdat); - for (i = 0; i < pgdat->node_spanned_pages; i++) { - struct page *page = pgdat->node_mem_map + i; - char *addr = page_to_virt(page); - - if (!PageReserved(page)) - continue; - if (addr >= _text && - addr < _etext) - codepages++; - else if (addr >= __init_begin && - addr < __init_end) - initpages++; - else - datapages++; - } - } #if defined(CONFIG_MMU) && !defined(CONFIG_SUN3) && !defined(CONFIG_COLDFIRE) /* insert pointer tables allocated so far into the tablelist */ @@ -190,12 +168,7 @@ void __init mem_init(void) init_pointer_table((unsigned long)zero_pgtable); #endif - pr_info("Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - totalram_pages << (PAGE_SHIFT-10), - codepages << (PAGE_SHIFT-10), - datapages << (PAGE_SHIFT-10), - initpages << (PAGE_SHIFT-10)); + mem_init_print_info(NULL); print_memmap(); } -- cgit v0.10.2 From 132de6717c47f6fb1d4d3ccd6033cd45aee8f779 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:01 -0700 Subject: mm/metag: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c index ce81d7c..e0862b7 100644 --- a/arch/metag/mm/init.c +++ b/arch/metag/mm/init.c @@ -388,22 +388,16 @@ void __init mem_init(void) reset_all_zones_managed_pages(); for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) free_highmem_page(pfn_to_page(tmp)); - num_physpages += totalhigh_pages; #endif /* CONFIG_HIGHMEM */ for_each_online_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); - num_physpages += pgdat->node_present_pages; - if (pgdat->node_spanned_pages) free_all_bootmem_node(pgdat); } - pr_info("Memory: %luk/%luk available\n", - (unsigned long)nr_free_pages() << (PAGE_SHIFT - 10), - num_physpages << (PAGE_SHIFT - 10)); - + mem_init_print_info(NULL); show_mem(0); return; -- cgit v0.10.2 From 6879ea83c6d14e4f21bf48b5780ed549197c668b Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:02 -0700 Subject: mm/microblaze: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index b16c778..67e8d7c 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -282,7 +282,7 @@ void __init mem_init(void) /* this will put all unused low memory onto the freelists */ free_all_bootmem(); - mem_init_print_info(); + mem_init_print_info(NULL); #define MLK(b, t) b, t, ((t) - (b)) >> 10 #define MLM(b, t) b, t, ((t) - (b)) >> 20 diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index b384cbc..74c7bcc 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -71,24 +71,17 @@ static void __init highmem_init(void) kmap_prot = PAGE_KERNEL; } -static unsigned long highmem_setup(void) +static void highmem_setup(void) { unsigned long pfn; - unsigned long reservedpages = 0; for (pfn = max_low_pfn; pfn < max_pfn; ++pfn) { struct page *page = pfn_to_page(pfn); /* FIXME not sure about */ - if (memblock_is_reserved(pfn << PAGE_SHIFT)) - continue; - free_highmem_page(page); - reservedpages++; + if (!memblock_is_reserved(pfn << PAGE_SHIFT)) + free_highmem_page(page); } - pr_info("High memory: %luk\n", - totalhigh_pages << (PAGE_SHIFT-10)); - - return reservedpages; } #endif /* CONFIG_HIGHMEM */ @@ -167,13 +160,12 @@ void __init setup_memory(void) * min_low_pfn - the first page (mm/bootmem.c - node_boot_start) * max_low_pfn * max_mapnr - the first unused page (mm/bootmem.c - node_low_pfn) - * num_physpages - number of all pages */ /* memory start is from the kernel end (aligned) to higher addr */ min_low_pfn = memory_start >> PAGE_SHIFT; /* minimum for allocation */ /* RAM is assumed contiguous */ - num_physpages = max_mapnr = memory_size >> PAGE_SHIFT; + max_mapnr = memory_size >> PAGE_SHIFT; max_low_pfn = ((u64)memory_start + (u64)lowmem_size) >> PAGE_SHIFT; max_pfn = ((u64)memory_start + (u64)memory_size) >> PAGE_SHIFT; @@ -246,46 +238,15 @@ void free_initmem(void) void __init mem_init(void) { - pg_data_t *pgdat; - unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; - high_memory = (void *)__va(memory_start + lowmem_size - 1); /* this will put all memory onto the freelists */ free_all_bootmem(); - - for_each_online_pgdat(pgdat) { - unsigned long i; - struct page *page; - - for (i = 0; i < pgdat->node_spanned_pages; i++) { - if (!pfn_valid(pgdat->node_start_pfn + i)) - continue; - page = pgdat_page_nr(pgdat, i); - if (PageReserved(page)) - reservedpages++; - } - } - #ifdef CONFIG_HIGHMEM - reservedpages -= highmem_setup(); + highmem_setup(); #endif - codesize = (unsigned long)&_sdata - (unsigned long)&_stext; - datasize = (unsigned long)&_edata - (unsigned long)&_sdata; - initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin; - bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start; - - pr_info("Memory: %luk/%luk available (%luk kernel code, ", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10); - pr_cont("%luk reserved, %luk data, %luk bss, %luk init)\n", - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - bsssize >> 10, - initsize >> 10); - + mem_init_print_info(NULL); #ifdef CONFIG_MMU pr_info("Kernel virtual memory layout:\n"); pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP); -- cgit v0.10.2 From 1132137e87898d0b6786d85a99de35ce196ecbfb Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:04 -0700 Subject: mm/MIPS: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: Ralf Baechle Cc: David Daney Cc: Arnd Bergmann Cc: Jiri Kosina Cc: John Crispin Cc: Greg Kroah-Hartman Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index e7333f1..4e73f10 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -359,11 +359,24 @@ void __init paging_init(void) static struct kcore_list kcore_kseg0; #endif -void __init mem_init(void) +static inline void mem_init_free_highmem(void) { - unsigned long codesize, reservedpages, datasize, initsize; - unsigned long tmp, ram; +#ifdef CONFIG_HIGHMEM + unsigned long tmp; + for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { + struct page *page = pfn_to_page(tmp); + + if (!page_is_ram(tmp)) + SetPageReserved(page); + else + free_highmem_page(page); + } +#endif +} + +void __init mem_init(void) +{ #ifdef CONFIG_HIGHMEM #ifdef CONFIG_DISCONTIGMEM #error "CONFIG_HIGHMEM and CONFIG_DISCONTIGMEM dont work together yet" @@ -376,32 +389,8 @@ void __init mem_init(void) free_all_bootmem(); setup_zero_pages(); /* Setup zeroed pages. */ - - reservedpages = ram = 0; - for (tmp = 0; tmp < max_low_pfn; tmp++) - if (page_is_ram(tmp) && pfn_valid(tmp)) { - ram++; - if (PageReserved(pfn_to_page(tmp))) - reservedpages++; - } - num_physpages = ram; - -#ifdef CONFIG_HIGHMEM - for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { - struct page *page = pfn_to_page(tmp); - - if (!page_is_ram(tmp)) { - SetPageReserved(page); - continue; - } - free_highmem_page(page); - } - num_physpages += totalhigh_pages; -#endif - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; + mem_init_free_highmem(); + mem_init_print_info(NULL); #ifdef CONFIG_64BIT if ((unsigned long) &_text > (unsigned long) CKSEG0) @@ -410,16 +399,6 @@ void __init mem_init(void) kclist_add(&kcore_kseg0, (void *) CKSEG0, 0x80000000 - 4, KCORE_TEXT); #endif - - printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " - "%ldk reserved, %ldk data, %ldk init, %ldk highmem)\n", - nr_free_pages() << (PAGE_SHIFT-10), - ram << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10, - totalhigh_pages << (PAGE_SHIFT-10)); } #endif /* !CONFIG_NEED_MULTIPLE_NODES */ diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c index 879077b..cb1ef99 100644 --- a/arch/mips/pci/pci-lantiq.c +++ b/arch/mips/pci/pci-lantiq.c @@ -89,7 +89,7 @@ static inline u32 ltq_calc_bar11mask(void) u32 mem, bar11mask; /* BAR11MASK value depends on available memory on system. */ - mem = num_physpages * PAGE_SIZE; + mem = get_num_physpages() * PAGE_SIZE; bar11mask = (0x0ffffff0 & ~((1 << (fls(mem) - 1)) - 1)) | 8; return bar11mask; diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index aecac4a..a0c9e34 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -357,8 +357,6 @@ static void __init szmem(void) int slot; cnodeid_t node; - num_physpages = 0; - for_each_online_node(node) { nodebytes = 0; for (slot = 0; slot < MAX_MEM_SLOTS; slot++) { @@ -381,7 +379,6 @@ static void __init szmem(void) slot = MAX_MEM_SLOTS; continue; } - num_physpages += slot_psize; memblock_add_node(PFN_PHYS(slot_getbasepfn(node, slot)), PFN_PHYS(slot_psize), node); } @@ -480,10 +477,9 @@ void __init paging_init(void) void __init mem_init(void) { - unsigned long codesize, datasize, initsize, tmp; unsigned node; - high_memory = (void *) __va(num_physpages << PAGE_SHIFT); + high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); for_each_online_node(node) { /* @@ -494,18 +490,5 @@ void __init mem_init(void) setup_zero_pages(); /* This comes from node 0 */ - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - tmp = nr_free_pages(); - printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " - "%ldk reserved, %ldk data, %ldk init, %ldk highmem)\n", - tmp << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - (num_physpages - tmp) << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10, - totalhigh_pages << (PAGE_SHIFT-10)); + mem_init_print_info(NULL); } -- cgit v0.10.2 From 76feaedeb9f33011c7048d4339d18c3fd342f984 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:05 -0700 Subject: mm/mn10300: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: David Howells Cc: Koichi Yasutake Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c index 7590d91..97a1ec0 100644 --- a/arch/mn10300/mm/init.c +++ b/arch/mn10300/mm/init.c @@ -99,15 +99,12 @@ void __init paging_init(void) */ void __init mem_init(void) { - int codesize, reservedpages, datasize, initsize; - int tmp; - BUG_ON(!mem_map); #define START_PFN (contig_page_data.bdata->node_min_pfn) #define MAX_LOW_PFN (contig_page_data.bdata->node_low_pfn) - max_mapnr = num_physpages = MAX_LOW_PFN - START_PFN; + max_mapnr = MAX_LOW_PFN - START_PFN; high_memory = (void *) __va(MAX_LOW_PFN * PAGE_SIZE); /* clear the zero-page */ @@ -116,26 +113,7 @@ void __init mem_init(void) /* this will put all low memory onto the freelists */ free_all_bootmem(); - reservedpages = 0; - for (tmp = 0; tmp < num_physpages; tmp++) - if (PageReserved(&mem_map[tmp])) - reservedpages++; - - codesize = (unsigned long) &_etext - (unsigned long) &_stext; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk(KERN_INFO - "Memory: %luk/%luk available" - " (%dk kernel code, %dk reserved, %dk data, %dk init," - " %ldk highmem)\n", - nr_free_pages() << (PAGE_SHIFT - 10), - max_mapnr << (PAGE_SHIFT - 10), - codesize >> 10, - reservedpages << (PAGE_SHIFT - 10), - datasize >> 10, - initsize >> 10, - totalhigh_pages << (PAGE_SHIFT - 10)); + mem_init_print_info(NULL); } /* -- cgit v0.10.2 From 1173db12bf145a0d0c6a2716e861de2fafde0522 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:06 -0700 Subject: mm/openrisc: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Acked-by: Jonas Bonn Cc: David Howells Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index 16c1e13..7f94652 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -202,56 +202,20 @@ void __init paging_init(void) /* References to section boundaries */ -static int __init free_pages_init(void) -{ - int reservedpages, pfn; - - /* this will put all low memory onto the freelists */ - free_all_bootmem(); - - reservedpages = 0; - for (pfn = 0; pfn < max_low_pfn; pfn++) { - /* - * Only count reserved RAM pages - */ - if (PageReserved(mem_map + pfn)) - reservedpages++; - } - - return reservedpages; -} - -static void __init set_max_mapnr_init(void) -{ - max_mapnr = num_physpages = max_low_pfn; -} - void __init mem_init(void) { - int codesize, reservedpages, datasize, initsize; - BUG_ON(!mem_map); - set_max_mapnr_init(); - + max_mapnr = max_low_pfn; high_memory = (void *)__va(max_low_pfn * PAGE_SIZE); /* clear the zero-page */ memset((void *)empty_zero_page, 0, PAGE_SIZE); - reservedpages = free_pages_init(); - - codesize = (unsigned long)&_etext - (unsigned long)&_stext; - datasize = (unsigned long)&_edata - (unsigned long)&_etext; - initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin; + /* this will put all low memory onto the freelists */ + free_all_bootmem(); - printk(KERN_INFO - "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", - (unsigned long)nr_free_pages() << (PAGE_SHIFT - 10), - max_mapnr << (PAGE_SHIFT - 10), codesize >> 10, - reservedpages << (PAGE_SHIFT - 10), datasize >> 10, - initsize >> 10, (unsigned long)(0 << (PAGE_SHIFT - 10)) - ); + mem_init_print_info(NULL); printk("mem_init_done ...........................................\n"); mem_init_done = 1; -- cgit v0.10.2 From 7d2c7747086354f7889e8d577c537f9ad8985230 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:07 -0700 Subject: mm/PARISC: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Thomas Gleixner Cc: Michal Hocko Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index d8aaaf0..b4edc76 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -214,7 +214,6 @@ static void __init setup_bootmem(void) mem_limit_func(); /* check for "mem=" argument */ mem_max = 0; - num_physpages = 0; for (i = 0; i < npmem_ranges; i++) { unsigned long rsize; @@ -229,10 +228,8 @@ static void __init setup_bootmem(void) npmem_ranges = i + 1; mem_max = mem_limit; } - num_physpages += pmem_ranges[i].pages; break; } - num_physpages += pmem_ranges[i].pages; mem_max += rsize; } @@ -532,7 +529,7 @@ void free_initmem(void) * pages are no-longer executable */ flush_icache_range(init_begin, init_end); - num_physpages += free_initmem_default(-1); + free_initmem_default(-1); /* set up a new led state on systems shipped LED State panel */ pdc_chassis_send_status(PDC_CHASSIS_DIRECT_BCOMPLETE); @@ -580,8 +577,6 @@ unsigned long pcxl_dma_start __read_mostly; void __init mem_init(void) { - int codesize, reservedpages, datasize, initsize; - /* Do sanity checks on page table constants */ BUILD_BUG_ON(PTE_ENTRY_SIZE != sizeof(pte_t)); BUILD_BUG_ON(PMD_ENTRY_SIZE != sizeof(pmd_t)); @@ -603,33 +598,6 @@ void __init mem_init(void) } #endif - codesize = (unsigned long)_etext - (unsigned long)_text; - datasize = (unsigned long)_edata - (unsigned long)_etext; - initsize = (unsigned long)__init_end - (unsigned long)__init_begin; - - reservedpages = 0; -{ - unsigned long pfn; -#ifdef CONFIG_DISCONTIGMEM - int i; - - for (i = 0; i < npmem_ranges; i++) { - for (pfn = node_start_pfn(i); pfn < node_end_pfn(i); pfn++) { - if (PageReserved(pfn_to_page(pfn))) - reservedpages++; - } - } -#else /* !CONFIG_DISCONTIGMEM */ - for (pfn = 0; pfn < max_pfn; pfn++) { - /* - * Only count reserved RAM pages - */ - if (PageReserved(pfn_to_page(pfn))) - reservedpages++; - } -#endif -} - #ifdef CONFIG_PA11 if (hppa_dma_ops == &pcxl_dma_ops) { pcxl_dma_start = (unsigned long)SET_MAP_OFFSET(MAP_START); @@ -643,15 +611,7 @@ void __init mem_init(void) parisc_vmalloc_start = SET_MAP_OFFSET(MAP_START); #endif - printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10 - ); - + mem_init_print_info(NULL); #ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */ printk("virtual kernel memory layout:\n" " vmalloc : 0x%p - 0x%p (%4ld MB)\n" @@ -1101,7 +1061,6 @@ void flush_tlb_all(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - num_physpages += free_reserved_area((void *)start, (void *)end, -1, - "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif -- cgit v0.10.2 From 369a9d8523dc7317eccb64b7aee6e9641d84cc8b Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:09 -0700 Subject: mm/ppc: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 3bcfc0d..49c18b6 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -299,46 +299,27 @@ void __init paging_init(void) void __init mem_init(void) { -#ifdef CONFIG_NEED_MULTIPLE_NODES - int nid; -#endif - pg_data_t *pgdat; - unsigned long i; - struct page *page; - unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; - #ifdef CONFIG_SWIOTLB swiotlb_init(0); #endif - num_physpages = memblock_phys_mem_size() >> PAGE_SHIFT; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); #ifdef CONFIG_NEED_MULTIPLE_NODES - for_each_online_node(nid) { - if (NODE_DATA(nid)->node_spanned_pages != 0) { - printk("freeing bootmem node %d\n", nid); - free_all_bootmem_node(NODE_DATA(nid)); - } + { + pg_data_t *pgdat; + + for_each_online_pgdat(pgdat) + if (pgdat->node_spanned_pages != 0) { + printk("freeing bootmem node %d\n", + pgdat->node_id); + free_all_bootmem_node(pgdat); + } } #else max_mapnr = max_pfn; free_all_bootmem(); #endif - for_each_online_pgdat(pgdat) { - for (i = 0; i < pgdat->node_spanned_pages; i++) { - if (!pfn_valid(pgdat->node_start_pfn + i)) - continue; - page = pgdat_page_nr(pgdat, i); - if (PageReserved(page)) - reservedpages++; - } - } - - codesize = (unsigned long)&_sdata - (unsigned long)&_stext; - datasize = (unsigned long)&_edata - (unsigned long)&_sdata; - initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin; - bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start; #ifdef CONFIG_HIGHMEM { @@ -348,13 +329,9 @@ void __init mem_init(void) for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT; struct page *page = pfn_to_page(pfn); - if (memblock_is_reserved(paddr)) - continue; - free_highmem_page(page); - reservedpages--; + if (!memblock_is_reserved(paddr)) + free_highmem_page(page); } - printk(KERN_DEBUG "High memory: %luk\n", - totalhigh_pages << (PAGE_SHIFT-10)); } #endif /* CONFIG_HIGHMEM */ @@ -367,16 +344,7 @@ void __init mem_init(void) (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1; #endif - printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, " - "%luk reserved, %luk data, %luk bss, %luk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - bsssize >> 10, - initsize >> 10); - + mem_init_print_info(NULL); #ifdef CONFIG_PPC32 pr_info("Kernel virtual memory layout:\n"); pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP); -- cgit v0.10.2 From a18d0e2d7097937e9f51b83eda4bc750d93eb34d Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:10 -0700 Subject: mm/s390: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index a2aafe1..ce36ea8 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -135,9 +135,7 @@ void __init paging_init(void) void __init mem_init(void) { - unsigned long codesize, reservedpages, datasize, initsize; - - max_mapnr = num_physpages = max_low_pfn; + max_mapnr = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); /* Setup guest page hinting */ @@ -147,18 +145,7 @@ void __init mem_init(void) free_all_bootmem(); setup_zero_pages(); /* Setup zeroed pages. */ - reservedpages = 0; - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - max_mapnr << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >>10, - initsize >> 10); + mem_init_print_info(NULL); printk("Write protected kernel read-only data: %#lx - %#lx\n", (unsigned long)&_stext, PFN_ALIGN((unsigned long)&_eshared) - 1); -- cgit v0.10.2 From ad941989d0cf2ef1320ecf46ee1188df85e94b43 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:11 -0700 Subject: mm/score: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: Chen Liqin Cc: Lennox Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c index a8b9177..9fbce49 100644 --- a/arch/score/mm/init.c +++ b/arch/score/mm/init.c @@ -75,33 +75,11 @@ void __init paging_init(void) void __init mem_init(void) { - unsigned long codesize, reservedpages, datasize, initsize; - unsigned long tmp, ram = 0; - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); free_all_bootmem(); setup_zero_page(); /* Setup zeroed pages. */ - reservedpages = 0; - - for (tmp = 0; tmp < max_low_pfn; tmp++) - if (page_is_ram(tmp)) { - ram++; - if (PageReserved(pfn_to_page(tmp))) - reservedpages++; - } - - num_physpages = ram; - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " - "%ldk reserved, %ldk data, %ldk init, %ldk highmem)\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - ram << (PAGE_SHIFT-10), codesize >> 10, - reservedpages << (PAGE_SHIFT-10), datasize >> 10, - initsize >> 10, - totalhigh_pages << (PAGE_SHIFT-10)); + + mem_init_print_info(NULL); } #endif /* !CONFIG_NEED_MULTIPLE_NODES */ -- cgit v0.10.2 From da61efcfedabf16e994d67f983588683f3d59837 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:12 -0700 Subject: mm/SH: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: Paul Mundt Cc: Wen Congyang Cc: Tang Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index fc0c8e1..c9a517c 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -407,24 +407,18 @@ unsigned int mem_init_done = 0; void __init mem_init(void) { - int codesize, datasize, initsize; - int nid; + pg_data_t *pgdat; iommu_init(); - num_physpages = 0; high_memory = NULL; - for_each_online_node(nid) { - pg_data_t *pgdat = NODE_DATA(nid); + for_each_online_pgdat(pgdat) { void *node_high_memory; - num_physpages += pgdat->node_present_pages; - if (pgdat->node_spanned_pages) free_all_bootmem_node(pgdat); - node_high_memory = (void *)__va((pgdat->node_start_pfn + pgdat->node_spanned_pages) << PAGE_SHIFT); @@ -441,19 +435,8 @@ void __init mem_init(void) vsyscall_init(); - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, " - "%dk data, %dk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - datasize >> 10, - initsize >> 10); - - printk(KERN_INFO "virtual kernel memory layout:\n" + mem_init_print_info(NULL); + pr_info("virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" #ifdef CONFIG_HIGHMEM " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" -- cgit v0.10.2 From dceccbe9209bded817f39c01a8a8bd1e69d6277a Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:14 -0700 Subject: mm/SPARC: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Acked-by: Sam Ravnborg Cc: "David S. Miller" Cc: Yasuaki Ishimatsu Cc: Tang Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c index 6cfc1b0..d7aa524 100644 --- a/arch/sparc/kernel/leon_smp.c +++ b/arch/sparc/kernel/leon_smp.c @@ -254,15 +254,12 @@ void __init leon_smp_done(void) /* Free unneeded trap tables */ if (!cpu_present(1)) { free_reserved_page(virt_to_page(&trapbase_cpu1)); - num_physpages++; } if (!cpu_present(2)) { free_reserved_page(virt_to_page(&trapbase_cpu2)); - num_physpages++; } if (!cpu_present(3)) { free_reserved_page(virt_to_page(&trapbase_cpu3)); - num_physpages++; } /* Ok, they are spinning and ready to go. */ smp_processors_ready = 1; diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index a438abb..db69870 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -288,10 +288,6 @@ static void map_high_region(unsigned long start_pfn, unsigned long end_pfn) void __init mem_init(void) { - int codepages = 0; - int datapages = 0; - int initpages = 0; - int reservedpages = 0; int i; if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { @@ -329,8 +325,6 @@ void __init mem_init(void) unsigned long start_pfn = sp_banks[i].base_addr >> PAGE_SHIFT; unsigned long end_pfn = (sp_banks[i].base_addr + sp_banks[i].num_bytes) >> PAGE_SHIFT; - num_physpages += sp_banks[i].num_bytes >> PAGE_SHIFT; - if (end_pfn <= highstart_pfn) continue; @@ -340,39 +334,19 @@ void __init mem_init(void) map_high_region(start_pfn, end_pfn); } - codepages = (((unsigned long) &_etext) - ((unsigned long)&_start)); - codepages = PAGE_ALIGN(codepages) >> PAGE_SHIFT; - datapages = (((unsigned long) &_edata) - ((unsigned long)&_etext)); - datapages = PAGE_ALIGN(datapages) >> PAGE_SHIFT; - initpages = (((unsigned long) &__init_end) - ((unsigned long) &__init_begin)); - initpages = PAGE_ALIGN(initpages) >> PAGE_SHIFT; - - /* Ignore memory holes for the purpose of counting reserved pages */ - for (i=0; i < max_low_pfn; i++) - if (test_bit(i >> (20 - PAGE_SHIFT), sparc_valid_addr_bitmap) - && PageReserved(pfn_to_page(i))) - reservedpages++; - - printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT - 10), - codepages << (PAGE_SHIFT-10), - reservedpages << (PAGE_SHIFT - 10), - datapages << (PAGE_SHIFT-10), - initpages << (PAGE_SHIFT-10), - totalhigh_pages << (PAGE_SHIFT-10)); + mem_init_print_info(NULL); } void free_initmem (void) { - num_physpages += free_initmem_default(POISON_FREE_INITMEM); + free_initmem_default(POISON_FREE_INITMEM); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - num_physpages += free_reserved_area((void *)start, (void *)end, - POISON_FREE_INITMEM, "initrd"); + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 752d738..a9c42a7 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2045,7 +2045,6 @@ static void __init register_page_bootmem_info(void) } void __init mem_init(void) { - unsigned long codepages, datapages, initpages; unsigned long addr, last; addr = PAGE_OFFSET + kern_base; @@ -2063,11 +2062,6 @@ void __init mem_init(void) register_page_bootmem_info(); free_all_bootmem(); - /* We subtract one to account for the mem_map_zero page - * allocated below. - */ - num_physpages = totalram_pages - 1; - /* * Set up the zero page, mark it reserved, so that page count * is not manipulated when freeing the page from user ptes. @@ -2079,19 +2073,7 @@ void __init mem_init(void) } mark_page_reserved(mem_map_zero); - codepages = (((unsigned long) _etext) - ((unsigned long) _start)); - codepages = PAGE_ALIGN(codepages) >> PAGE_SHIFT; - datapages = (((unsigned long) _edata) - ((unsigned long) _etext)); - datapages = PAGE_ALIGN(datapages) >> PAGE_SHIFT; - initpages = (((unsigned long) __init_end) - ((unsigned long) __init_begin)); - initpages = PAGE_ALIGN(initpages) >> PAGE_SHIFT; - - printk("Memory: %luk available (%ldk kernel code, %ldk data, %ldk init) [%016lx,%016lx]\n", - nr_free_pages() << (PAGE_SHIFT-10), - codepages << (PAGE_SHIFT-10), - datapages << (PAGE_SHIFT-10), - initpages << (PAGE_SHIFT-10), - PAGE_OFFSET, (last_valid_pfn << PAGE_SHIFT)); + mem_init_print_info(NULL); if (tlb_type == cheetah || tlb_type == cheetah_plus) cheetah_ecache_flush_init(); @@ -2131,8 +2113,8 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - num_physpages += free_reserved_area((void *)start, (void *)end, - POISON_FREE_INITMEM, "initrd"); + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif -- cgit v0.10.2 From 3f29c33194d3b5fffdccc89665982a36b408b6f9 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:15 -0700 Subject: mm/tile: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Acked-by: Chris Metcalf Cc: Bjorn Helgaas Cc: "David S. Miller" Cc: Wen Congyang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 41818e3..68b5426 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -329,6 +329,7 @@ static void __init setup_memory(void) #if defined(CONFIG_HIGHMEM) || defined(__tilegx__) long lowmem_pages; #endif + unsigned long physpages = 0; /* We are using a char to hold the cpu_2_node[] mapping */ BUILD_BUG_ON(MAX_NUMNODES > 127); @@ -388,8 +389,8 @@ static void __init setup_memory(void) continue; } } - if (num_physpages + PFN_DOWN(range.size) > maxmem_pfn) { - int max_size = maxmem_pfn - num_physpages; + if (physpages + PFN_DOWN(range.size) > maxmem_pfn) { + int max_size = maxmem_pfn - physpages; if (max_size > 0) { pr_err("Maxmem reduced node %d to %d pages\n", i, max_size); @@ -446,7 +447,7 @@ static void __init setup_memory(void) node_start_pfn[i] = start; node_end_pfn[i] = end; node_controller[i] = range.controller; - num_physpages += size; + physpages += size; max_pfn = end; /* Mark node as online */ @@ -465,7 +466,7 @@ static void __init setup_memory(void) * we're willing to use at 8 million pages (32GB of 4KB pages). */ cap = 8 * 1024 * 1024; /* 8 million pages */ - if (num_physpages > cap) { + if (physpages > cap) { int num_nodes = num_online_nodes(); int cap_each = cap / num_nodes; unsigned long dropped_pages = 0; @@ -476,10 +477,10 @@ static void __init setup_memory(void) node_end_pfn[i] = node_start_pfn[i] + cap_each; } } - num_physpages -= dropped_pages; + physpages -= dropped_pages; pr_warning("Only using %ldMB memory;" " ignoring %ldMB.\n", - num_physpages >> (20 - PAGE_SHIFT), + physpages >> (20 - PAGE_SHIFT), dropped_pages >> (20 - PAGE_SHIFT)); pr_warning("Consider using a larger page size.\n"); } @@ -497,7 +498,7 @@ static void __init setup_memory(void) lowmem_pages = (mappable_physpages > MAXMEM_PFN) ? MAXMEM_PFN : mappable_physpages; - highmem_pages = (long) (num_physpages - lowmem_pages); + highmem_pages = (long) (physpages - lowmem_pages); pr_notice("%ldMB HIGHMEM available.\n", pages_to_mb(highmem_pages > 0 ? highmem_pages : 0)); @@ -514,7 +515,6 @@ static void __init setup_memory(void) pr_warning("Use a HIGHMEM enabled kernel.\n"); max_low_pfn = MAXMEM_PFN; max_pfn = MAXMEM_PFN; - num_physpages = MAXMEM_PFN; node_end_pfn[0] = MAXMEM_PFN; } else { pr_notice("%ldMB memory available.\n", diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index f2ac2f4..e182958 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -821,7 +821,6 @@ static void __init set_max_mapnr_init(void) void __init mem_init(void) { - int codesize, datasize, initsize; int i; #ifndef __tilegx__ void *last; @@ -853,19 +852,7 @@ void __init mem_init(void) set_non_bootmem_pages_init(); #endif - codesize = (unsigned long)&_etext - (unsigned long)&_text; - datasize = (unsigned long)&_end - (unsigned long)&_sdata; - initsize = (unsigned long)&_einittext - (unsigned long)&_sinittext; - initsize += (unsigned long)&_einitdata - (unsigned long)&_sinitdata; - - pr_info("Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init, %ldk highmem)\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - datasize >> 10, - initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) - ); + mem_init_print_info(NULL); /* * In debug mode, dump some interesting memory mappings. -- cgit v0.10.2 From 715ee356535e861c0c0890697fa48eeb8af94019 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:16 -0700 Subject: mm/um: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: Jeff Dike Cc: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index b0c7630..7ddb64b 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -70,10 +70,8 @@ void __init mem_init(void) #ifdef CONFIG_HIGHMEM setup_highmem(end_iomem, highmem); #endif - num_physpages = totalram_pages; max_pfn = totalram_pages; - printk(KERN_INFO "Memory: %luk available\n", - nr_free_pages() << (PAGE_SHIFT-10)); + mem_init_print_info(NULL); kmalloc_ok = 1; } -- cgit v0.10.2 From 0d0b6d26ad52093452b1228325a99244e925582a Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:17 -0700 Subject: mm/unicore32: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: Guan Xuetao Cc: Michal Hocko Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index 7d1356c..ae6bc03 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -383,10 +383,6 @@ static void __init free_unused_memmap(struct meminfo *mi) */ void __init mem_init(void) { - unsigned long reserved_pages, free_pages; - struct memblock_region *reg; - int i; - max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map; free_unused_memmap(&meminfo); @@ -394,48 +390,7 @@ void __init mem_init(void) /* this will put all unused low memory onto the freelists */ free_all_bootmem(); - reserved_pages = free_pages = 0; - - for_each_bank(i, &meminfo) { - struct membank *bank = &meminfo.bank[i]; - unsigned int pfn1, pfn2; - struct page *page, *end; - - pfn1 = bank_pfn_start(bank); - pfn2 = bank_pfn_end(bank); - - page = pfn_to_page(pfn1); - end = pfn_to_page(pfn2 - 1) + 1; - - do { - if (PageReserved(page)) - reserved_pages++; - else if (!page_count(page)) - free_pages++; - page++; - } while (page < end); - } - - /* - * Since our memory may not be contiguous, calculate the - * real number of pages we have in this system - */ - printk(KERN_INFO "Memory:"); - num_physpages = 0; - for_each_memblock(memory, reg) { - unsigned long pages = memblock_region_memory_end_pfn(reg) - - memblock_region_memory_base_pfn(reg); - num_physpages += pages; - printk(" %ldMB", pages >> (20 - PAGE_SHIFT)); - } - printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT)); - - printk(KERN_NOTICE "Memory: %luk/%luk available, %luk reserved, %luK highmem\n", - nr_free_pages() << (PAGE_SHIFT-10), - free_pages << (PAGE_SHIFT-10), - reserved_pages << (PAGE_SHIFT-10), - totalhigh_pages << (PAGE_SHIFT-10)); - + mem_init_print_info(NULL); printk(KERN_NOTICE "Virtual kernel memory layout:\n" " vector : 0x%08lx - 0x%08lx (%4ld kB)\n" " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" @@ -464,7 +419,7 @@ void __init mem_init(void) BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); BUG_ON(TASK_SIZE > MODULES_VADDR); - if (PAGE_SIZE >= 16384 && num_physpages <= 128) { + if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) { /* * On a machine this small we won't get * anywhere without overcommit, so turn -- cgit v0.10.2 From 46a841329a6cd6298e131afd82e7d58130b19025 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:19 -0700 Subject: mm/x86: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Andreas Herrmann Cc: Tang Chen Cc: Wen Congyang Cc: Jianguo Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 5013a48..c587a87 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -90,7 +90,7 @@ static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) { u32 l, h; - int mbytes = num_physpages >> (20-PAGE_SHIFT); + int mbytes = get_num_physpages() >> (20-PAGE_SHIFT); if (c->x86_model < 6) { /* Based on AMD doc 20734R - June 2000 */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 56f7fcf..e68709d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1040,8 +1040,6 @@ void __init setup_arch(char **cmdline_p) /* max_low_pfn get updated here */ find_low_pfn_range(); #else - num_physpages = max_pfn; - check_x2apic(); /* How many end-of-memory variables you have, grandma! */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 9fa46ba..4287f1f 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -660,10 +660,8 @@ void __init initmem_init(void) highstart_pfn = max_low_pfn; printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); - num_physpages = highend_pfn; high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; #else - num_physpages = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; #endif @@ -671,7 +669,7 @@ void __init initmem_init(void) sparse_memory_present_with_active_regions(0); #ifdef CONFIG_FLATMEM - max_mapnr = num_physpages; + max_mapnr = IS_ENABLED(CONFIG_HIGHMEM) ? highend_pfn : max_low_pfn; #endif __vmalloc_start_set = true; @@ -739,9 +737,6 @@ static void __init test_wp_bit(void) void __init mem_init(void) { - int codesize, reservedpages, datasize, initsize; - int tmp; - pci_iommu_alloc(); #ifdef CONFIG_FLATMEM @@ -761,30 +756,9 @@ void __init mem_init(void) /* this will put all low memory onto the freelists */ free_all_bootmem(); - reservedpages = 0; - for (tmp = 0; tmp < max_low_pfn; tmp++) - /* - * Only count reserved RAM pages: - */ - if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) - reservedpages++; - after_bootmem = 1; - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, " - "%dk reserved, %dk data, %dk init, %ldk highmem)\n", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10, - totalhigh_pages << (PAGE_SHIFT-10)); - + mem_init_print_info(NULL); printk(KERN_INFO "virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" #ifdef CONFIG_HIGHMEM diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 9577638..104d56a 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1044,9 +1044,6 @@ static void __init register_page_bootmem_info(void) void __init mem_init(void) { - long codesize, reservedpages, datasize, initsize; - unsigned long absent_pages; - pci_iommu_alloc(); /* clear_bss() already clear the empty_zero_page */ @@ -1055,28 +1052,13 @@ void __init mem_init(void) /* this will put all memory onto the freelists */ free_all_bootmem(); - - absent_pages = absent_pages_in_range(0, max_pfn); - reservedpages = max_pfn - totalram_pages - absent_pages; after_bootmem = 1; - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - /* Register memory areas for /proc/kcore */ kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, VSYSCALL_END - VSYSCALL_START, KCORE_OTHER); - printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " - "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - max_pfn << (PAGE_SHIFT-10), - codesize >> 10, - absent_pages << (PAGE_SHIFT-10), - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10); + mem_init_print_info(NULL); } #ifdef CONFIG_DEBUG_RODATA diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 73a6d73..0342d27 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -83,10 +83,8 @@ void __init initmem_init(void) highstart_pfn = max_low_pfn; printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); - num_physpages = highend_pfn; high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; #else - num_physpages = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; #endif printk(KERN_NOTICE "%ldMB LOWMEM available.\n", -- cgit v0.10.2 From 808c2c3745975714ecd4da4d68c915de9048b12f Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:20 -0700 Subject: mm/xtensa: prepare for removing num_physpages and simplify mem_init() Prepare for removing num_physpages and simplify mem_init(). Signed-off-by: Jiang Liu Cc: Chris Zankel Cc: Max Filippov Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 663c161..479d753 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -173,12 +173,8 @@ void __init zones_init(void) void __init mem_init(void) { - unsigned long codesize, reservedpages, datasize, initsize; - unsigned long highmemsize, tmp, ram; - - max_mapnr = num_physpages = max_low_pfn - ARCH_PFN_OFFSET; + max_mapnr = max_low_pfn - ARCH_PFN_OFFSET; high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); - highmemsize = 0; #ifdef CONFIG_HIGHMEM #error HIGHGMEM not implemented in init.c @@ -186,26 +182,7 @@ void __init mem_init(void) free_all_bootmem(); - reservedpages = ram = 0; - for (tmp = 0; tmp < max_mapnr; tmp++) { - ram++; - if (PageReserved(mem_map+tmp)) - reservedpages++; - } - - codesize = (unsigned long) _etext - (unsigned long) _stext; - datasize = (unsigned long) _edata - (unsigned long) _sdata; - initsize = (unsigned long) __init_end - (unsigned long) __init_begin; - - printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, " - "%ldk data, %ldk init %ldk highmem)\n", - nr_free_pages() << (PAGE_SHIFT-10), - ram << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10, - highmemsize >> 10); + mem_init_print_info(NULL); } #ifdef CONFIG_BLK_DEV_INITRD -- cgit v0.10.2 From 1895418189e08c1d1eec4fbdb5fb41d793f57ba5 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:21 -0700 Subject: mm: kill global variable num_physpages Now all references to num_physpages have been removed, so kill it. Signed-off-by: Jiang Liu Cc: Mel Gorman Cc: Michel Lespinasse Cc: Rik van Riel Cc: Jiang Liu Cc: Hugh Dickins Cc: David Rientjes Cc: Al Viro Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mm.h b/include/linux/mm.h index 09c2353..e550a17 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -29,7 +29,6 @@ struct writeback_control; extern unsigned long max_mapnr; #endif -extern unsigned long num_physpages; extern unsigned long totalram_pages; extern void * high_memory; extern int page_cluster; diff --git a/mm/memory.c b/mm/memory.c index 4075332..b68812d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -82,7 +82,6 @@ EXPORT_SYMBOL(max_mapnr); EXPORT_SYMBOL(mem_map); #endif -unsigned long num_physpages; /* * A number of key systems in x86 including ioremap() rely on the assumption * that high_memory defines the upper bound on direct map memory, then end @@ -92,7 +91,6 @@ unsigned long num_physpages; */ void * high_memory; -EXPORT_SYMBOL(num_physpages); EXPORT_SYMBOL(high_memory); /* diff --git a/mm/nommu.c b/mm/nommu.c index 1898b2fe..e44e6e0 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -56,7 +56,6 @@ void *high_memory; struct page *mem_map; unsigned long max_mapnr; -unsigned long num_physpages; unsigned long highest_memmap_pfn; struct percpu_counter vm_committed_as; int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ @@ -85,7 +84,6 @@ unsigned long vm_memory_committed(void) EXPORT_SYMBOL_GPL(vm_memory_committed); EXPORT_SYMBOL(mem_map); -EXPORT_SYMBOL(num_physpages); /* list of mapped, potentially shareable regions */ static struct kmem_cache *vm_region_jar; -- cgit v0.10.2 From fccc998771043db1613509b26c18b3d7f071e668 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:23 -0700 Subject: mm: introduce helper function set_max_mapnr() Introduce a helper function set_max_mapnr() to set global variable max_mapnr. Also unify condition compilation for max_mapnr with CONFIG_NEED_MULTIPLE_NODES instead of CONFIG_DISCONTIGMEM. Signed-off-by: Jiang Liu Cc: Greg Kroah-Hartman Cc: Mauro Carvalho Chehab Cc: "David S. Miller" Cc: Mark Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mm.h b/include/linux/mm.h index e550a17..b87681a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -25,8 +25,15 @@ struct file_ra_state; struct user_struct; struct writeback_control; -#ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */ +#ifndef CONFIG_NEED_MULTIPLE_NODES /* Don't use mapnrs, do it properly */ extern unsigned long max_mapnr; + +static inline void set_max_mapnr(unsigned long limit) +{ + max_mapnr = limit; +} +#else +static inline void set_max_mapnr(unsigned long limit) { } #endif extern unsigned long totalram_pages; -- cgit v0.10.2 From ce7549e1d824e08871beddb6c7945635dfc4341d Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:24 -0700 Subject: mm/AVR32: prepare for killing free_all_bootmem_node() Prepare for killing free_all_bootmem_node() by using free_all_bootmem() instead. Signed-off-by: Jiang Liu Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c index 0fc04b9..def5391 100644 --- a/arch/avr32/mm/init.c +++ b/arch/avr32/mm/init.c @@ -103,23 +103,12 @@ void __init mem_init(void) pg_data_t *pgdat; high_memory = NULL; + for_each_online_pgdat(pgdat) + high_memory = max_t(void *, high_memory, + __va(pgdat_end_pfn(pgdat) << PAGE_SHIFT)); - /* this will put all low memory onto the freelists */ - for_each_online_pgdat(pgdat) { - void *node_high_memory; - - if (pgdat->node_spanned_pages != 0) - free_all_bootmem_node(pgdat); - - node_high_memory = (void *)((pgdat->node_start_pfn - + pgdat->node_spanned_pages) - << PAGE_SHIFT); - if (node_high_memory > high_memory) - high_memory = node_high_memory; - } - - max_mapnr = MAP_NR(high_memory); - + set_max_mapnr(MAP_NR(high_memory)); + free_all_bootmem(); mem_init_print_info(NULL); } -- cgit v0.10.2 From b57b63a2ace84d09c6fc270150d0408ddef1efa7 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:25 -0700 Subject: mm/IA64: prepare for killing free_all_bootmem_node() Prepare for killing free_all_bootmem_node() by using free_all_bootmem(). Signed-off-by: Jiang Liu Cc: Tony Luck Cc: Fenghua Yu Cc: Tang Chen Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 2d372b4..b6f7f43 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -583,7 +583,6 @@ __setup("nolwsys", nolwsys_setup); void __init mem_init (void) { - pg_data_t *pgdat; int i; BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE); @@ -601,15 +600,11 @@ mem_init (void) #ifdef CONFIG_FLATMEM BUG_ON(!mem_map); - max_mapnr = max_low_pfn; #endif + set_max_mapnr(max_low_pfn); high_memory = __va(max_low_pfn * PAGE_SIZE); - - for_each_online_pgdat(pgdat) - if (pgdat->bdata->node_bootmem_map) - free_all_bootmem_node(pgdat); - + free_all_bootmem(); mem_init_print_info(NULL); /* -- cgit v0.10.2 From c5c009fbe753393dabc5d67b617c8188f81740a2 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:26 -0700 Subject: mm/m32r: prepare for killing free_all_bootmem_node() Prepare for killing free_all_bootmem_node() by using free_all_bootmem(). Signed-off-by: Jiang Liu Cc: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index a4f8d93..0d4146f 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c @@ -111,28 +111,19 @@ void __init paging_init(void) *======================================================================*/ void __init mem_init(void) { - int nid; #ifndef CONFIG_MMU extern unsigned long memory_end; -#endif -#ifndef CONFIG_DISCONTIGMEM - max_mapnr = get_num_physpages(); -#endif /* CONFIG_DISCONTIGMEM */ - -#ifdef CONFIG_MMU - high_memory = (void *)__va(PFN_PHYS(MAX_LOW_PFN(0))); -#else high_memory = (void *)(memory_end & PAGE_MASK); +#else + high_memory = (void *)__va(PFN_PHYS(MAX_LOW_PFN(0))); #endif /* CONFIG_MMU */ /* clear the zero-page */ memset(empty_zero_page, 0, PAGE_SIZE); - /* this will put all low memory onto the freelists */ - for_each_online_node(nid) - free_all_bootmem_node(NODE_DATA(nid)); - + set_max_mapnr(get_num_physpages()); + free_all_bootmem(); mem_init_print_info(NULL); } -- cgit v0.10.2 From b69a9787b1dfb2e5055fb4e0fea71f9e16c7ea01 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:27 -0700 Subject: mm/m68k: prepare for killing free_all_bootmem_node() Prepare for killing free_all_bootmem_node() by using free_all_bootmem(). Signed-off-by: Jiang Liu Cc: Geert Uytterhoeven Cc: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index 397a884..6e0a938 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -148,12 +148,10 @@ void __init print_memmap(void) void __init mem_init(void) { - pg_data_t *pgdat; int i; /* this will put all memory onto the freelists */ - for_each_online_pgdat(pgdat) - free_all_bootmem_node(pgdat); + free_all_bootmem(); #if defined(CONFIG_MMU) && !defined(CONFIG_SUN3) && !defined(CONFIG_COLDFIRE) /* insert pointer tables allocated so far into the tablelist */ -- cgit v0.10.2 From 5ad62f24ba22543a3b35b4f28fafd80e55eda269 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:28 -0700 Subject: mm/metag: prepare for killing free_all_bootmem_node() Prepare for killing free_all_bootmem_node() by using free_all_bootmem(). Signed-off-by: Jiang Liu Cc: James Hogan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c index e0862b7..28813f1 100644 --- a/arch/metag/mm/init.c +++ b/arch/metag/mm/init.c @@ -376,31 +376,21 @@ void __init paging_init(unsigned long mem_end) void __init mem_init(void) { - int nid; - #ifdef CONFIG_HIGHMEM unsigned long tmp; /* * Explicitly reset zone->managed_pages because highmem pages are - * freed before calling free_all_bootmem_node(); + * freed before calling free_all_bootmem(); */ reset_all_zones_managed_pages(); for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) free_highmem_page(pfn_to_page(tmp)); #endif /* CONFIG_HIGHMEM */ - for_each_online_node(nid) { - pg_data_t *pgdat = NODE_DATA(nid); - - if (pgdat->node_spanned_pages) - free_all_bootmem_node(pgdat); - } - + free_all_bootmem(); mem_init_print_info(NULL); show_mem(0); - - return; } void free_initmem(void) -- cgit v0.10.2 From 629e7b4c8051f88daa7d2d53283128a4b2a6fa72 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:30 -0700 Subject: mm/MIPS: prepare for killing free_all_bootmem_node() Prepare for killing free_all_bootmem_node() by using free_all_bootmem(). Signed-off-by: Jiang Liu Cc: Ralf Baechle Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index a0c9e34..a95c00f 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -477,18 +477,8 @@ void __init paging_init(void) void __init mem_init(void) { - unsigned node; - high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); - - for_each_online_node(node) { - /* - * This will free up the bootmem, ie, slot 0 memory. - */ - free_all_bootmem_node(NODE_DATA(node)); - } - + free_all_bootmem(); setup_zero_pages(); /* This comes from node 0 */ - mem_init_print_info(NULL); } -- cgit v0.10.2 From d5c017dde4ea0932f861d3ddf2b2ade92ee3d033 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:31 -0700 Subject: mm/PARISC: prepare for killing free_all_bootmem_node() Prepare for killing free_all_bootmem_node() by using free_all_bootmem(). Signed-off-by: Jiang Liu Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Michal Hocko Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index b4edc76..b0f96c0 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -585,18 +585,8 @@ void __init mem_init(void) > BITS_PER_LONG); high_memory = __va((max_pfn << PAGE_SHIFT)); - -#ifndef CONFIG_DISCONTIGMEM - max_mapnr = page_to_pfn(virt_to_page(high_memory - 1)) + 1; + set_max_mapnr(page_to_pfn(virt_to_page(high_memory - 1)) + 1); free_all_bootmem(); -#else - { - int i; - - for (i = 0; i < npmem_ranges; i++) - free_all_bootmem_node(NODE_DATA(i)); - } -#endif #ifdef CONFIG_PA11 if (hppa_dma_ops == &pcxl_dma_ops) { -- cgit v0.10.2 From 602ddc70ecf710e0cf512862473eac52524ec081 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:32 -0700 Subject: mm/PPC: prepare for killing free_all_bootmem_node() Prepare for killing free_all_bootmem_node() by using free_all_bootmem(). Signed-off-by: Jiang Liu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Alexander Graf Cc: "Suzuki K. Poulose" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 49c18b6..1cb1ea1 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -304,22 +304,8 @@ void __init mem_init(void) #endif high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - -#ifdef CONFIG_NEED_MULTIPLE_NODES - { - pg_data_t *pgdat; - - for_each_online_pgdat(pgdat) - if (pgdat->node_spanned_pages != 0) { - printk("freeing bootmem node %d\n", - pgdat->node_id); - free_all_bootmem_node(pgdat); - } - } -#else - max_mapnr = max_pfn; + set_max_mapnr(max_pfn); free_all_bootmem(); -#endif #ifdef CONFIG_HIGHMEM { -- cgit v0.10.2 From e3a466b29fc3da335a5a5b695ca6da980d157070 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:33 -0700 Subject: mm/SH: prepare for killing free_all_bootmem_node() Prepare for killing free_all_bootmem_node() by using free_all_bootmem(). Signed-off-by: Jiang Liu Cc: Paul Mundt Cc: Wen Congyang Cc: Tang Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index c9a517c..33890fd 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -412,19 +412,11 @@ void __init mem_init(void) iommu_init(); high_memory = NULL; + for_each_online_pgdat(pgdat) + high_memory = max_t(void *, high_memory, + __va(pgdat_end_pfn(pgdat) << PAGE_SHIFT)); - for_each_online_pgdat(pgdat) { - void *node_high_memory; - - if (pgdat->node_spanned_pages) - free_all_bootmem_node(pgdat); - - node_high_memory = (void *)__va((pgdat->node_start_pfn + - pgdat->node_spanned_pages) << - PAGE_SHIFT); - if (node_high_memory > high_memory) - high_memory = node_high_memory; - } + free_all_bootmem(); /* Set this up early, so we can take care of the zero page */ cpu_cache_init(); -- cgit v0.10.2 From e1280be0d8614be94e5bef48b6c830dfa03e82a7 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:34 -0700 Subject: mm: kill free_all_bootmem_node() Now nobody makes use of free_all_bootmem_node(), kill it. Signed-off-by: Jiang Liu Cc: Johannes Weiner Cc: "David S. Miller" Cc: Yinghai Lu Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 0e48c32..f1f07d3 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -44,7 +44,6 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat, unsigned long endpfn); extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); -extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); extern unsigned long free_all_bootmem(void); extern void reset_all_zones_managed_pages(void); diff --git a/mm/bootmem.c b/mm/bootmem.c index 58609bb..6ab7744 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -264,24 +264,6 @@ void __init reset_all_zones_managed_pages(void) } /** - * free_all_bootmem_node - release a node's free pages to the buddy allocator - * @pgdat: node to be released - * - * Returns the number of pages actually released. - */ -unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) -{ - unsigned long pages; - - register_page_bootmem_info_node(pgdat); - reset_node_managed_pages(pgdat); - pages = free_all_bootmem_core(pgdat->bdata); - totalram_pages += pages; - - return pages; -} - -/** * free_all_bootmem - release free pages to the buddy allocator * * Returns the number of pages actually released. -- cgit v0.10.2 From 2fb1cd5a283a7c40457731415c6e6432f061edc2 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:36 -0700 Subject: mm/alpha: unify mem_init() for both UMA and NUMA architectures Now mem_init() for both Alpha UMA and Alpha NUMA are the same, so unify it to reduce duplicated code. Signed-off-by: Jiang Liu Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index af91010..a1bea91 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -276,17 +276,14 @@ srm_paging_stop (void) } #endif -#ifndef CONFIG_DISCONTIGMEM void __init mem_init(void) { - max_mapnr = max_low_pfn; - free_all_bootmem(); + set_max_mapnr(max_low_pfn); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - + free_all_bootmem(); mem_init_print_info(NULL); } -#endif /* CONFIG_DISCONTIGMEM */ void free_initmem(void) diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index 0894b3a8..d543d71 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -319,13 +319,3 @@ void __init paging_init(void) /* Initialize the kernel's ZERO_PGE. */ memset((void *)ZERO_PGE, 0, PAGE_SIZE); } - -void __init mem_init(void) -{ - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); - free_all_bootmem(); - mem_init_print_info(NULL); -#if 0 - mem_stress(); -#endif -} -- cgit v0.10.2 From 3e548a9e8895318143a983681a8ef97312989880 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:37 -0700 Subject: mm/m68k: fix build warning of unused variable Fix build warning of unused variable: arch/m68k/mm/init.c: In function 'mem_init': arch/m68k/mm/init.c:151:6: warning: unused variable 'i' [-Wunused-variable] Signed-off-by: Jiang Liu Cc: Geert Uytterhoeven Cc: Greg Ungerer Cc: Thadeu Lima de Souza Cascardo Cc: Sergei Shtylyov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index 6e0a938..6b4baa6 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -146,14 +146,11 @@ void __init print_memmap(void) MLK_ROUNDUP(__bss_start, __bss_stop)); } -void __init mem_init(void) +static inline void init_pointer_tables(void) { +#if defined(CONFIG_MMU) && !defined(CONFIG_SUN3) && !defined(CONFIG_COLDFIRE) int i; - /* this will put all memory onto the freelists */ - free_all_bootmem(); - -#if defined(CONFIG_MMU) && !defined(CONFIG_SUN3) && !defined(CONFIG_COLDFIRE) /* insert pointer tables allocated so far into the tablelist */ init_pointer_table((unsigned long)kernel_pg_dir); for (i = 0; i < PTRS_PER_PGD; i++) { @@ -165,7 +162,13 @@ void __init mem_init(void) if (zero_pgtable) init_pointer_table((unsigned long)zero_pgtable); #endif +} +void __init mem_init(void) +{ + /* this will put all memory onto the freelists */ + free_all_bootmem(); + init_pointer_tables(); mem_init_print_info(NULL); print_memmap(); } -- cgit v0.10.2 From 922c1df068e69a5acf482c54e37d41e633a7d54a Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:38 -0700 Subject: mm/ALPHA: clean up unused VALID_PAGE() VALID_PAGE() has been removed from kernel long time ago, so clean up it. Signed-off-by: Jiang Liu Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Nadia Yvette Chambers Cc: Jiri Kosina Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/alpha/include/asm/mmzone.h b/arch/alpha/include/asm/mmzone.h index c5b5d6b..14ce27b 100644 --- a/arch/alpha/include/asm/mmzone.h +++ b/arch/alpha/include/asm/mmzone.h @@ -71,8 +71,6 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define VALID_PAGE(page) (((page) - mem_map) < max_mapnr) - #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> 32)) #define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> 32)) #define pte_pfn(pte) (pte_val(pte) >> 32) -- cgit v0.10.2 From 5b84de34642bad442942ace0a57c4dee00266657 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:40 -0700 Subject: mm/ARM: fix stale comment about VALID_PAGE() VALID_PAGE() has been removed from kernel long time ago, so fix the comment. Signed-off-by: Jiang Liu Cc: Russell King Cc: Will Deacon Cc: Nicolas Pitre Cc: Stephen Boyd Cc: Giancarlo Asnaghi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 584786f..e750a93 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -276,12 +276,6 @@ static inline __deprecated void *bus_to_virt(unsigned long x) /* * Conversion between a struct page and a physical address. * - * Note: when converting an unknown physical address to a - * struct page, the resulting pointer must be validated - * using VALID_PAGE(). It must return an invalid struct page - * for any physical address not corresponding to a system - * RAM address. - * * page_to_pfn(page) convert a struct page * to a PFN number * pfn_to_page(pfn) convert a _valid_ PFN number to struct page * * -- cgit v0.10.2 From 35fa075f3ea432878ef50c0206df82f003a72222 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:41 -0700 Subject: mm/CRIS: clean up unused VALID_PAGE() VALID_PAGE() has been removed from kernel long time ago, so clean up it. Signed-off-by: Jiang Liu Acked-by: Jesper Nilsson Cc: Mikael Starvik Cc: Jiang Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/cris/include/asm/page.h b/arch/cris/include/asm/page.h index be45ee3..dfc53f9 100644 --- a/arch/cris/include/asm/page.h +++ b/arch/cris/include/asm/page.h @@ -51,7 +51,6 @@ typedef struct page *pgtable_t; */ #define virt_to_page(kaddr) (mem_map + (((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT)) -#define VALID_PAGE(page) (((page) - mem_map) < max_mapnr) #define virt_addr_valid(kaddr) pfn_valid((unsigned)(kaddr) >> PAGE_SHIFT) /* convert a page (based on mem_map and forward) to a physical address -- cgit v0.10.2 From 924b387a72a02216b4899717c5d249c3ce5d11bd Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:42 -0700 Subject: mm/microblaze: clean up unused VALID_PAGE() VALID_PAGE() has been removed from kernel long time ago, so clean up it. Signed-off-by: Jiang Liu Cc: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index 85a5ae8..fd85087 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -168,7 +168,6 @@ extern int page_is_ram(unsigned long pfn); # else /* CONFIG_MMU */ # define ARCH_PFN_OFFSET (memory_start >> PAGE_SHIFT) # define pfn_valid(pfn) ((pfn) < (max_mapnr + ARCH_PFN_OFFSET)) -# define VALID_PAGE(page) ((page - mem_map) < max_mapnr) # endif /* CONFIG_MMU */ # endif /* __ASSEMBLY__ */ -- cgit v0.10.2 From b26a3dfd4c0b888303a5909ef37febeb582e190e Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jul 2013 15:04:43 -0700 Subject: mm/unicore32: fix stale comment about VALID_PAGE() VALID_PAGE() has been removed from kernel long time ago, so fix the comment. Signed-off-by: Jiang Liu Acked-by: Guan Xuetao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/unicore32/include/asm/memory.h b/arch/unicore32/include/asm/memory.h index 5eddb99..debafc4 100644 --- a/arch/unicore32/include/asm/memory.h +++ b/arch/unicore32/include/asm/memory.h @@ -98,12 +98,6 @@ /* * Conversion between a struct page and a physical address. * - * Note: when converting an unknown physical address to a - * struct page, the resulting pointer must be validated - * using VALID_PAGE(). It must return an invalid struct page - * for any physical address not corresponding to a system - * RAM address. - * * page_to_pfn(page) convert a struct page * to a PFN number * pfn_to_page(pfn) convert a _valid_ PFN number to struct page * * -- cgit v0.10.2 From 55878e88c59221c3187e1c24ec3b15eb79c374c0 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 3 Jul 2013 15:04:44 -0700 Subject: sparsemem: add BUILD_BUG_ON when sizeof mem_section is non-power-of-2 Instead of leaving a hidden trap for the next person who comes along and wants to add something to mem_section, add a big fat warning about it needing to be a power-of-2, and insert a BUILD_BUG_ON() in sparse_init() to catch mistakes. Right now non-power-of-2 mem_sections cause a number of WARNs at boot (which don't clearly point to the size of mem_section as an issue), but the system limps on (temporarily, at least). This is based upon Dave Hansen's earlier RFC where he ran into the same issue: "sparsemem: fix boot when SECTIONS_PER_ROOT is not power-of-2" http://lkml.indiana.edu/hypermail/linux/kernel/1205.2/03077.html Signed-off-by: Cody P Schafer Acked-by: Dave Hansen Cc: Jiang Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 09d381b..ae19af5 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1137,6 +1137,10 @@ struct mem_section { struct page_cgroup *page_cgroup; unsigned long pad; #endif + /* + * WARNING: mem_section must be a power-of-2 in size for the + * calculation and use of SECTION_ROOT_MASK to make sense. + */ }; #ifdef CONFIG_SPARSEMEM_EXTREME diff --git a/mm/sparse.c b/mm/sparse.c index 1c91f0d3..3194ec4 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -481,6 +481,9 @@ void __init sparse_init(void) struct page **map_map; #endif + /* see include/linux/mmzone.h 'struct mem_section' definition */ + BUILD_BUG_ON(!is_power_of_2(sizeof(struct mem_section))); + /* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */ set_pageblock_order(); -- cgit v0.10.2 From 26c0c5bf38159673f0ae28c38fc9f90dbeb4d4aa Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:04:45 -0700 Subject: documentation: update address_space_operations The documentation for address_space_operations is partially out of date. Signed-off-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 1f0ba30..fc5d2a1 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -559,7 +559,6 @@ your filesystem. The following members are defined: struct address_space_operations { int (*writepage)(struct page *page, struct writeback_control *wbc); int (*readpage)(struct file *, struct page *); - int (*sync_page)(struct page *); int (*writepages)(struct address_space *, struct writeback_control *); int (*set_page_dirty)(struct page *page); int (*readpages)(struct file *filp, struct address_space *mapping, @@ -581,6 +580,8 @@ struct address_space_operations { /* migrate the contents of a page to the specified target */ int (*migratepage) (struct page *, struct page *); int (*launder_page) (struct page *); + int (*is_partially_uptodate) (struct page *, read_descriptor_t *, + unsigned long); int (*error_remove_page) (struct mapping *mapping, struct page *page); int (*swap_activate)(struct file *); int (*swap_deactivate)(struct file *); @@ -612,13 +613,6 @@ struct address_space_operations { In this case, the page will be relocated, relocked and if that all succeeds, ->readpage will be called again. - sync_page: called by the VM to notify the backing store to perform all - queued I/O operations for a page. I/O operations for other pages - associated with this address_space object may also be performed. - - This function is optional and is called only for pages with - PG_Writeback set while waiting for the writeback to complete. - writepages: called by the VM to write out pages associated with the address_space object. If wbc->sync_mode is WBC_SYNC_ALL, then the writeback_control will specify a range of pages that must be @@ -747,6 +741,11 @@ struct address_space_operations { prevent redirtying the page, it is kept locked during the whole operation. + is_partially_uptodate: Called by the VM when reading a file through the + pagecache when the underlying blocksize != pagesize. If the required + block is up to date then the read can complete without needing the IO + to bring the whole page up to date. + error_remove_page: normally set to generic_error_remove_page if truncation is ok for this address space. Used for memory failure handling. Setting this implies you deal with pages going away under you, -- cgit v0.10.2 From 543cc115339baa44fbea877b3d8673aca652622f Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:04:46 -0700 Subject: documentation: document the is_dirty_writeback aops callback Signed-off-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index fc5d2a1..f93a882 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -582,6 +582,7 @@ struct address_space_operations { int (*launder_page) (struct page *); int (*is_partially_uptodate) (struct page *, read_descriptor_t *, unsigned long); + void (*is_dirty_writeback) (struct page *, bool *, bool *); int (*error_remove_page) (struct mapping *mapping, struct page *page); int (*swap_activate)(struct file *); int (*swap_deactivate)(struct file *); @@ -746,6 +747,15 @@ struct address_space_operations { block is up to date then the read can complete without needing the IO to bring the whole page up to date. + is_dirty_writeback: Called by the VM when attempting to reclaim a page. + The VM uses dirty and writeback information to determine if it needs + to stall to allow flushers a chance to complete some IO. Ordinarily + it can use PageDirty and PageWriteback but some filesystems have + more complex state (unstable pages in NFS prevent reclaim) or + do not set those flags due to locking problems (jbd). This callback + allows a filesystem to indicate to the VM if a page should be + treated as dirty or writeback for the purposes of stalling. + error_remove_page: normally set to generic_error_remove_page if truncation is ok for this address space. Used for memory failure handling. Setting this implies you deal with pages going away under you, -- cgit v0.10.2 From d82b1d85760a8344d06272da67f0684243235fac Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Wed, 3 Jul 2013 15:04:47 -0700 Subject: mm, vmalloc: only call setup_vmalloc_vm() only in __get_vm_area_node() Now for insert_vmalloc_vm, it only calls the two functions: - setup_vmalloc_vm: fill vm_struct and vmap_area instances - clear_vm_unlist: clear VM_UNLIST bit in vm_struct->flags So in __get_vm_area_node(), if VM_UNLIST bit unset in flags, that is the else branch here, we don't need to clear VM_UNLIST bit for vm->flags since this bit is obviously not set. That is to say, we could only call setup_vmalloc_vm instead of insert_vmalloc_vm here. And then we could even remove the if test here. Signed-off-by: Zhang Yanfei Acked-by: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmalloc.c b/mm/vmalloc.c index b725990..d23e70e 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1367,16 +1367,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, return NULL; } - /* - * When this function is called from __vmalloc_node_range, - * we add VM_UNLIST flag to avoid accessing uninitialized - * members of vm_struct such as pages and nr_pages fields. - * They will be set later. - */ - if (flags & VM_UNLIST) - setup_vmalloc_vm(area, va, flags, caller); - else - insert_vmalloc_vm(area, va, flags, caller); + setup_vmalloc_vm(area, va, flags, caller); return area; } -- cgit v0.10.2 From 3645cb4a4eb2002dad17b314559badf8a20e55a7 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Wed, 3 Jul 2013 15:04:48 -0700 Subject: mm, vmalloc: call setup_vmalloc_vm() instead of insert_vmalloc_vm() Here we pass flags with only VM_ALLOC bit set, it is unnecessary to call clear_vm_unlist to clear VM_UNLIST bit. So use setup_vmalloc_vm instead of insert_vmalloc_vm. Signed-off-by: Zhang Yanfei Acked-by: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d23e70e..db48d51 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2526,8 +2526,8 @@ found: /* insert all vm's */ for (area = 0; area < nr_vms; area++) - insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC, - pcpu_get_vm_areas); + setup_vmalloc_vm(vms[area], vas[area], VM_ALLOC, + pcpu_get_vm_areas); kfree(vas); return vms; -- cgit v0.10.2 From f6d480059bedaf4feb06466c770f5fcace9eca31 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Wed, 3 Jul 2013 15:04:49 -0700 Subject: mm, vmalloc: remove insert_vmalloc_vm() Now this function is nowhere used, we can remove it directly. Signed-off-by: Zhang Yanfei Acked-by: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmalloc.c b/mm/vmalloc.c index db48d51..bd60bff 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1322,13 +1322,6 @@ static void clear_vm_unlist(struct vm_struct *vm) vm->flags &= ~VM_UNLIST; } -static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, - unsigned long flags, const void *caller) -{ - setup_vmalloc_vm(vm, va, flags, caller); - clear_vm_unlist(vm); -} - static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long align, unsigned long flags, unsigned long start, unsigned long end, int node, gfp_t gfp_mask, const void *caller) -- cgit v0.10.2 From 0f2d4a8e27108ad3b2555396b06392be590fe287 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Wed, 3 Jul 2013 15:04:50 -0700 Subject: mm, vmalloc: use clamp() to simplify code Signed-off-by: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmalloc.c b/mm/vmalloc.c index bd60bff..91a1047 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1330,16 +1330,8 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, struct vm_struct *area; BUG_ON(in_interrupt()); - if (flags & VM_IOREMAP) { - int bit = fls(size); - - if (bit > IOREMAP_MAX_ORDER) - bit = IOREMAP_MAX_ORDER; - else if (bit < PAGE_SHIFT) - bit = PAGE_SHIFT; - - align = 1ul << bit; - } + if (flags & VM_IOREMAP) + align = 1ul << clamp(fls(size), PAGE_SHIFT, IOREMAP_MAX_ORDER); size = PAGE_ALIGN(size); if (unlikely(!size)) -- cgit v0.10.2 From 519ebea3bf6df45439e79c54bda1d9e29fe13a64 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 3 Jul 2013 15:04:51 -0700 Subject: mm: memcontrol: factor out reclaim iterator loading and updating mem_cgroup_iter() is too hard to follow. Factor out the lockless reclaim iterator loading and updating so it's easier to follow the big picture. Also document the iterator invalidation mechanism a bit more extensively. Signed-off-by: Johannes Weiner Reported-by: Tejun Heo Reviewed-by: Tejun Heo Acked-by: Michal Hocko Cc: KAMEZAWA Hiroyuki Cc: Glauber Costa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4748966..2e851f4 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1148,6 +1148,58 @@ skip_node: return NULL; } +static void mem_cgroup_iter_invalidate(struct mem_cgroup *root) +{ + /* + * When a group in the hierarchy below root is destroyed, the + * hierarchy iterator can no longer be trusted since it might + * have pointed to the destroyed group. Invalidate it. + */ + atomic_inc(&root->dead_count); +} + +static struct mem_cgroup * +mem_cgroup_iter_load(struct mem_cgroup_reclaim_iter *iter, + struct mem_cgroup *root, + int *sequence) +{ + struct mem_cgroup *position = NULL; + /* + * A cgroup destruction happens in two stages: offlining and + * release. They are separated by a RCU grace period. + * + * If the iterator is valid, we may still race with an + * offlining. The RCU lock ensures the object won't be + * released, tryget will fail if we lost the race. + */ + *sequence = atomic_read(&root->dead_count); + if (iter->last_dead_count == *sequence) { + smp_rmb(); + position = iter->last_visited; + if (position && !css_tryget(&position->css)) + position = NULL; + } + return position; +} + +static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter, + struct mem_cgroup *last_visited, + struct mem_cgroup *new_position, + int sequence) +{ + if (last_visited) + css_put(&last_visited->css); + /* + * We store the sequence count from the time @last_visited was + * loaded successfully instead of rereading it here so that we + * don't lose destruction events in between. We could have + * raced with the destruction of @new_position after all. + */ + iter->last_visited = new_position; + smp_wmb(); + iter->last_dead_count = sequence; +} + /** * mem_cgroup_iter - iterate over memory cgroup hierarchy * @root: hierarchy root @@ -1171,7 +1223,6 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, { struct mem_cgroup *memcg = NULL; struct mem_cgroup *last_visited = NULL; - unsigned long uninitialized_var(dead_count); if (mem_cgroup_disabled()) return NULL; @@ -1191,6 +1242,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, rcu_read_lock(); while (!memcg) { struct mem_cgroup_reclaim_iter *uninitialized_var(iter); + int uninitialized_var(seq); if (reclaim) { int nid = zone_to_nid(reclaim->zone); @@ -1204,37 +1256,13 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, goto out_unlock; } - /* - * If the dead_count mismatches, a destruction - * has happened or is happening concurrently. - * If the dead_count matches, a destruction - * might still happen concurrently, but since - * we checked under RCU, that destruction - * won't free the object until we release the - * RCU reader lock. Thus, the dead_count - * check verifies the pointer is still valid, - * css_tryget() verifies the cgroup pointed to - * is alive. - */ - dead_count = atomic_read(&root->dead_count); - if (dead_count == iter->last_dead_count) { - smp_rmb(); - last_visited = iter->last_visited; - if (last_visited && - !css_tryget(&last_visited->css)) - last_visited = NULL; - } + last_visited = mem_cgroup_iter_load(iter, root, &seq); } memcg = __mem_cgroup_iter_next(root, last_visited); if (reclaim) { - if (last_visited) - css_put(&last_visited->css); - - iter->last_visited = memcg; - smp_wmb(); - iter->last_dead_count = dead_count; + mem_cgroup_iter_update(iter, last_visited, memcg, seq); if (!memcg) iter->generation++; @@ -6318,14 +6346,14 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg) struct mem_cgroup *parent = memcg; while ((parent = parent_mem_cgroup(parent))) - atomic_inc(&parent->dead_count); + mem_cgroup_iter_invalidate(parent); /* * if the root memcg is not hierarchical we have to check it * explicitely. */ if (!root_mem_cgroup->use_hierarchy) - atomic_inc(&root_mem_cgroup->dead_count); + mem_cgroup_iter_invalidate(root_mem_cgroup); } static void mem_cgroup_css_offline(struct cgroup *cont) -- cgit v0.10.2 From 760033c45e8aa13c9e02530da7274edc624c7689 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Wed, 3 Jul 2013 15:04:52 -0700 Subject: arch/frv/kernel/traps.c: using vsnprintf() instead of vsprintf() Since die_if_kernel() is an extern common used function, better always check the buffer length to avoid memory overflow by a long 'str'. Signed-off-by: Chen Gang Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c index 4bff48c..a6d105d 100644 --- a/arch/frv/kernel/traps.c +++ b/arch/frv/kernel/traps.c @@ -523,7 +523,7 @@ void die_if_kernel(const char *str, ...) return; va_start(va, str); - vsprintf(buffer, str, va); + vsnprintf(buffer, sizeof(buffer), str, va); va_end(va); console_verbose(); -- cgit v0.10.2 From 98bd8d05ea5798ff3179ee33f0ca2789722ecc5a Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Wed, 3 Jul 2013 15:04:53 -0700 Subject: arch/frv/kernel/setup.c: use strncmp() instead of memcmp() 'cmdline' is a NUL terminated string, when its length < 4, memcmp() will cause memory access out of boundary. So use strncmp() instead of memcmp(). Signed-off-by: Chen Gang Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c index f78f8cb..ae3a670 100644 --- a/arch/frv/kernel/setup.c +++ b/arch/frv/kernel/setup.c @@ -735,7 +735,7 @@ static void __init parse_cmdline_early(char *cmdline) /* "mem=XXX[kKmM]" sets SDRAM size to , overriding the value we worked * out from the SDRAM controller mask register */ - if (!memcmp(cmdline, "mem=", 4)) { + if (!strncmp(cmdline, "mem=", 4)) { unsigned long long mem_size; mem_size = memparse(cmdline + 4, &cmdline); -- cgit v0.10.2 From e7152b97f38f1f5b915c719e6a3040697a700a16 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Jul 2013 15:04:54 -0700 Subject: err.h: IS_ERR() can accept __user pointers Sparse generates a false positive when you pass a __user or __iomem pointer to the IS_ERR() functions. drivers/rtc/rtc-ds1286.c:344:36: sparse: incorrect type in argument 1 (different address spaces) drivers/rtc/rtc-ds1286.c:344:36: expected void const *ptr drivers/rtc/rtc-ds1286.c:344:36: got unsigned int [noderef] [usertype] *rtcregs We can silence these by adding a __force here and upgrading to Sparse v0.4.5-rc1 or later. This change has no effect when using current Sparse releases. Signed-off-by: Dan Carpenter Acked-by: Christopher Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/err.h b/include/linux/err.h index f2edce2..221fcfb 100644 --- a/include/linux/err.h +++ b/include/linux/err.h @@ -24,17 +24,17 @@ static inline void * __must_check ERR_PTR(long error) return (void *) error; } -static inline long __must_check PTR_ERR(const void *ptr) +static inline long __must_check PTR_ERR(__force const void *ptr) { return (long) ptr; } -static inline long __must_check IS_ERR(const void *ptr) +static inline long __must_check IS_ERR(__force const void *ptr) { return IS_ERR_VALUE((unsigned long)ptr); } -static inline long __must_check IS_ERR_OR_NULL(const void *ptr) +static inline long __must_check IS_ERR_OR_NULL(__force const void *ptr) { return !ptr || IS_ERR_VALUE((unsigned long)ptr); } @@ -46,13 +46,13 @@ static inline long __must_check IS_ERR_OR_NULL(const void *ptr) * Explicitly cast an error-valued pointer to another pointer type in such a * way as to make it clear that's what's going on. */ -static inline void * __must_check ERR_CAST(const void *ptr) +static inline void * __must_check ERR_CAST(__force const void *ptr) { /* cast away the const */ return (void *) ptr; } -static inline int __must_check PTR_RET(const void *ptr) +static inline int __must_check PTR_RET(__force const void *ptr) { if (IS_ERR(ptr)) return PTR_ERR(ptr); -- cgit v0.10.2 From 096a8aac6bf4a5a0b2ef812ad76d056bbf3fb2af Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Jul 2013 15:04:55 -0700 Subject: clean up scary strncpy(dst, src, strlen(src)) uses Fix various weird constructions of strncpy(dst, src, strlen(src)). Length limits should be about the space available in the destination, not repurposed as a method to either always include or always exclude a trailing NULL byte. Either the NULL should always be copied (using strlcpy), or it should not be copied (using something like memcpy). Readable code should not depend on the weird behavior of strncpy when it hits the length limit. Better to avoid the anti-pattern entirely. [akpm@linux-foundation.org: revert getdelays.c part due to missing bsd/string.h] Signed-off-by: Kees Cook Acked-by: Greg Kroah-Hartman [staging] Acked-by: Rafael J. Wysocki [acpi] Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Ursula Braun Cc: Frank Blaschka Cc: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index fcae5fa..193745d 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -677,10 +677,9 @@ void acpi_irq_stats_init(void) else sprintf(buffer, "bug%02X", i); - name = kzalloc(strlen(buffer) + 1, GFP_KERNEL); + name = kstrdup(buffer, GFP_KERNEL); if (name == NULL) goto fail; - strncpy(name, buffer, strlen(buffer) + 1); sysfs_attr_init(&counter_attrs[i].attr); counter_attrs[i].attr.name = name; diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index e70af24..d1c8025 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -315,10 +315,8 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev, if (qeth_configure_cq(card, QETH_CQ_ENABLED)) return -EPERM; - for (i = 0; i < 8; i++) - card->options.hsuid[i] = ' '; - card->options.hsuid[8] = '\0'; - strncpy(card->options.hsuid, tmp, strlen(tmp)); + snprintf(card->options.hsuid, sizeof(card->options.hsuid), + "%-8s", tmp); ASCEBC(card->options.hsuid, 8); if (card->dev) memcpy(card->dev->perm_addr, card->options.hsuid, 9); diff --git a/drivers/staging/tidspbridge/rmgr/drv_interface.c b/drivers/staging/tidspbridge/rmgr/drv_interface.c index 9c02056..6d04eb4 100644 --- a/drivers/staging/tidspbridge/rmgr/drv_interface.c +++ b/drivers/staging/tidspbridge/rmgr/drv_interface.c @@ -421,12 +421,11 @@ static int omap3_bridge_startup(struct platform_device *pdev) drv_datap->tc_wordswapon = tc_wordswapon; if (base_img) { - drv_datap->base_img = kmalloc(strlen(base_img) + 1, GFP_KERNEL); + drv_datap->base_img = kstrdup(base_img, GFP_KERNEL); if (!drv_datap->base_img) { err = -ENOMEM; goto err2; } - strncpy(drv_datap->base_img, base_img, strlen(base_img) + 1); } dev_set_drvdata(bridge, drv_datap); diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index fc90ab1..4338ff3 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -69,7 +69,7 @@ static char *dentry_name(struct dentry *dentry, int extra) struct dentry *parent; char *root, *name; const char *seg_name; - int len, seg_len; + int len, seg_len, root_len; len = 0; parent = dentry; @@ -81,7 +81,8 @@ static char *dentry_name(struct dentry *dentry, int extra) } root = "proc"; - len += strlen(root); + root_len = strlen(root); + len += root_len; name = kmalloc(len + extra + 1, GFP_KERNEL); if (name == NULL) return NULL; @@ -91,7 +92,7 @@ static char *dentry_name(struct dentry *dentry, int extra) while (parent->d_parent != parent) { if (is_pid(parent)) { seg_name = "pid"; - seg_len = strlen("pid"); + seg_len = strlen(seg_name); } else { seg_name = parent->d_name.name; @@ -100,10 +101,10 @@ static char *dentry_name(struct dentry *dentry, int extra) len -= seg_len + 1; name[len] = '/'; - strncpy(&name[len + 1], seg_name, seg_len); + memcpy(&name[len + 1], seg_name, seg_len); parent = parent->d_parent; } - strncpy(name, root, strlen(root)); + memcpy(name, root, root_len); return name; } -- cgit v0.10.2 From 02aa2a37636c8fa4fb9322d91be46ff8225b7de0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Jul 2013 15:04:56 -0700 Subject: drivers: avoid format string in dev_set_name Calling dev_set_name with a single paramter causes it to be handled as a format string. Many callers are passing potentially dynamic string content, so use "%s" in those cases to avoid any potential accidents, including wrappers like device_create*() and bdi_register(). Signed-off-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/base/attribute_container.c b/drivers/base/attribute_container.c index d78b204..ecc1929 100644 --- a/drivers/base/attribute_container.c +++ b/drivers/base/attribute_container.c @@ -167,7 +167,7 @@ attribute_container_add_device(struct device *dev, ic->classdev.parent = get_device(dev); ic->classdev.class = cont->class; cont->class->dev_release = attribute_container_release; - dev_set_name(&ic->classdev, dev_name(dev)); + dev_set_name(&ic->classdev, "%s", dev_name(dev)); if (fn) fn(cont, dev, &ic->classdev); else diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 3b36797..af8372f 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -477,7 +477,7 @@ struct devfreq *devfreq_add_device(struct device *dev, GFP_KERNEL); devfreq->last_stat_updated = jiffies; - dev_set_name(&devfreq->dev, dev_name(dev)); + dev_set_name(&devfreq->dev, "%s", dev_name(dev)); err = device_register(&devfreq->dev); if (err) { put_device(&devfreq->dev); diff --git a/drivers/extcon/extcon-class.c b/drivers/extcon/extcon-class.c index 8c69803..18ccade 100644 --- a/drivers/extcon/extcon-class.c +++ b/drivers/extcon/extcon-class.c @@ -602,7 +602,7 @@ int extcon_dev_register(struct extcon_dev *edev, struct device *dev) edev->dev->class = extcon_class; edev->dev->release = extcon_dev_release; - dev_set_name(edev->dev, edev->name ? edev->name : dev_name(dev)); + dev_set_name(edev->dev, "%s", edev->name ? edev->name : dev_name(dev)); if (edev->max_supported) { char buf[10]; diff --git a/drivers/hsi/hsi.c b/drivers/hsi/hsi.c index 833dd1a..66d4458 100644 --- a/drivers/hsi/hsi.c +++ b/drivers/hsi/hsi.c @@ -75,7 +75,7 @@ static void hsi_new_client(struct hsi_port *port, struct hsi_board_info *info) cl->device.bus = &hsi_bus_type; cl->device.parent = &port->device; cl->device.release = hsi_client_release; - dev_set_name(&cl->device, info->name); + dev_set_name(&cl->device, "%s", info->name); cl->device.platform_data = info->platform_data; if (info->archdata) cl->device.archdata = *info->archdata; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 2ff6204..0b510ba 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1756,7 +1756,7 @@ static int ide_cd_probe(ide_drive_t *drive) info->dev.parent = &drive->gendev; info->dev.release = ide_cd_release; - dev_set_name(&info->dev, dev_name(&drive->gendev)); + dev_set_name(&info->dev, "%s", dev_name(&drive->gendev)); if (device_register(&info->dev)) goto out_free_disk; diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index de86631..838996a 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -392,7 +392,7 @@ static int ide_gd_probe(ide_drive_t *drive) idkp->dev.parent = &drive->gendev; idkp->dev.release = ide_disk_release; - dev_set_name(&idkp->dev, dev_name(&drive->gendev)); + dev_set_name(&idkp->dev, "%s", dev_name(&drive->gendev)); if (device_register(&idkp->dev)) goto out_free_disk; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 068cef0..2a744a9 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -545,7 +545,7 @@ static int ide_register_port(ide_hwif_t *hwif) int ret; /* register with global device tree */ - dev_set_name(&hwif->gendev, hwif->name); + dev_set_name(&hwif->gendev, "%s", hwif->name); dev_set_drvdata(&hwif->gendev, hwif); if (hwif->gendev.parent == NULL) hwif->gendev.parent = hwif->dev; @@ -559,7 +559,7 @@ static int ide_register_port(ide_hwif_t *hwif) } hwif->portdev = device_create(ide_port_class, &hwif->gendev, - MKDEV(0, 0), hwif, hwif->name); + MKDEV(0, 0), hwif, "%s", hwif->name); if (IS_ERR(hwif->portdev)) { ret = PTR_ERR(hwif->portdev); device_unregister(&hwif->gendev); diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index c6c574b..1793aea 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -1985,7 +1985,7 @@ static int ide_tape_probe(ide_drive_t *drive) tape->dev.parent = &drive->gendev; tape->dev.release = ide_tape_release; - dev_set_name(&tape->dev, dev_name(&drive->gendev)); + dev_set_name(&tape->dev, "%s", dev_name(&drive->gendev)); if (device_register(&tape->dev)) goto out_free_disk; diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 246fdc1..99904f7 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -813,7 +813,7 @@ int ib_device_register_sysfs(struct ib_device *device, class_dev->class = &ib_class; class_dev->parent = device->dma_device; - dev_set_name(class_dev, device->name); + dev_set_name(class_dev, "%s", device->name); dev_set_drvdata(class_dev, device); INIT_LIST_HEAD(&device->port_list); diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index b56c942..9dd0bc8 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -2208,7 +2208,7 @@ int qib_cdev_init(int minor, const char *name, goto err_cdev; } - device = device_create(qib_class, NULL, dev, NULL, name); + device = device_create(qib_class, NULL, dev, NULL, "%s", name); if (!IS_ERR(device)) goto done; ret = PTR_ERR(device); diff --git a/drivers/isdn/mISDN/dsp_pipeline.c b/drivers/isdn/mISDN/dsp_pipeline.c index 88305c9..8b1a66c 100644 --- a/drivers/isdn/mISDN/dsp_pipeline.c +++ b/drivers/isdn/mISDN/dsp_pipeline.c @@ -102,7 +102,7 @@ int mISDN_dsp_element_register(struct mISDN_dsp_element *elem) entry->dev.class = elements_class; entry->dev.release = mISDN_dsp_dev_release; dev_set_drvdata(&entry->dev, elem); - dev_set_name(&entry->dev, elem->name); + dev_set_name(&entry->dev, "%s", elem->name); ret = device_register(&entry->dev); if (ret) { printk(KERN_ERR "%s: failed to register %s\n", diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index c400c57..048c823 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -1151,7 +1151,7 @@ static int __init mtd_bdi_init(struct backing_dev_info *bdi, const char *name) ret = bdi_init(bdi); if (!ret) - ret = bdi_register(bdi, NULL, name); + ret = bdi_register(bdi, NULL, "%s", name); if (ret) bdi_destroy(bdi); diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index e4ac38a..b13344c 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -743,7 +743,7 @@ static int wmi_create_device(const struct guid_block *gblock, wblock->dev.class = &wmi_class; wmi_gtoa(gblock->guid, guid_string); - dev_set_name(&wblock->dev, guid_string); + dev_set_name(&wblock->dev, "%s", guid_string); dev_set_drvdata(&wblock->dev, wblock); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c1c5552..8fa3d0b 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2931,7 +2931,7 @@ static int sd_probe(struct device *dev) device_initialize(&sdkp->dev); sdkp->dev.parent = dev; sdkp->dev.class = &sd_disk_class; - dev_set_name(&sdkp->dev, dev_name(dev)); + dev_set_name(&sdkp->dev, "%s", dev_name(dev)); if (device_add(&sdkp->dev)) goto out_free_index; diff --git a/drivers/staging/android/timed_output.c b/drivers/staging/android/timed_output.c index ec9e2ae..ee3a57f 100644 --- a/drivers/staging/android/timed_output.c +++ b/drivers/staging/android/timed_output.c @@ -78,7 +78,7 @@ int timed_output_dev_register(struct timed_output_dev *tdev) tdev->index = atomic_inc_return(&device_count); tdev->dev = device_create(timed_output_class, NULL, - MKDEV(0, tdev->index), NULL, tdev->name); + MKDEV(0, tdev->index), NULL, "%s", tdev->name); if (IS_ERR(tdev->dev)) return PTR_ERR(tdev->dev); diff --git a/drivers/staging/dgrp/dgrp_sysfs.c b/drivers/staging/dgrp/dgrp_sysfs.c index 7d1b36d..8cee9c8 100644 --- a/drivers/staging/dgrp/dgrp_sysfs.c +++ b/drivers/staging/dgrp/dgrp_sysfs.c @@ -273,7 +273,7 @@ void dgrp_create_node_class_sysfs_files(struct nd_struct *nd) sprintf(name, "node%ld", nd->nd_major); nd->nd_class_dev = device_create(dgrp_class, dgrp_class_nodes_dev, - MKDEV(0, nd->nd_major), NULL, name); + MKDEV(0, nd->nd_major), NULL, "%s", name); ret = sysfs_create_group(&nd->nd_class_dev->kobj, &dgrp_node_attribute_group); diff --git a/drivers/uwb/lc-dev.c b/drivers/uwb/lc-dev.c index 5241f1d..9209eaf 100644 --- a/drivers/uwb/lc-dev.c +++ b/drivers/uwb/lc-dev.c @@ -440,7 +440,7 @@ void uwbd_dev_onair(struct uwb_rc *rc, struct uwb_beca_e *bce) uwb_dev_init(uwb_dev); /* This sets refcnt to one, we own it */ uwb_dev->mac_addr = *bce->mac_addr; uwb_dev->dev_addr = bce->dev_addr; - dev_set_name(&uwb_dev->dev, macbuf); + dev_set_name(&uwb_dev->dev, "%s", macbuf); result = uwb_dev_add(uwb_dev, &rc->uwb_dev.dev, rc); if (result < 0) { dev_err(dev, "new device %s: cannot instantiate device\n", diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c index c74e7aa..e3c2790 100644 --- a/drivers/video/backlight/backlight.c +++ b/drivers/video/backlight/backlight.c @@ -304,7 +304,7 @@ struct backlight_device *backlight_device_register(const char *name, new_bd->dev.class = backlight_class; new_bd->dev.parent = parent; new_bd->dev.release = bl_device_release; - dev_set_name(&new_bd->dev, name); + dev_set_name(&new_bd->dev, "%s", name); dev_set_drvdata(&new_bd->dev, devdata); /* Set default properties */ diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c index 34fb6bd..3649fd9 100644 --- a/drivers/video/backlight/lcd.c +++ b/drivers/video/backlight/lcd.c @@ -219,7 +219,7 @@ struct lcd_device *lcd_device_register(const char *name, struct device *parent, new_ld->dev.class = lcd_class; new_ld->dev.parent = parent; new_ld->dev.release = lcd_device_release; - dev_set_name(&new_ld->dev, name); + dev_set_name(&new_ld->dev, "%s", name); dev_set_drvdata(&new_ld->dev, devdata); rc = device_register(&new_ld->dev); diff --git a/drivers/video/output.c b/drivers/video/output.c index 0d6f2cd..6285b97 100644 --- a/drivers/video/output.c +++ b/drivers/video/output.c @@ -97,7 +97,7 @@ struct output_device *video_output_register(const char *name, new_dev->props = op; new_dev->dev.class = &video_output_class; new_dev->dev.parent = dev; - dev_set_name(&new_dev->dev, name); + dev_set_name(&new_dev->dev, "%s", name); dev_set_drvdata(&new_dev->dev, devdata); ret_code = device_register(&new_dev->dev); if (ret_code) { diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 56cfaaa..8c9db16 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -447,7 +447,7 @@ int xenbus_probe_node(struct xen_bus_type *bus, if (err) goto fail; - dev_set_name(&xendev->dev, devname); + dev_set_name(&xendev->dev, "%s", devname); /* Register with generic device framework. */ err = device_register(&xendev->dev); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 5025174..d014ee5 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -515,7 +515,6 @@ EXPORT_SYMBOL(bdi_destroy); int bdi_setup_and_register(struct backing_dev_info *bdi, char *name, unsigned int cap) { - char tmp[32]; int err; bdi->name = name; @@ -524,8 +523,8 @@ int bdi_setup_and_register(struct backing_dev_info *bdi, char *name, if (err) return err; - sprintf(tmp, "%.28s%s", name, "-%d"); - err = bdi_register(bdi, NULL, tmp, atomic_long_inc_return(&bdi_seq)); + err = bdi_register(bdi, NULL, "%.28s-%ld", name, + atomic_long_inc_return(&bdi_seq)); if (err) { bdi_destroy(bdi); return err; diff --git a/sound/sound_core.c b/sound/sound_core.c index 359753f..45759f4 100644 --- a/sound/sound_core.c +++ b/sound/sound_core.c @@ -292,7 +292,7 @@ retry: } device_create(sound_class, dev, MKDEV(SOUND_MAJOR, s->unit_minor), - NULL, s->name+6); + NULL, "%s", s->name+6); return s->unit_minor; fail: -- cgit v0.10.2 From d8537548c924db3c44afde7646b6e220c7beb79d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Jul 2013 15:04:57 -0700 Subject: drivers: avoid format strings in names passed to alloc_workqueue() For the workqueue creation interfaces that do not expect format strings, make sure they cannot accidently be parsed that way. Additionally, clean up calls made with a single parameter that would be handled as a format string. Many callers are passing potentially dynamic string content, so use "%s" in those cases to avoid any potential accidents. Signed-off-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index b2c99dc..f8c920c 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -455,8 +455,8 @@ static int pcrypt_init_padata(struct padata_pcrypt *pcrypt, get_online_cpus(); - pcrypt->wq = alloc_workqueue(name, - WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE, 1); + pcrypt->wq = alloc_workqueue("%s", WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE, + 1, name); if (!pcrypt->wq) goto err; diff --git a/drivers/media/pci/cx18/cx18-driver.c b/drivers/media/pci/cx18/cx18-driver.c index 67b61cf..004d8ac 100644 --- a/drivers/media/pci/cx18/cx18-driver.c +++ b/drivers/media/pci/cx18/cx18-driver.c @@ -695,7 +695,7 @@ static int cx18_create_in_workq(struct cx18 *cx) { snprintf(cx->in_workq_name, sizeof(cx->in_workq_name), "%s-in", cx->v4l2_dev.name); - cx->in_work_queue = alloc_ordered_workqueue(cx->in_workq_name, 0); + cx->in_work_queue = alloc_ordered_workqueue("%s", 0, cx->in_workq_name); if (cx->in_work_queue == NULL) { CX18_ERR("Unable to create incoming mailbox handler thread\n"); return -ENOMEM; diff --git a/drivers/message/i2o/driver.c b/drivers/message/i2o/driver.c index 8a5b2d8..813eaa3 100644 --- a/drivers/message/i2o/driver.c +++ b/drivers/message/i2o/driver.c @@ -84,8 +84,8 @@ int i2o_driver_register(struct i2o_driver *drv) osm_debug("Register driver %s\n", drv->name); if (drv->event) { - drv->event_queue = alloc_workqueue(drv->name, - WQ_MEM_RECLAIM, 1); + drv->event_queue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, + drv->name); if (!drv->event_queue) { osm_err("Could not initialize event queue for driver " "%s\n", drv->name); diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 90dc143..c8b9ef0 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -1321,7 +1321,7 @@ int rt2x00lib_probe_dev(struct rt2x00_dev *rt2x00dev) * Initialize work. */ rt2x00dev->workqueue = - alloc_ordered_workqueue(wiphy_name(rt2x00dev->hw->wiphy), 0); + alloc_ordered_workqueue("%s", 0, wiphy_name(rt2x00dev->hw->wiphy)); if (!rt2x00dev->workqueue) { retval = -ENOMEM; goto exit; diff --git a/drivers/net/wireless/rtlwifi/base.c b/drivers/net/wireless/rtlwifi/base.c index af59dd5..a5f2231 100644 --- a/drivers/net/wireless/rtlwifi/base.c +++ b/drivers/net/wireless/rtlwifi/base.c @@ -380,7 +380,7 @@ static void _rtl_init_deferred_work(struct ieee80211_hw *hw) /* <2> work queue */ rtlpriv->works.hw = hw; - rtlpriv->works.rtl_wq = alloc_workqueue(rtlpriv->cfg->name, 0, 0); + rtlpriv->works.rtl_wq = alloc_workqueue("%s", 0, 0, rtlpriv->cfg->name); INIT_DELAYED_WORK(&rtlpriv->works.watchdog_wq, (void *)rtl_watchdog_wq_callback); INIT_DELAYED_WORK(&rtlpriv->works.ips_nic_off_wq, diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 5127f3f..b225573 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -773,14 +773,12 @@ static void pcie_shutdown_notification(struct controller *ctrl) static int pcie_init_slot(struct controller *ctrl) { struct slot *slot; - char name[32]; slot = kzalloc(sizeof(*slot), GFP_KERNEL); if (!slot) return -ENOMEM; - snprintf(name, sizeof(name), "pciehp-%u", PSN(ctrl)); - slot->wq = alloc_workqueue(name, 0, 0); + slot->wq = alloc_workqueue("pciehp-%u", 0, 0, PSN(ctrl)); if (!slot->wq) goto abort; diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c index 3100c52..d3f757d 100644 --- a/drivers/pci/hotplug/shpchp_core.c +++ b/drivers/pci/hotplug/shpchp_core.c @@ -128,8 +128,7 @@ static int init_slots(struct controller *ctrl) slot->hpc_ops = ctrl->hpc_ops; slot->number = ctrl->first_slot + (ctrl->slot_num_inc * i); - snprintf(name, sizeof(name), "shpchp-%d", slot->number); - slot->wq = alloc_workqueue(name, 0, 0); + slot->wq = alloc_workqueue("shpchp-%d", 0, 0, slot->number); if (!slot->wq) { retval = -ENOMEM; goto error_info; diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index d24a286..a1f5ac7 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -4996,7 +4996,7 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev, snprintf(phba->wq_name, sizeof(phba->wq_name), "beiscsi_%02x_wq", phba->shost->host_no); - phba->wq = alloc_workqueue(phba->wq_name, WQ_MEM_RECLAIM, 1); + phba->wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, phba->wq_name); if (!phba->wq) { beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_INIT, "BM_%d : beiscsi_dev_probe-" diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 4d231c1..b246b3c 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -7060,8 +7060,8 @@ skip_retry_init: } INIT_WORK(&ha->dpc_work, qla4xxx_do_dpc); - sprintf(buf, "qla4xxx_%lu_task", ha->host_no); - ha->task_wq = alloc_workqueue(buf, WQ_MEM_RECLAIM, 1); + ha->task_wq = alloc_workqueue("qla4xxx_%lu_task", WQ_MEM_RECLAIM, 1, + ha->host_no); if (!ha->task_wq) { ql4_printk(KERN_WARNING, ha, "Unable to start task thread!\n"); ret = -ENODEV; diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index e106c27..4628fd5 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -435,7 +435,7 @@ static int fc_host_setup(struct transport_container *tc, struct device *dev, snprintf(fc_host->work_q_name, sizeof(fc_host->work_q_name), "fc_wq_%d", shost->host_no); - fc_host->work_q = alloc_workqueue(fc_host->work_q_name, 0, 0); + fc_host->work_q = alloc_workqueue("%s", 0, 0, fc_host->work_q_name); if (!fc_host->work_q) return -ENOMEM; @@ -443,8 +443,8 @@ static int fc_host_setup(struct transport_container *tc, struct device *dev, snprintf(fc_host->devloss_work_q_name, sizeof(fc_host->devloss_work_q_name), "fc_dl_%d", shost->host_no); - fc_host->devloss_work_q = - alloc_workqueue(fc_host->devloss_work_q_name, 0, 0); + fc_host->devloss_work_q = alloc_workqueue("%s", 0, 0, + fc_host->devloss_work_q_name); if (!fc_host->devloss_work_q) { destroy_workqueue(fc_host->work_q); fc_host->work_q = NULL; diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index a9f4119..a0ed78a 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -445,11 +445,12 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) #define create_workqueue(name) \ - alloc_workqueue((name), WQ_MEM_RECLAIM, 1) + alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, (name)) #define create_freezable_workqueue(name) \ - alloc_workqueue((name), WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, 1) + alloc_workqueue("%s", WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, \ + 1, (name)) #define create_singlethread_workqueue(name) \ - alloc_workqueue((name), WQ_UNBOUND | WQ_MEM_RECLAIM, 1) + alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1, (name)) extern void destroy_workqueue(struct workqueue_struct *wq); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ace5e55..db7de80 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2211,16 +2211,15 @@ int hci_register_dev(struct hci_dev *hdev) list_add(&hdev->list, &hci_dev_list); write_unlock(&hci_dev_list_lock); - hdev->workqueue = alloc_workqueue(hdev->name, WQ_HIGHPRI | WQ_UNBOUND | - WQ_MEM_RECLAIM, 1); + hdev->workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND | + WQ_MEM_RECLAIM, 1, hdev->name); if (!hdev->workqueue) { error = -ENOMEM; goto err; } - hdev->req_workqueue = alloc_workqueue(hdev->name, - WQ_HIGHPRI | WQ_UNBOUND | - WQ_MEM_RECLAIM, 1); + hdev->req_workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND | + WQ_MEM_RECLAIM, 1, hdev->name); if (!hdev->req_workqueue) { destroy_workqueue(hdev->workqueue); error = -ENOMEM; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 8a7bfc4..8eae74a 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -921,7 +921,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) hw->queues = IEEE80211_MAX_QUEUES; local->workqueue = - alloc_ordered_workqueue(wiphy_name(local->hw.wiphy), 0); + alloc_ordered_workqueue("%s", 0, wiphy_name(local->hw.wiphy)); if (!local->workqueue) { result = -ENOMEM; goto fail_workqueue; -- cgit v0.10.2 From f170168b9a0b61ea1e647b082b38f605f1d3de3e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Jul 2013 15:04:58 -0700 Subject: drivers: avoid parsing names as kthread_run() format strings Calling kthread_run with a single name parameter causes it to be handled as a format string. Many callers are passing potentially dynamic string content, so use "%s" in those cases to avoid any potential accidents. Signed-off-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index fc803ec..b75c7db 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -1340,7 +1340,7 @@ aoe_ktstart(struct ktstate *k) struct task_struct *task; init_completion(&k->rendez); - task = kthread_run(kthread, k, k->name); + task = kthread_run(kthread, k, "%s", k->name); if (task == NULL || IS_ERR(task)) return -ENOMEM; k->task = task; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 20dd52a..952dbfe 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -4087,7 +4087,8 @@ skip_create_disk: start_service_thread: sprintf(thd_name, "mtip_svc_thd_%02d", index); dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread, - dd, dd->numa_node, thd_name); + dd, dd->numa_node, "%s", + thd_name); if (IS_ERR(dd->mtip_svc_handler)) { dev_err(&dd->pdev->dev, "service thread failed to start\n"); diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 8bfd1bc..04608a6 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -93,7 +93,7 @@ static void xen_update_blkif_status(struct xen_blkif *blkif) } invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); - blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, name); + blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, "%s", name); if (IS_ERR(blkif->xenblkd)) { err = PTR_ERR(blkif->xenblkd); blkif->xenblkd = NULL; diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c index b83bf4b..0f34bca 100644 --- a/drivers/hwmon/adt7470.c +++ b/drivers/hwmon/adt7470.c @@ -1285,7 +1285,7 @@ static int adt7470_probe(struct i2c_client *client, } init_completion(&data->auto_update_stop); - data->auto_update = kthread_run(adt7470_update_thread, client, + data->auto_update = kthread_run(adt7470_update_thread, client, "%s", dev_name(data->hwmon_dev)); if (IS_ERR(data->auto_update)) { err = PTR_ERR(data->auto_update); diff --git a/drivers/media/i2c/tvaudio.c b/drivers/media/i2c/tvaudio.c index b72a59d..e0634c8 100644 --- a/drivers/media/i2c/tvaudio.c +++ b/drivers/media/i2c/tvaudio.c @@ -2020,7 +2020,8 @@ static int tvaudio_probe(struct i2c_client *client, const struct i2c_device_id * /* start async thread */ chip->wt.function = chip_thread_wake; chip->wt.data = (unsigned long)chip; - chip->thread = kthread_run(chip_thread, chip, client->name); + chip->thread = kthread_run(chip_thread, chip, "%s", + client->name); if (IS_ERR(chip->thread)) { v4l2_warn(sd, "failed to create kthread\n"); chip->thread = NULL; diff --git a/drivers/media/pci/ivtv/ivtv-driver.c b/drivers/media/pci/ivtv/ivtv-driver.c index 07b8460..b809bc8 100644 --- a/drivers/media/pci/ivtv/ivtv-driver.c +++ b/drivers/media/pci/ivtv/ivtv-driver.c @@ -753,7 +753,7 @@ static int ivtv_init_struct1(struct ivtv *itv) init_kthread_worker(&itv->irq_worker); itv->irq_worker_task = kthread_run(kthread_worker_fn, &itv->irq_worker, - itv->v4l2_dev.name); + "%s", itv->v4l2_dev.name); if (IS_ERR(itv->irq_worker_task)) { IVTV_ERR("Could not create ivtv task\n"); return -1; diff --git a/drivers/media/platform/vivi.c b/drivers/media/platform/vivi.c index 85bc314..1d3f119 100644 --- a/drivers/media/platform/vivi.c +++ b/drivers/media/platform/vivi.c @@ -768,7 +768,8 @@ static int vivi_start_generating(struct vivi_dev *dev) dma_q->frame = 0; dma_q->ini_jiffies = jiffies; - dma_q->kthread = kthread_run(vivi_thread, dev, dev->v4l2_dev.name); + dma_q->kthread = kthread_run(vivi_thread, dev, "%s", + dev->v4l2_dev.name); if (IS_ERR(dma_q->kthread)) { v4l2_err(&dev->v4l2_dev, "kernel_thread() failed\n"); diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index a561335..0aaece9 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -1005,7 +1005,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, if (err) goto out_uif; - ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name); + ubi->bgt_thread = kthread_create(ubi_thread, ubi, "%s", ubi->bgt_name); if (IS_ERR(ubi->bgt_thread)) { err = PTR_ERR(ubi->bgt_thread); ubi_err("cannot spawn \"%s\", error %d", ubi->bgt_name, diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index 6125adb..d0adbaf 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -1893,7 +1893,8 @@ static int airo_open(struct net_device *dev) { if (ai->wifidev != dev) { clear_bit(JOB_DIE, &ai->jobs); - ai->airo_thread_task = kthread_run(airo_thread, dev, dev->name); + ai->airo_thread_task = kthread_run(airo_thread, dev, "%s", + dev->name); if (IS_ERR(ai->airo_thread_task)) return (int)PTR_ERR(ai->airo_thread_task); diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c index 1ef041b..d85ac1a 100644 --- a/drivers/scsi/aacraid/commctrl.c +++ b/drivers/scsi/aacraid/commctrl.c @@ -318,7 +318,8 @@ return_fib: kthread_stop(dev->thread); ssleep(1); dev->aif_thread = 0; - dev->thread = kthread_run(aac_command_thread, dev, dev->name); + dev->thread = kthread_run(aac_command_thread, dev, + "%s", dev->name); ssleep(1); } if (f.wait) { diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 1be0776..cab190a 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1336,7 +1336,8 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced) if ((retval = pci_set_dma_mask(aac->pdev, DMA_BIT_MASK(32)))) goto out; if (jafo) { - aac->thread = kthread_run(aac_command_thread, aac, aac->name); + aac->thread = kthread_run(aac_command_thread, aac, "%s", + aac->name); if (IS_ERR(aac->thread)) { retval = PTR_ERR(aac->thread); goto out; diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 32b7bb1..085db8b 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -601,7 +601,7 @@ static int spi_init_queue(struct spi_master *master) init_kthread_worker(&master->kworker); master->kworker_task = kthread_run(kthread_worker_fn, - &master->kworker, + &master->kworker, "%s", dev_name(&master->dev)); if (IS_ERR(master->kworker_task)) { dev_err(&master->dev, "failed to create message pump task\n"); diff --git a/drivers/staging/rtl8712/os_intfs.c b/drivers/staging/rtl8712/os_intfs.c index b65bf5e..6e81ba0 100644 --- a/drivers/staging/rtl8712/os_intfs.c +++ b/drivers/staging/rtl8712/os_intfs.c @@ -238,7 +238,7 @@ struct net_device *r8712_init_netdev(void) static u32 start_drv_threads(struct _adapter *padapter) { - padapter->cmdThread = kthread_run(r8712_cmd_thread, padapter, + padapter->cmdThread = kthread_run(r8712_cmd_thread, padapter, "%s", padapter->pnetdev->name); if (IS_ERR(padapter->cmdThread) < 0) return _FAIL; diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c index d3527dd..5e0d33a 100644 --- a/drivers/usb/atm/usbatm.c +++ b/drivers/usb/atm/usbatm.c @@ -1020,7 +1020,7 @@ static int usbatm_heavy_init(struct usbatm_data *instance) { struct task_struct *t; - t = kthread_create(usbatm_do_heavy_init, instance, + t = kthread_create(usbatm_do_heavy_init, instance, "%s", instance->driver->driver_name); if (IS_ERR(t)) { usb_err(instance, "%s: failed to create kernel_thread (%ld)!\n", @@ -1076,7 +1076,8 @@ int usbatm_usb_probe(struct usb_interface *intf, const struct usb_device_id *id, /* public fields */ instance->driver = driver; - snprintf(instance->driver_name, sizeof(instance->driver_name), driver->driver_name); + strlcpy(instance->driver_name, driver->driver_name, + sizeof(instance->driver_name)); instance->usb_dev = usb_dev; instance->usb_intf = intf; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index a2aa97d..10d6c41 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -305,7 +305,7 @@ static int lockd_start_svc(struct svc_serv *serv) svc_sock_update_bufs(serv); serv->sv_maxconn = nlm_max_connections; - nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, serv->sv_name); + nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, "%s", serv->sv_name); if (IS_ERR(nlmsvc_task)) { error = PTR_ERR(nlmsvc_task); printk(KERN_WARNING diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index cff089a..da6a43d 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -211,7 +211,6 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, struct svc_rqst *rqstp; int (*callback_svc)(void *vrqstp); struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; - char svc_name[12]; int ret; nfs_callback_bc_serv(minorversion, xprt, serv); @@ -235,10 +234,10 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, svc_sock_update_bufs(serv); - sprintf(svc_name, "nfsv4.%u-svc", minorversion); cb_info->serv = serv; cb_info->rqst = rqstp; - cb_info->task = kthread_run(callback_svc, cb_info->rqst, svc_name); + cb_info->task = kthread_run(callback_svc, cb_info->rqst, + "nfsv4.%u-svc", minorversion); if (IS_ERR(cb_info->task)) { ret = PTR_ERR(cb_info->task); svc_exit_thread(cb_info->rqst); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index ff10b4a..5541881 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1194,7 +1194,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) snprintf(buf, sizeof(buf), "%s-manager", rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); rcu_read_unlock(); - task = kthread_run(nfs4_run_state_manager, clp, buf); + task = kthread_run(nfs4_run_state_manager, clp, "%s", buf); if (IS_ERR(task)) { printk(KERN_ERR "%s: kthread_run: %ld\n", __func__, PTR_ERR(task)); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index cf3adc6..e08abb9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -3026,7 +3026,7 @@ static int __init rcu_spawn_gp_kthread(void) struct task_struct *t; for_each_rcu_flavor(rsp) { - t = kthread_run(rcu_gp_kthread, rsp, rsp->name); + t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name); BUG_ON(IS_ERR(t)); rnp = rcu_get_root(rsp); raw_spin_lock_irqsave(&rnp->lock, flags); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 89a588b..b974571 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -740,7 +740,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) __module_get(serv->sv_module); task = kthread_create_on_node(serv->sv_function, rqstp, - node, serv->sv_name); + node, "%s", serv->sv_name); if (IS_ERR(task)) { error = PTR_ERR(task); module_put(serv->sv_module); -- cgit v0.10.2 From b58d977432c80ee60d6970042775a62e3f8d7675 Mon Sep 17 00:00:00 2001 From: Alex Thorlton Date: Wed, 3 Jul 2013 15:04:59 -0700 Subject: dump_stack: serialize the output from dump_stack() Add functionality to serialize the output from dump_stack() to avoid mangling of the output when dump_stack is called simultaneously from multiple cpus. [akpm@linux-foundation.org: fix comment indenting, avoid inclusion of files - use where possiblem fix uniprocessor build (__dump_stack undefined), remove unneeded ifdef around smp.h inclusion] Signed-off-by: Alex Thorlton Reported-by: Russ Anderson Reviewed-by: Robin Holt Cc: Vineet Gupta Cc: David S. Miller Cc: Richard Kuo Cc: Jesper Nilsson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/dump_stack.c b/lib/dump_stack.c index 53bad09..c031541 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -6,15 +6,58 @@ #include #include #include +#include +#include + +static void __dump_stack(void) +{ + dump_stack_print_info(KERN_DEFAULT); + show_stack(NULL, NULL); +} /** * dump_stack - dump the current task information and its stack trace * * Architectures can override this implementation by implementing its own. */ +#ifdef CONFIG_SMP +static atomic_t dump_lock = ATOMIC_INIT(-1); + void dump_stack(void) { - dump_stack_print_info(KERN_DEFAULT); - show_stack(NULL, NULL); + int was_locked; + int old; + int cpu; + + /* + * Permit this cpu to perform nested stack dumps while serialising + * against other CPUs + */ + preempt_disable(); + +retry: + cpu = smp_processor_id(); + old = atomic_cmpxchg(&dump_lock, -1, cpu); + if (old == -1) { + was_locked = 0; + } else if (old == cpu) { + was_locked = 1; + } else { + cpu_relax(); + goto retry; + } + + __dump_stack(); + + if (!was_locked) + atomic_set(&dump_lock, -1); + + preempt_enable(); +} +#else +void dump_stack(void) +{ + __dump_stack(); } +#endif EXPORT_SYMBOL(dump_stack); -- cgit v0.10.2 From 7ec75e1ca1bd35872a5c7b33da4b05395bc74364 Mon Sep 17 00:00:00 2001 From: liguang Date: Wed, 3 Jul 2013 15:05:00 -0700 Subject: kernel/sys.c: sys_reboot(): fix malformed panic message If LINUX_REBOOT_CMD_HALT for reboot failed, the message "cannot halt" will stay on the same line with the next message, so append a '\n'. Signed-off-by: liguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/sys.c b/kernel/sys.c index 2bbd9a7..c1da757 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -511,7 +511,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, case LINUX_REBOOT_CMD_HALT: kernel_halt(); do_exit(0); - panic("cannot halt"); + panic("cannot halt.\n"); case LINUX_REBOOT_CMD_POWER_OFF: kernel_power_off(); -- cgit v0.10.2 From 45c64940c8bb64a042464ecec89d95eb4cce9b07 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:05:01 -0700 Subject: kernel/sys.c:do_sysinfo(): use get_monotonic_boottime() Change do_sysinfo() to use get_monotonic_boottime() instead of do_posix_clock_monotonic_gettime() + monotonic_to_bootbased(). Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Acked-by: John Stultz Cc: Tomas Janousek Cc: Tomas Smetana Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/sys.c b/kernel/sys.c index c1da757..7bf50dc 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2355,8 +2355,7 @@ static int do_sysinfo(struct sysinfo *info) memset(info, 0, sizeof(struct sysinfo)); - ktime_get_ts(&tp); - monotonic_to_bootbased(&tp); + get_monotonic_boottime(&tp); info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT); -- cgit v0.10.2 From 5017b2851373ee15c7035151853bb1448800cae2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 3 Jul 2013 15:05:02 -0700 Subject: dmi: add support for exact DMI matches in addition to substring matching dmi_match() considers a substring match to be a successful match. This is not always sufficient to distinguish between DMI data for different systems. Add support for exact string matching using strcmp() in addition to the substring matching using strstr(). The specific use case in the i915 driver is to allow us to use an exact match for D510MO, without also incorrectly matching D510MOV: { .ident = "Intel D510MO", .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Intel"), DMI_EXACT_MATCH(DMI_BOARD_NAME, "D510MO"), }, } Signed-off-by: Jani Nikula Cc: Cc: Chris Wilson Cc: Cornel Panceac Acked-by: Daniel Vetter Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index b95159b..eb760a2 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -551,9 +551,15 @@ static bool dmi_matches(const struct dmi_system_id *dmi) int s = dmi->matches[i].slot; if (s == DMI_NONE) break; - if (dmi_ident[s] - && strstr(dmi_ident[s], dmi->matches[i].substr)) - continue; + if (dmi_ident[s]) { + if (!dmi->matches[i].exact_match && + strstr(dmi_ident[s], dmi->matches[i].substr)) + continue; + else if (dmi->matches[i].exact_match && + !strcmp(dmi_ident[s], dmi->matches[i].substr)) + continue; + } + /* No match */ return false; } diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index b508016..b3bd7e7 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -456,7 +456,8 @@ enum dmi_field { }; struct dmi_strmatch { - unsigned char slot; + unsigned char slot:7; + unsigned char exact_match:1; char substr[79]; }; @@ -474,7 +475,8 @@ struct dmi_system_id { */ #define dmi_device_id dmi_system_id -#define DMI_MATCH(a, b) { a, b } +#define DMI_MATCH(a, b) { .slot = a, .substr = b } +#define DMI_EXACT_MATCH(a, b) { .slot = a, .substr = b, .exact_match = 1 } #define PLATFORM_NAME_SIZE 20 #define PLATFORM_MODULE_PREFIX "platform:" -- cgit v0.10.2 From e5614f0c2d0f4d7f0b8ef745d34593baf2c5dbf8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 Jul 2013 15:05:03 -0700 Subject: drm/i915: quirk away phantom LVDS on Intel's D510MO mainboard This replaceable mainboard only has a VGA-out, yet it claims to also have a connected LVDS header. Addresses https://bugs.freedesktop.org/show_bug.cgi?id=63860 [jani.nikula@intel.com: use DMI_EXACT_MATCH for board name.] Signed-off-by: Chris Wilson Signed-off-by: Jani Nikula Reported-by: Cc: Cornel Panceac Acked-by: Daniel Vetter Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 29412cc..5716cf3 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -869,6 +869,14 @@ static const struct dmi_system_id intel_no_lvds[] = { DMI_MATCH(DMI_PRODUCT_NAME, "ESPRIMO Q900"), }, }, + { + .callback = intel_no_lvds_dmi_callback, + .ident = "Intel D510MO", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Intel"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "D510MO"), + }, + }, { } /* terminating entry */ }; -- cgit v0.10.2 From dcf6d294830d46b0e6901477fb4bf455281d90c8 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 3 Jul 2013 15:05:05 -0700 Subject: drm/i915: quirk away phantom LVDS on Intel's D525MW mainboard This replaceable mainboard only has a VGA-out, yet it claims to also have a connected LVDS header. Addresses https://bugs.freedesktop.org/show_bug.cgi?id=65256 Signed-off-by: Jani Nikula Reported-by: Cornel Panceac Cc: Chris Wilson Cc: Acked-by: Daniel Vetter Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 5716cf3..817f936 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -877,6 +877,14 @@ static const struct dmi_system_id intel_no_lvds[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "D510MO"), }, }, + { + .callback = intel_no_lvds_dmi_callback, + .ident = "Intel D525MW", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Intel"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "D525MW"), + }, + }, { } /* terminating entry */ }; -- cgit v0.10.2 From 48a9db462d99494583dad829969616ac90a8df4e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 3 Jul 2013 15:05:06 -0700 Subject: drivers/dma: remove unused support for MEMSET operations There have never been any real users of MEMSET operations since they have been introduced in January 2007 by commit 7405f74badf4 ("dmaengine: refactor dmaengine around dma_async_tx_descriptor"). Therefore remove support for them for now, it can be always brought back when needed. [sebastian.hesselbarth@gmail.com: fix drivers/dma/mv_xor] Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Kyungmin Park Signed-off-by: Sebastian Hesselbarth Cc: Vinod Koul Acked-by: Dan Williams Cc: Tomasz Figa Cc: Herbert Xu Cc: Olof Johansson Cc: Kevin Hilman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index ba046b8..7bf1be2 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -222,5 +222,4 @@ drivers/dma/: location for offload engine drivers include/linux/async_tx.h: core header file for the async_tx api crypto/async_tx/async_tx.c: async_tx interface to dmaengine and common code crypto/async_tx/async_memcpy.c: copy offload -crypto/async_tx/async_memset.c: memory fill offload crypto/async_tx/async_xor.c: xor and xor zero sum offload diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c index 3181f61..1c5bd76 100644 --- a/arch/arm/mach-iop13xx/setup.c +++ b/arch/arm/mach-iop13xx/setup.c @@ -469,7 +469,6 @@ void __init iop13xx_platform_init(void) dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); dma_cap_set(DMA_XOR, plat_data->cap_mask); dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask); - dma_cap_set(DMA_MEMSET, plat_data->cap_mask); dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); break; case IOP13XX_INIT_ADMA_1: @@ -479,7 +478,6 @@ void __init iop13xx_platform_init(void) dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); dma_cap_set(DMA_XOR, plat_data->cap_mask); dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask); - dma_cap_set(DMA_MEMSET, plat_data->cap_mask); dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); break; case IOP13XX_INIT_ADMA_2: @@ -489,7 +487,6 @@ void __init iop13xx_platform_init(void) dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); dma_cap_set(DMA_XOR, plat_data->cap_mask); dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask); - dma_cap_set(DMA_MEMSET, plat_data->cap_mask); dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); dma_cap_set(DMA_PQ, plat_data->cap_mask); dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask); diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c index 1ff6a37..a4d1f8d 100644 --- a/arch/arm/plat-iop/adma.c +++ b/arch/arm/plat-iop/adma.c @@ -192,12 +192,10 @@ static int __init iop3xx_adma_cap_init(void) #ifdef CONFIG_ARCH_IOP32X /* the 32x AAU does not perform zero sum */ dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask); - dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask); dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); #else dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask); dma_cap_set(DMA_XOR_VAL, iop3xx_aau_data.cap_mask); - dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask); dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); #endif diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index c019b7a..c66d163 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -666,14 +666,9 @@ void __init orion_xor0_init(unsigned long mapbase_low, orion_xor0_shared_resources[3].start = irq_1; orion_xor0_shared_resources[3].end = irq_1; - /* - * two engines can't do memset simultaneously, this limitation - * satisfied by removing memset support from one of the engines. - */ dma_cap_set(DMA_MEMCPY, orion_xor0_channels_data[0].cap_mask); dma_cap_set(DMA_XOR, orion_xor0_channels_data[0].cap_mask); - dma_cap_set(DMA_MEMSET, orion_xor0_channels_data[1].cap_mask); dma_cap_set(DMA_MEMCPY, orion_xor0_channels_data[1].cap_mask); dma_cap_set(DMA_XOR, orion_xor0_channels_data[1].cap_mask); @@ -732,14 +727,9 @@ void __init orion_xor1_init(unsigned long mapbase_low, orion_xor1_shared_resources[3].start = irq_1; orion_xor1_shared_resources[3].end = irq_1; - /* - * two engines can't do memset simultaneously, this limitation - * satisfied by removing memset support from one of the engines. - */ dma_cap_set(DMA_MEMCPY, orion_xor1_channels_data[0].cap_mask); dma_cap_set(DMA_XOR, orion_xor1_channels_data[0].cap_mask); - dma_cap_set(DMA_MEMSET, orion_xor1_channels_data[1].cap_mask); dma_cap_set(DMA_MEMCPY, orion_xor1_channels_data[1].cap_mask); dma_cap_set(DMA_XOR, orion_xor1_channels_data[1].cap_mask); diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index 1b11abb..f38a58a 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig @@ -10,10 +10,6 @@ config ASYNC_XOR select ASYNC_CORE select XOR_BLOCKS -config ASYNC_MEMSET - tristate - select ASYNC_CORE - config ASYNC_PQ tristate select ASYNC_CORE diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile index d1e0e6f..462e4ab 100644 --- a/crypto/async_tx/Makefile +++ b/crypto/async_tx/Makefile @@ -1,6 +1,5 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o -obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o obj-$(CONFIG_ASYNC_XOR) += async_xor.o obj-$(CONFIG_ASYNC_PQ) += async_pq.o obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c deleted file mode 100644 index 05a4d1e..0000000 --- a/crypto/async_tx/async_memset.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * memory fill offload engine support - * - * Copyright © 2006, Intel Corporation. - * - * Dan Williams - * - * with architecture considerations by: - * Neil Brown - * Jeff Garzik - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - */ -#include -#include -#include -#include -#include -#include - -/** - * async_memset - attempt to fill memory with a dma engine. - * @dest: destination page - * @val: fill value - * @offset: offset in pages to start transaction - * @len: length in bytes - * - * honored flags: ASYNC_TX_ACK - */ -struct dma_async_tx_descriptor * -async_memset(struct page *dest, int val, unsigned int offset, size_t len, - struct async_submit_ctl *submit) -{ - struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET, - &dest, 1, NULL, 0, len); - struct dma_device *device = chan ? chan->device : NULL; - struct dma_async_tx_descriptor *tx = NULL; - - if (device && is_dma_fill_aligned(device, offset, 0, len)) { - dma_addr_t dma_dest; - unsigned long dma_prep_flags = 0; - - if (submit->cb_fn) - dma_prep_flags |= DMA_PREP_INTERRUPT; - if (submit->flags & ASYNC_TX_FENCE) - dma_prep_flags |= DMA_PREP_FENCE; - dma_dest = dma_map_page(device->dev, dest, offset, len, - DMA_FROM_DEVICE); - - tx = device->device_prep_dma_memset(chan, dma_dest, val, len, - dma_prep_flags); - } - - if (tx) { - pr_debug("%s: (async) len: %zu\n", __func__, len); - async_tx_submit(chan, tx, submit); - } else { /* run the memset synchronously */ - void *dest_buf; - pr_debug("%s: (sync) len: %zu\n", __func__, len); - - dest_buf = page_address(dest) + offset; - - /* wait for any prerequisite operations */ - async_tx_quiesce(&submit->depend_tx); - - memset(dest_buf, val, len); - - async_tx_sync_epilog(submit); - } - - return tx; -} -EXPORT_SYMBOL_GPL(async_memset); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_DESCRIPTION("asynchronous memset api"); -MODULE_LICENSE("GPL"); diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 93f7992..9e56745 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -663,11 +663,6 @@ static bool device_has_all_tx_types(struct dma_device *device) return false; #endif - #if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE) - if (!dma_has_cap(DMA_MEMSET, device->cap_mask)) - return false; - #endif - #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE) if (!dma_has_cap(DMA_XOR, device->cap_mask)) return false; @@ -729,8 +724,6 @@ int dma_async_device_register(struct dma_device *device) !device->device_prep_dma_pq); BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) && !device->device_prep_dma_pq_val); - BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && - !device->device_prep_dma_memset); BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && !device->device_prep_dma_interrupt); BUG_ON(dma_has_cap(DMA_SG, device->cap_mask) && diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 17a2393..5ff6fc1 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -1105,12 +1105,11 @@ static ssize_t cap_show(struct dma_chan *c, char *page) { struct dma_device *dma = c->device; - return sprintf(page, "copy%s%s%s%s%s%s\n", + return sprintf(page, "copy%s%s%s%s%s\n", dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "", dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "", dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "", dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "", - dma_has_cap(DMA_MEMSET, dma->cap_mask) ? " fill" : "", dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : ""); } diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index 29bf944..212d584 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -123,7 +123,6 @@ static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len struct ioat_ring_ent { union { struct ioat_dma_descriptor *hw; - struct ioat_fill_descriptor *fill; struct ioat_xor_descriptor *xor; struct ioat_xor_ext_descriptor *xor_ex; struct ioat_pq_descriptor *pq; diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index ca6ea9b..b642e03 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -311,14 +311,6 @@ static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */ ioat_dma_unmap(chan, flags, len, desc->hw); break; - case IOAT_OP_FILL: { - struct ioat_fill_descriptor *hw = desc->fill; - - if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) - ioat_unmap(pdev, hw->dst_addr - offset, len, - PCI_DMA_FROMDEVICE, flags, 1); - break; - } case IOAT_OP_XOR_VAL: case IOAT_OP_XOR: { struct ioat_xor_descriptor *xor = desc->xor; @@ -824,51 +816,6 @@ ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie, } static struct dma_async_tx_descriptor * -ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value, - size_t len, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_ring_ent *desc; - size_t total_len = len; - struct ioat_fill_descriptor *fill; - u64 src_data = (0x0101010101010101ULL) * (value & 0xff); - int num_descs, idx, i; - - num_descs = ioat2_xferlen_to_descs(ioat, len); - if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0) - idx = ioat->head; - else - return NULL; - i = 0; - do { - size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); - - desc = ioat2_get_ring_ent(ioat, idx + i); - fill = desc->fill; - - fill->size = xfer_size; - fill->src_data = src_data; - fill->dst_addr = dest; - fill->ctl = 0; - fill->ctl_f.op = IOAT_OP_FILL; - - len -= xfer_size; - dest += xfer_size; - dump_desc_dbg(ioat, desc); - } while (++i < num_descs); - - desc->txd.flags = flags; - desc->len = total_len; - fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - fill->ctl_f.compl_write = 1; - dump_desc_dbg(ioat, desc); - - /* we leave the channel locked to ensure in order submission */ - return &desc->txd; -} - -static struct dma_async_tx_descriptor * __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, size_t len, unsigned long flags) @@ -1431,7 +1378,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device) struct page *xor_srcs[IOAT_NUM_SRC_TEST]; struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1]; dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1]; - dma_addr_t dma_addr, dest_dma; + dma_addr_t dest_dma; struct dma_async_tx_descriptor *tx; struct dma_chan *dma_chan; dma_cookie_t cookie; @@ -1598,56 +1545,6 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device) goto free_resources; } - /* skip memset if the capability is not present */ - if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask)) - goto free_resources; - - /* test memset */ - op = IOAT_OP_FILL; - - dma_addr = dma_map_page(dev, dest, 0, - PAGE_SIZE, DMA_FROM_DEVICE); - tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE, - DMA_PREP_INTERRUPT | - DMA_COMPL_SKIP_SRC_UNMAP | - DMA_COMPL_SKIP_DEST_UNMAP); - if (!tx) { - dev_err(dev, "Self-test memset prep failed\n"); - err = -ENODEV; - goto dma_unmap; - } - - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat3_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(dev, "Self-test memset setup failed\n"); - err = -ENODEV; - goto dma_unmap; - } - dma->device_issue_pending(dma_chan); - - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - - if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { - dev_err(dev, "Self-test memset timed out\n"); - err = -ENODEV; - goto dma_unmap; - } - - dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_FROM_DEVICE); - - for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) { - u32 *ptr = page_address(dest); - if (ptr[i]) { - dev_err(dev, "Self-test memset failed compare\n"); - err = -ENODEV; - goto free_resources; - } - } - /* test for non-zero parity sum */ op = IOAT_OP_XOR_VAL; @@ -1706,8 +1603,7 @@ dma_unmap: for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); - } else if (op == IOAT_OP_FILL) - dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_FROM_DEVICE); + } free_resources: dma->device_free_chan_resources(dma_chan); out: @@ -1944,12 +1840,6 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) } } - if (is_raid_device && (device->cap & IOAT_CAP_FILL_BLOCK)) { - dma_cap_set(DMA_MEMSET, dma->cap_mask); - dma->device_prep_dma_memset = ioat3_prep_memset_lock; - } - - dma->device_tx_status = ioat3_tx_status; device->cleanup_fn = ioat3_cleanup_event; device->timer_fn = ioat3_timer_event; diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index 5ee57d4..62f83e9 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -100,33 +100,6 @@ struct ioat_dma_descriptor { uint64_t user2; }; -struct ioat_fill_descriptor { - uint32_t size; - union { - uint32_t ctl; - struct { - unsigned int int_en:1; - unsigned int rsvd:1; - unsigned int dest_snoop_dis:1; - unsigned int compl_write:1; - unsigned int fence:1; - unsigned int rsvd2:2; - unsigned int dest_brk:1; - unsigned int bundle:1; - unsigned int rsvd4:15; - #define IOAT_OP_FILL 0x01 - unsigned int op:8; - } ctl_f; - }; - uint64_t src_data; - uint64_t dst_addr; - uint64_t next; - uint64_t rsv1; - uint64_t next_dst_addr; - uint64_t user1; - uint64_t user2; -}; - struct ioat_xor_descriptor { uint32_t size; union { diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 7dafb9f..c9cc08c 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -633,39 +633,6 @@ iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, } static struct dma_async_tx_descriptor * -iop_adma_prep_dma_memset(struct dma_chan *chan, dma_addr_t dma_dest, - int value, size_t len, unsigned long flags) -{ - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - struct iop_adma_desc_slot *sw_desc, *grp_start; - int slot_cnt, slots_per_op; - - if (unlikely(!len)) - return NULL; - BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT); - - dev_dbg(iop_chan->device->common.dev, "%s len: %u\n", - __func__, len); - - spin_lock_bh(&iop_chan->lock); - slot_cnt = iop_chan_memset_slot_count(len, &slots_per_op); - sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); - if (sw_desc) { - grp_start = sw_desc->group_head; - iop_desc_init_memset(grp_start, flags); - iop_desc_set_byte_count(grp_start, iop_chan, len); - iop_desc_set_block_fill_val(grp_start, value); - iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest); - sw_desc->unmap_src_cnt = 1; - sw_desc->unmap_len = len; - sw_desc->async_tx.flags = flags; - } - spin_unlock_bh(&iop_chan->lock); - - return sw_desc ? &sw_desc->async_tx : NULL; -} - -static struct dma_async_tx_descriptor * iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t *dma_src, unsigned int src_cnt, size_t len, unsigned long flags) @@ -1176,33 +1143,6 @@ iop_adma_xor_val_self_test(struct iop_adma_device *device) goto free_resources; } - /* test memset */ - dma_addr = dma_map_page(dma_chan->device->dev, dest, 0, - PAGE_SIZE, DMA_FROM_DEVICE); - tx = iop_adma_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); - - cookie = iop_adma_tx_submit(tx); - iop_adma_issue_pending(dma_chan); - msleep(8); - - if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { - dev_err(dma_chan->device->dev, - "Self-test memset timed out, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) { - u32 *ptr = page_address(dest); - if (ptr[i]) { - dev_err(dma_chan->device->dev, - "Self-test memset failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; - } - } - /* test for non-zero parity sum */ zero_sum_result = 0; for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) @@ -1487,8 +1427,6 @@ static int iop_adma_probe(struct platform_device *pdev) /* set prep routines based on capability */ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) dma_dev->device_prep_dma_memcpy = iop_adma_prep_dma_memcpy; - if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) - dma_dev->device_prep_dma_memset = iop_adma_prep_dma_memset; if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { dma_dev->max_xor = iop_adma_get_max_xor(); dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; @@ -1556,8 +1494,7 @@ static int iop_adma_probe(struct platform_device *pdev) goto err_free_iop_chan; } - if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) || - dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { ret = iop_adma_xor_val_self_test(adev); dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); if (ret) @@ -1584,7 +1521,6 @@ static int iop_adma_probe(struct platform_device *pdev) dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "", - dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index d64ae14..200f1a3 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -89,11 +89,6 @@ static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc) hw_desc->phy_next_desc = 0; } -static void mv_desc_set_block_fill_val(struct mv_xor_desc_slot *desc, u32 val) -{ - desc->value = val; -} - static void mv_desc_set_dest_addr(struct mv_xor_desc_slot *desc, dma_addr_t addr) { @@ -128,22 +123,6 @@ static void mv_chan_set_next_descriptor(struct mv_xor_chan *chan, __raw_writel(next_desc_addr, XOR_NEXT_DESC(chan)); } -static void mv_chan_set_dest_pointer(struct mv_xor_chan *chan, u32 desc_addr) -{ - __raw_writel(desc_addr, XOR_DEST_POINTER(chan)); -} - -static void mv_chan_set_block_size(struct mv_xor_chan *chan, u32 block_size) -{ - __raw_writel(block_size, XOR_BLOCK_SIZE(chan)); -} - -static void mv_chan_set_value(struct mv_xor_chan *chan, u32 value) -{ - __raw_writel(value, XOR_INIT_VALUE_LOW(chan)); - __raw_writel(value, XOR_INIT_VALUE_HIGH(chan)); -} - static void mv_chan_unmask_interrupts(struct mv_xor_chan *chan) { u32 val = __raw_readl(XOR_INTR_MASK(chan)); @@ -186,8 +165,6 @@ static int mv_can_chain(struct mv_xor_desc_slot *desc) if (chain_old_tail->type != desc->type) return 0; - if (desc->type == DMA_MEMSET) - return 0; return 1; } @@ -205,9 +182,6 @@ static void mv_set_mode(struct mv_xor_chan *chan, case DMA_MEMCPY: op_mode = XOR_OPERATION_MODE_MEMCPY; break; - case DMA_MEMSET: - op_mode = XOR_OPERATION_MODE_MEMSET; - break; default: dev_err(mv_chan_to_devp(chan), "error: unsupported operation %d\n", @@ -274,18 +248,9 @@ static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan, if (sw_desc->type != mv_chan->current_type) mv_set_mode(mv_chan, sw_desc->type); - if (sw_desc->type == DMA_MEMSET) { - /* for memset requests we need to program the engine, no - * descriptors used. - */ - struct mv_xor_desc *hw_desc = sw_desc->hw_desc; - mv_chan_set_dest_pointer(mv_chan, hw_desc->phy_dest_addr); - mv_chan_set_block_size(mv_chan, sw_desc->unmap_len); - mv_chan_set_value(mv_chan, sw_desc->value); - } else { - /* set the hardware chain */ - mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys); - } + /* set the hardware chain */ + mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys); + mv_chan->pending += sw_desc->slot_cnt; mv_xor_issue_pending(&mv_chan->dmachan); } @@ -688,43 +653,6 @@ mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, } static struct dma_async_tx_descriptor * -mv_xor_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, - size_t len, unsigned long flags) -{ - struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); - struct mv_xor_desc_slot *sw_desc, *grp_start; - int slot_cnt; - - dev_dbg(mv_chan_to_devp(mv_chan), - "%s dest: %x len: %u flags: %ld\n", - __func__, dest, len, flags); - if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) - return NULL; - - BUG_ON(len > MV_XOR_MAX_BYTE_COUNT); - - spin_lock_bh(&mv_chan->lock); - slot_cnt = mv_chan_memset_slot_count(len); - sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1); - if (sw_desc) { - sw_desc->type = DMA_MEMSET; - sw_desc->async_tx.flags = flags; - grp_start = sw_desc->group_head; - mv_desc_init(grp_start, flags); - mv_desc_set_byte_count(grp_start, len); - mv_desc_set_dest_addr(sw_desc->group_head, dest); - mv_desc_set_block_fill_val(grp_start, value); - sw_desc->unmap_src_cnt = 1; - sw_desc->unmap_len = len; - } - spin_unlock_bh(&mv_chan->lock); - dev_dbg(mv_chan_to_devp(mv_chan), - "%s sw_desc %p async_tx %p \n", - __func__, sw_desc, &sw_desc->async_tx); - return sw_desc ? &sw_desc->async_tx : NULL; -} - -static struct dma_async_tx_descriptor * mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, size_t len, unsigned long flags) { @@ -1137,8 +1065,6 @@ mv_xor_channel_add(struct mv_xor_device *xordev, /* set prep routines based on capability */ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy; - if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) - dma_dev->device_prep_dma_memset = mv_xor_prep_dma_memset; if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { dma_dev->max_xor = 8; dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor; @@ -1187,9 +1113,8 @@ mv_xor_channel_add(struct mv_xor_device *xordev, goto err_free_irq; } - dev_info(&pdev->dev, "Marvell XOR: ( %s%s%s%s)\n", + dev_info(&pdev->dev, "Marvell XOR: ( %s%s%s)\n", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", - dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); @@ -1298,8 +1223,6 @@ static int mv_xor_probe(struct platform_device *pdev) dma_cap_set(DMA_MEMCPY, cap_mask); if (of_property_read_bool(np, "dmacap,xor")) dma_cap_set(DMA_XOR, cap_mask); - if (of_property_read_bool(np, "dmacap,memset")) - dma_cap_set(DMA_MEMSET, cap_mask); if (of_property_read_bool(np, "dmacap,interrupt")) dma_cap_set(DMA_INTERRUPT, cap_mask); diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h index c632a47..c619359 100644 --- a/drivers/dma/mv_xor.h +++ b/drivers/dma/mv_xor.h @@ -31,7 +31,6 @@ #define XOR_OPERATION_MODE_XOR 0 #define XOR_OPERATION_MODE_MEMCPY 2 -#define XOR_OPERATION_MODE_MEMSET 4 #define XOR_CURR_DESC(chan) (chan->mmr_base + 0x210 + (chan->idx * 4)) #define XOR_NEXT_DESC(chan) (chan->mmr_base + 0x200 + (chan->idx * 4)) diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c index 5d3d955..1e220f8 100644 --- a/drivers/dma/ppc4xx/adma.c +++ b/drivers/dma/ppc4xx/adma.c @@ -2323,47 +2323,6 @@ static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memcpy( } /** - * ppc440spe_adma_prep_dma_memset - prepare CDB for a MEMSET operation - */ -static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memset( - struct dma_chan *chan, dma_addr_t dma_dest, int value, - size_t len, unsigned long flags) -{ - struct ppc440spe_adma_chan *ppc440spe_chan; - struct ppc440spe_adma_desc_slot *sw_desc, *group_start; - int slot_cnt, slots_per_op; - - ppc440spe_chan = to_ppc440spe_adma_chan(chan); - - if (unlikely(!len)) - return NULL; - - BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT); - - spin_lock_bh(&ppc440spe_chan->lock); - - dev_dbg(ppc440spe_chan->device->common.dev, - "ppc440spe adma%d: %s cal: %u len: %u int_en %d\n", - ppc440spe_chan->device->id, __func__, value, len, - flags & DMA_PREP_INTERRUPT ? 1 : 0); - - slot_cnt = slots_per_op = 1; - sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, - slots_per_op); - if (sw_desc) { - group_start = sw_desc->group_head; - ppc440spe_desc_init_memset(group_start, value, flags); - ppc440spe_adma_set_dest(group_start, dma_dest, 0); - ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len); - sw_desc->unmap_len = len; - sw_desc->async_tx.flags = flags; - } - spin_unlock_bh(&ppc440spe_chan->lock); - - return sw_desc ? &sw_desc->async_tx : NULL; -} - -/** * ppc440spe_adma_prep_dma_xor - prepare CDB for a XOR operation */ static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor( @@ -4125,7 +4084,6 @@ static void ppc440spe_adma_init_capabilities(struct ppc440spe_adma_device *adev) case PPC440SPE_DMA1_ID: dma_cap_set(DMA_MEMCPY, adev->common.cap_mask); dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask); - dma_cap_set(DMA_MEMSET, adev->common.cap_mask); dma_cap_set(DMA_PQ, adev->common.cap_mask); dma_cap_set(DMA_PQ_VAL, adev->common.cap_mask); dma_cap_set(DMA_XOR_VAL, adev->common.cap_mask); @@ -4151,10 +4109,6 @@ static void ppc440spe_adma_init_capabilities(struct ppc440spe_adma_device *adev) adev->common.device_prep_dma_memcpy = ppc440spe_adma_prep_dma_memcpy; } - if (dma_has_cap(DMA_MEMSET, adev->common.cap_mask)) { - adev->common.device_prep_dma_memset = - ppc440spe_adma_prep_dma_memset; - } if (dma_has_cap(DMA_XOR, adev->common.cap_mask)) { adev->common.max_xor = XOR_MAX_OPS; adev->common.device_prep_dma_xor = @@ -4217,7 +4171,6 @@ static void ppc440spe_adma_init_capabilities(struct ppc440spe_adma_device *adev) dma_has_cap(DMA_XOR, adev->common.cap_mask) ? "xor " : "", dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask) ? "xor_val " : "", dma_has_cap(DMA_MEMCPY, adev->common.cap_mask) ? "memcpy " : "", - dma_has_cap(DMA_MEMSET, adev->common.cap_mask) ? "memset " : "", dma_has_cap(DMA_INTERRUPT, adev->common.cap_mask) ? "intr " : ""); } diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index a1c486a..179b38f 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -182,10 +182,6 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, unsigned int src_offset, size_t len, struct async_submit_ctl *submit); -struct dma_async_tx_descriptor * -async_memset(struct page *dest, int val, unsigned int offset, - size_t len, struct async_submit_ctl *submit); - struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); struct dma_async_tx_descriptor * diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 96d3e4a..cb286b1 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -66,7 +66,6 @@ enum dma_transaction_type { DMA_PQ, DMA_XOR_VAL, DMA_PQ_VAL, - DMA_MEMSET, DMA_INTERRUPT, DMA_SG, DMA_PRIVATE, @@ -520,7 +519,6 @@ struct dma_tx_state { * @device_prep_dma_xor_val: prepares a xor validation operation * @device_prep_dma_pq: prepares a pq operation * @device_prep_dma_pq_val: prepares a pqzero_sum operation - * @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation * @device_prep_dma_cyclic: prepare a cyclic dma operation suitable for audio. @@ -573,9 +571,6 @@ struct dma_device { struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, unsigned int src_cnt, const unsigned char *scf, size_t len, enum sum_check_flags *pqres, unsigned long flags); - struct dma_async_tx_descriptor *(*device_prep_dma_memset)( - struct dma_chan *chan, dma_addr_t dest, int value, size_t len, - unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)( struct dma_chan *chan, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_sg)( -- cgit v0.10.2 From a7d0dabb3e863a1d4018235c1384b4318322116a Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Wed, 3 Jul 2013 15:05:07 -0700 Subject: drivers/misc/sgi-gru/grufault.c: fix a sanity test in gru_set_context_option() "req.val1 == -1" is valid but it doesn't make sense to check gru_base[-1]. gru_base[] is a global array. Signed-off-by: Dimitri Sivanich Cc: Dan Carpenter Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index c4acac7..f74fc0c 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -876,8 +876,9 @@ int gru_set_context_option(unsigned long arg) switch (req.op) { case sco_blade_chiplet: /* Select blade/chiplet for GRU context */ - if (req.val1 < -1 || req.val1 >= GRU_MAX_BLADES || !gru_base[req.val1] || - req.val0 < -1 || req.val0 >= GRU_CHIPLETS_PER_HUB) { + if (req.val0 < -1 || req.val0 >= GRU_CHIPLETS_PER_HUB || + req.val1 < -1 || req.val1 >= GRU_MAX_BLADES || + (req.val1 >= 0 && !gru_base[req.val1])) { ret = -EINVAL; } else { gts->ts_user_blade_id = req.val1; -- cgit v0.10.2 From f7269cfc37d547a6e89b26caf30c62557450c196 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 3 Jul 2013 15:05:08 -0700 Subject: MAINTAINERS: fix tape driver file mappings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The masks for the st and osst tape drivers in MAINTAINERS are too broad and include unrelated files. Make the file list accurate so that maintainers of these drivers aren't bothered with unrelated work. Signed-off-by: Jean Delvare Cc: Willem Riede Cc: Kai Mäkisara Cc: "James E.J. Bottomley" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index 56de382..2c64e6d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5963,8 +5963,10 @@ M: Willem Riede L: osst-users@lists.sourceforge.net L: linux-scsi@vger.kernel.org S: Maintained -F: drivers/scsi/osst* -F: drivers/scsi/st* +F: Documentation/scsi/osst.txt +F: drivers/scsi/osst.* +F: drivers/scsi/osst_*.h +F: drivers/scsi/st.h OPENCORES I2C BUS DRIVER M: Peter Korsgaard @@ -7112,7 +7114,8 @@ M: Kai Mäkisara L: linux-scsi@vger.kernel.org S: Maintained F: Documentation/scsi/st.txt -F: drivers/scsi/st* +F: drivers/scsi/st.* +F: drivers/scsi/st_*.h SCTP PROTOCOL M: Vlad Yasevich -- cgit v0.10.2 From 29603af12096567299c0004975c1e18217732c31 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:05:09 -0700 Subject: backlight: atmel-pwm-bl: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d06310 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/video/backlight/atmel-pwm-bl.c b/drivers/video/backlight/atmel-pwm-bl.c index a60d6af..0393d82 100644 --- a/drivers/video/backlight/atmel-pwm-bl.c +++ b/drivers/video/backlight/atmel-pwm-bl.c @@ -195,7 +195,6 @@ static int __init atmel_pwm_bl_probe(struct platform_device *pdev) return 0; err_free_bl_dev: - platform_set_drvdata(pdev, NULL); backlight_device_unregister(bldev); err_free_pwm: pwm_channel_free(&pwmbl->pwmc); @@ -212,7 +211,6 @@ static int __exit atmel_pwm_bl_remove(struct platform_device *pdev) pwm_channel_disable(&pwmbl->pwmc); pwm_channel_free(&pwmbl->pwmc); backlight_device_unregister(pwmbl->bldev); - platform_set_drvdata(pdev, NULL); return 0; } -- cgit v0.10.2 From 6a7c02f4edd6ce8ef96fc27701fa4eeb3fecc046 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:05:10 -0700 Subject: backlight: ep93xx: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/video/backlight/ep93xx_bl.c b/drivers/video/backlight/ep93xx_bl.c index 3345582..018368b 100644 --- a/drivers/video/backlight/ep93xx_bl.c +++ b/drivers/video/backlight/ep93xx_bl.c @@ -111,7 +111,6 @@ static int ep93xxbl_remove(struct platform_device *dev) struct backlight_device *bl = platform_get_drvdata(dev); backlight_device_unregister(bl); - platform_set_drvdata(dev, NULL); return 0; } -- cgit v0.10.2 From 85f7f220fbcabec5ce76b8dcb065d200989f6a48 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:05:11 -0700 Subject: backlight: lp8788: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/video/backlight/lp8788_bl.c b/drivers/video/backlight/lp8788_bl.c index 4bb8b4f..980855e 100644 --- a/drivers/video/backlight/lp8788_bl.c +++ b/drivers/video/backlight/lp8788_bl.c @@ -312,7 +312,6 @@ static int lp8788_backlight_remove(struct platform_device *pdev) backlight_update_status(bl_dev); sysfs_remove_group(&pdev->dev.kobj, &lp8788_attr_group); lp8788_backlight_unregister(bl); - platform_set_drvdata(pdev, NULL); return 0; } -- cgit v0.10.2 From 9ed3936fd7e918bfea565f51aa4379967009d7dd Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:05:12 -0700 Subject: backlight: pcf50633: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/video/backlight/pcf50633-backlight.c b/drivers/video/backlight/pcf50633-backlight.c index e87c7a3..6ed76be 100644 --- a/drivers/video/backlight/pcf50633-backlight.c +++ b/drivers/video/backlight/pcf50633-backlight.c @@ -153,8 +153,6 @@ static int pcf50633_bl_remove(struct platform_device *pdev) backlight_device_unregister(pcf_bl->bl); - platform_set_drvdata(pdev, NULL); - return 0; } -- cgit v0.10.2 From 8318fde4ac78f6793b1cbaf57659902253a61617 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:05:13 -0700 Subject: backlight: add devm_backlight_device_{register,unregister}() These functions allow the driver core to automatically clean up any allocation made by backlight drivers. Thus it simplifies the error paths. Signed-off-by: Jingoo Han Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c index e3c2790..53c52fb 100644 --- a/drivers/video/backlight/backlight.c +++ b/drivers/video/backlight/backlight.c @@ -370,6 +370,81 @@ void backlight_device_unregister(struct backlight_device *bd) } EXPORT_SYMBOL(backlight_device_unregister); +static void devm_backlight_device_release(struct device *dev, void *res) +{ + struct backlight_device *backlight = *(struct backlight_device **)res; + + backlight_device_unregister(backlight); +} + +static int devm_backlight_device_match(struct device *dev, void *res, + void *data) +{ + struct backlight_device **r = res; + + return *r == data; +} + +/** + * devm_backlight_device_register - resource managed backlight_device_register() + * @dev: the device to register + * @name: the name of the device + * @parent: a pointer to the parent device + * @devdata: an optional pointer to be stored for private driver use + * @ops: the backlight operations structure + * @props: the backlight properties + * + * @return a struct backlight on success, or an ERR_PTR on error + * + * Managed backlight_device_register(). The backlight_device returned + * from this function are automatically freed on driver detach. + * See backlight_device_register() for more information. + */ +struct backlight_device *devm_backlight_device_register(struct device *dev, + const char *name, struct device *parent, void *devdata, + const struct backlight_ops *ops, + const struct backlight_properties *props) +{ + struct backlight_device **ptr, *backlight; + + ptr = devres_alloc(devm_backlight_device_release, sizeof(*ptr), + GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); + + backlight = backlight_device_register(name, parent, devdata, ops, + props); + if (!IS_ERR(backlight)) { + *ptr = backlight; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return backlight; +} +EXPORT_SYMBOL(devm_backlight_device_register); + +/** + * devm_backlight_device_unregister - resource managed backlight_device_unregister() + * @dev: the device to unregister + * @bd: the backlight device to unregister + * + * Deallocated a backlight allocated with devm_backlight_device_register(). + * Normally this function will not need to be called and the resource management + * code will ensure that the resource is freed. + */ +void devm_backlight_device_unregister(struct device *dev, + struct backlight_device *bd) +{ + int rc; + + rc = devres_release(dev, devm_backlight_device_release, + devm_backlight_device_match, bd); + WARN_ON(rc); +} +EXPORT_SYMBOL(devm_backlight_device_unregister); + #ifdef CONFIG_OF static int of_parent_match(struct device *dev, const void *data) { diff --git a/include/linux/backlight.h b/include/linux/backlight.h index da9a082..53b7794 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -114,7 +114,13 @@ static inline void backlight_update_status(struct backlight_device *bd) extern struct backlight_device *backlight_device_register(const char *name, struct device *dev, void *devdata, const struct backlight_ops *ops, const struct backlight_properties *props); +extern struct backlight_device *devm_backlight_device_register( + struct device *dev, const char *name, struct device *parent, + void *devdata, const struct backlight_ops *ops, + const struct backlight_properties *props); extern void backlight_device_unregister(struct backlight_device *bd); +extern void devm_backlight_device_unregister(struct device *dev, + struct backlight_device *bd); extern void backlight_force_update(struct backlight_device *bd, enum backlight_update_reason reason); -- cgit v0.10.2 From 1d0c48e66b3f1cf40660f69a87f55af3df0b2ae3 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:05:14 -0700 Subject: lcd: add devm_lcd_device_{register,unregister}() These functions allow the driver core to automatically clean up any allocation made by lcd drivers. Thus it simplifies the error paths. Signed-off-by: Jingoo Han Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c index 3649fd9..41964a7 100644 --- a/drivers/video/backlight/lcd.c +++ b/drivers/video/backlight/lcd.c @@ -260,6 +260,76 @@ void lcd_device_unregister(struct lcd_device *ld) } EXPORT_SYMBOL(lcd_device_unregister); +static void devm_lcd_device_release(struct device *dev, void *res) +{ + struct lcd_device *lcd = *(struct lcd_device **)res; + + lcd_device_unregister(lcd); +} + +static int devm_lcd_device_match(struct device *dev, void *res, void *data) +{ + struct lcd_device **r = res; + + return *r == data; +} + +/** + * devm_lcd_device_register - resource managed lcd_device_register() + * @dev: the device to register + * @name: the name of the device + * @parent: a pointer to the parent device + * @devdata: an optional pointer to be stored for private driver use + * @ops: the lcd operations structure + * + * @return a struct lcd on success, or an ERR_PTR on error + * + * Managed lcd_device_register(). The lcd_device returned from this function + * are automatically freed on driver detach. See lcd_device_register() + * for more information. + */ +struct lcd_device *devm_lcd_device_register(struct device *dev, + const char *name, struct device *parent, + void *devdata, struct lcd_ops *ops) +{ + struct lcd_device **ptr, *lcd; + + ptr = devres_alloc(devm_lcd_device_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); + + lcd = lcd_device_register(name, parent, devdata, ops); + if (!IS_ERR(lcd)) { + *ptr = lcd; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return lcd; +} +EXPORT_SYMBOL(devm_lcd_device_register); + +/** + * devm_lcd_device_unregister - resource managed lcd_device_unregister() + * @dev: the device to unregister + * @ld: the lcd device to unregister + * + * Deallocated a lcd allocated with devm_lcd_device_register(). Normally + * this function will not need to be called and the resource management + * code will ensure that the resource is freed. + */ +void devm_lcd_device_unregister(struct device *dev, struct lcd_device *ld) +{ + int rc; + + rc = devres_release(dev, devm_lcd_device_release, + devm_lcd_device_match, ld); + WARN_ON(rc); +} +EXPORT_SYMBOL(devm_lcd_device_unregister); + + static void __exit lcd_class_exit(void) { class_destroy(lcd_class); diff --git a/include/linux/lcd.h b/include/linux/lcd.h index e00c3b0..504f624 100644 --- a/include/linux/lcd.h +++ b/include/linux/lcd.h @@ -112,7 +112,12 @@ static inline void lcd_set_power(struct lcd_device *ld, int power) extern struct lcd_device *lcd_device_register(const char *name, struct device *parent, void *devdata, struct lcd_ops *ops); +extern struct lcd_device *devm_lcd_device_register(struct device *dev, + const char *name, struct device *parent, + void *devdata, struct lcd_ops *ops); extern void lcd_device_unregister(struct lcd_device *ld); +extern void devm_lcd_device_unregister(struct device *dev, + struct lcd_device *ld); #define to_lcd_device(obj) container_of(obj, struct lcd_device, dev) -- cgit v0.10.2 From 6212de88f8a2f59e9be66674b3aa1f9399971a16 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:05:15 -0700 Subject: MAINTAINERS: add Backlight subsystem co-maintainer Add myself as co-maintainer for the backlight subsystem. Signed-off-by: Jingoo Han Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index 2c64e6d..8e12f69 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1611,6 +1611,7 @@ F: drivers/net/wireless/b43legacy/ BACKLIGHT CLASS/SUBSYSTEM M: Richard Purdie +M: Jingoo Han S: Maintained F: drivers/video/backlight/ F: include/linux/backlight.h -- cgit v0.10.2 From 3601792e7b68150420ea8dc129e26e167c0484d8 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 3 Jul 2013 15:05:16 -0700 Subject: backlight: convert from legacy pm ops to dev_pm_ops Convert drivers/video/backlight/class to use dev_pm_ops for power management and remove Legacy PM ops hooks. With this change, backlight class registers suspend/resume callbacks via class->pm (dev_pm_ops) instead of Legacy class->suspend/resume. When __device_suspend() runs call-backs, it will find class->pm ops for the backlight class. [jg1.han@samsung.com: add CONFIG_PM_SLEEP to suspend/resume functions] Signed-off-by: Shuah Khan Signed-off-by: Jingoo Han Cc: Shuah Khan Cc: Jingoo Han Cc: Shuah Khan Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c index 53c52fb..3fccb6d 100644 --- a/drivers/video/backlight/backlight.c +++ b/drivers/video/backlight/backlight.c @@ -208,7 +208,8 @@ static ssize_t backlight_show_actual_brightness(struct device *dev, static struct class *backlight_class; -static int backlight_suspend(struct device *dev, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int backlight_suspend(struct device *dev) { struct backlight_device *bd = to_backlight_device(dev); @@ -235,6 +236,10 @@ static int backlight_resume(struct device *dev) return 0; } +#endif + +static SIMPLE_DEV_PM_OPS(backlight_class_dev_pm_ops, backlight_suspend, + backlight_resume); static void bl_device_release(struct device *dev) { @@ -489,8 +494,7 @@ static int __init backlight_class_init(void) } backlight_class->dev_attrs = bl_device_attributes; - backlight_class->suspend = backlight_suspend; - backlight_class->resume = backlight_resume; + backlight_class->pm = &backlight_class_dev_pm_ops; return 0; } -- cgit v0.10.2 From a95681058e993bd059d0b6f70730964e15c22596 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 3 Jul 2013 15:05:17 -0700 Subject: radeon: remove redundant __list_for_each definition from mkregtable.c Signed-off-by: Dave Jones Cc: Dave Airlie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/gpu/drm/radeon/mkregtable.c b/drivers/gpu/drm/radeon/mkregtable.c index 5a82b6b..af85299 100644 --- a/drivers/gpu/drm/radeon/mkregtable.c +++ b/drivers/gpu/drm/radeon/mkregtable.c @@ -373,19 +373,6 @@ static inline void list_splice_tail_init(struct list_head *list, pos = pos->next) /** - * __list_for_each - iterate over a list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * This variant differs from list_for_each() in that it's the - * simplest possible list iteration code, no prefetching is done. - * Use this for code that knows the list to be very short (empty - * or 1 entry) most of the time. - */ -#define __list_for_each(pos, head) \ - for (pos = (head)->next; pos != (head); pos = pos->next) - -/** * list_for_each_prev - iterate over a list backwards * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. -- cgit v0.10.2 From 5cb0656b62ff1199763764e4f6b4c06d30d5d0f5 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 3 Jul 2013 15:05:18 -0700 Subject: ipw2200: convert __list_for_each usage to list_for_each Signed-off-by: Dave Jones Cc: Stanislav Yakovlev Cc: "John W. Linville" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c index d96257b..4ed5e45 100644 --- a/drivers/net/wireless/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/ipw2x00/ipw2200.c @@ -8256,7 +8256,7 @@ static int is_duplicate_packet(struct ipw_priv *priv, u8 *mac = header->addr2; int index = mac[5] % IPW_IBSS_MAC_HASH_SIZE; - __list_for_each(p, &priv->ibss_mac_hash[index]) { + list_for_each(p, &priv->ibss_mac_hash[index]) { entry = list_entry(p, struct ipw_ibss_seq, list); if (!memcmp(entry->mac, mac, ETH_ALEN)) -- cgit v0.10.2 From 64df3071a97f20767f63b88c573791691a855b5c Mon Sep 17 00:00:00 2001 From: Fan Du Date: Wed, 3 Jul 2013 15:05:19 -0700 Subject: lib/percpu_counter.c: __this_cpu_write() doesn't need to be protected by spinlock __this_cpu_write doesn't need to be protected by spinlock, AS we are doing per cpu write with preempt disabled. And another reason to remove __this_cpu_write outside of spinlock: __percpu_counter_sum is not an accurate counter. Signed-off-by: Fan Du Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index ba6085d..1fc23a3 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -80,8 +80,8 @@ void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) if (count >= batch || count <= -batch) { raw_spin_lock(&fbc->lock); fbc->count += count; - __this_cpu_write(*fbc->counters, 0); raw_spin_unlock(&fbc->lock); + __this_cpu_write(*fbc->counters, 0); } else { __this_cpu_write(*fbc->counters, count); } -- cgit v0.10.2 From be79794bc116fc0c264be1a599433c92ec9e34f5 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 3 Jul 2013 15:05:20 -0700 Subject: checkpatch: change CamelCase test and make it --strict Do not bleat a message on nominally acceptable CamelCase uses that are separated by an _ like drm_core_has_MTRR. CamelCase tests are also a bit noisy against certain types of code acceptable to some kernel developers. Make the test applicable only with --strict. Signed-off-by: Joe Perches Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index b954de5..f1aad19 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2938,12 +2938,12 @@ sub process { while ($line =~ m{($Constant|$Lval)}g) { my $var = $1; if ($var !~ /$Constant/ && - $var =~ /[A-Z]\w*[a-z]|[a-z]\w*[A-Z]/ && + $var =~ /[A-Z][a-z]|[a-z][A-Z]/ && $var !~ /"^(?:Clear|Set|TestClear|TestSet|)Page[A-Z]/ && !defined $camelcase{$var}) { $camelcase{$var} = 1; - WARN("CAMELCASE", - "Avoid CamelCase: <$var>\n" . $herecurr); + CHK("CAMELCASE", + "Avoid CamelCase: <$var>\n" . $herecurr); } } -- cgit v0.10.2 From 95e2c6023b0e4c8499fb521697f79215f69135fe Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 3 Jul 2013 15:05:20 -0700 Subject: checkpatch: warn when using gcc's binary constant ("0b") extension The gcc extension for binary constants that start with 0b is only supported with gcc version 4.3 or higher. The kernel can still be compiled with earlier versions of gcc, so have checkpatch emit a warning for these constants. Restructure checkpatch's constant finding code a bit to support finding these binary constants. Signed-off-by: Joe Perches Suggested-by: Andrew Morton Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index f1aad19..517da26 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -230,11 +230,15 @@ our $Inline = qr{inline|__always_inline|noinline}; our $Member = qr{->$Ident|\.$Ident|\[[^]]*\]}; our $Lval = qr{$Ident(?:$Member)*}; +our $Int_type = qr{(?i)llu|ull|ll|lu|ul|l|u}; +our $Binary = qr{(?i)0b[01]+$Int_type?}; +our $Hex = qr{(?i)0x[0-9a-f]+$Int_type?}; +our $Int = qr{[0-9]+$Int_type?}; our $Float_hex = qr{(?i)0x[0-9a-f]+p-?[0-9]+[fl]?}; our $Float_dec = qr{(?i)(?:[0-9]+\.[0-9]*|[0-9]*\.[0-9]+)(?:e-?[0-9]+)?[fl]?}; our $Float_int = qr{(?i)[0-9]+e-?[0-9]+[fl]?}; our $Float = qr{$Float_hex|$Float_dec|$Float_int}; -our $Constant = qr{$Float|(?i)(?:0x[0-9a-f]+|[0-9]+)[ul]*}; +our $Constant = qr{$Float|$Binary|$Hex|$Int}; our $Assignment = qr{\*\=|/=|%=|\+=|-=|<<=|>>=|&=|\^=|\|=|=}; our $Compare = qr{<=|>=|==|!=|<|>}; our $Operators = qr{ @@ -2934,9 +2938,17 @@ sub process { } } -#CamelCase +#Specific variable tests while ($line =~ m{($Constant|$Lval)}g) { my $var = $1; + +#gcc binary extension + if ($var =~ /^$Binary$/) { + WARN("GCC_BINARY_CONSTANT", + "Avoid gcc v4.3+ binary constant extension: <$var>\n" . $herecurr); + } + +#CamelCase if ($var !~ /$Constant/ && $var =~ /[A-Z][a-z]|[a-z][A-Z]/ && $var !~ /"^(?:Clear|Set|TestClear|TestSet|)Page[A-Z]/ && -- cgit v0.10.2 From a640d25cead66457ac14a878234f8b323ba8aade Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 3 Jul 2013 15:05:21 -0700 Subject: checkpatch: add --strict preference for p = kmalloc(sizeof(*p)... Add another test for memory allocation style to follow Documentation/CodingStyle: Chapter 14: Allocating memory The preferred form for passing a size of a struct is the following: p = kmalloc(sizeof(*p), ...); Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 517da26..6185eb6 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3507,6 +3507,14 @@ sub process { "unnecessary cast may hide bugs, see http://c-faq.com/malloc/mallocnocast.html\n" . $herecurr); } +# alloc style +# p = alloc(sizeof(struct foo), ...) should be p = alloc(sizeof(*p), ...) + if ($^V && $^V ge 5.10.0 && + $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*([kv][mz]alloc(?:_node)?)\s*\(\s*(sizeof\s*\(\s*struct\s+$Lval\s*\))/) { + CHK("ALLOC_SIZEOF_STRUCT", + "Prefer $3(sizeof(*$1)...) over $3($4...)\n" . $herecurr); + } + # check for krealloc arg reuse if ($^V && $^V ge 5.10.0 && $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*krealloc\s*\(\s*\1\s*,/) { -- cgit v0.10.2 From 807bd26c4c3e94aced4630ba8369c8941728636b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 3 Jul 2013 15:05:22 -0700 Subject: checkpatch: remove quote from CamelCase test Commit be987d9f80 ("checkpatch: improve CamelCase test for Page") added it but it shouldn't be there. Must have been my fault. Make sure that the tested variable doesn't contain a constant. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 6185eb6..2f61b5c 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2949,9 +2949,9 @@ sub process { } #CamelCase - if ($var !~ /$Constant/ && + if ($var !~ /^$Constant$/ && $var =~ /[A-Z][a-z]|[a-z][A-Z]/ && - $var !~ /"^(?:Clear|Set|TestClear|TestSet|)Page[A-Z]/ && + $var !~ /^(?:Clear|Set|TestClear|TestSet|)Page[A-Z]/ && !defined $camelcase{$var}) { $camelcase{$var} = 1; CHK("CAMELCASE", -- cgit v0.10.2 From fdb4bcd6108602097b9a1ebd11f6e61f331c8dce Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 3 Jul 2013 15:05:23 -0700 Subject: checkpatch: improve network block comment test and message Show the first line of the comment after a line with just /* to better show where the defective comment style is in the file. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 2f61b5c..a3922d0 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -1891,8 +1891,8 @@ sub process { } if ($realfile =~ m@^(drivers/net/|net/)@ && - $rawline =~ /^\+[ \t]*\/\*[ \t]*$/ && - $prevrawline =~ /^\+[ \t]*$/) { + $prevrawline =~ /^\+[ \t]*\/\*[ \t]*$/ && + $rawline =~ /^\+[ \t]*\*/) { WARN("NETWORKING_BLOCK_COMMENT_STYLE", "networking block comments don't use an empty /* line, use /* Comment...\n" . $hereprev); } -- cgit v0.10.2 From a605e32ebde25dc31f943fecb30e3e28079ccd06 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 3 Jul 2013 15:05:24 -0700 Subject: checkpatch: warn when networking block comment lines don't start with * Some block comments in network are written as: /* block comment line 1 block comment line 2 */ Emit a warning on the "block comment line 2" because it should be /* block comment line 1 * block comment line 2 */ This warning is only emitted on the second line of a block comment. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index a3922d0..576139a 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -1898,6 +1898,14 @@ sub process { } if ($realfile =~ m@^(drivers/net/|net/)@ && + $prevrawline =~ /^\+[ \t]*\/\*/ && #starting /* + $prevrawline !~ /\*\/[ \t]*$/ && #no trailing */ + $rawline !~ /^\+[ \t]*\*/) { #no leading * + WARN("NETWORKING_BLOCK_COMMENT_STYLE", + "networking block comments start with * on subsequent lines\n" . $hereprev); + } + + if ($realfile =~ m@^(drivers/net/|net/)@ && $rawline !~ m@^\+[ \t]*\*/[ \t]*$@ && #trailing */ $rawline !~ m@^\+.*/\*.*\*/[ \t]*$@ && #inline /*...*/ $rawline !~ m@^\+.*\*{2,}/[ \t]*$@ && #trailing **/ -- cgit v0.10.2 From 36ec19390effc9131132901e8a66a071a7b74a88 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 3 Jul 2013 15:05:25 -0700 Subject: checkpatch: warn on comparisons to jiffies Comparing jiffies is almost always wrong and time_before and time_after should be used instead. Warn on any comparison to jiffies. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 576139a..c274e1d 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3299,6 +3299,12 @@ sub process { } } +# check for comparisons of jiffies + if ($line =~ /\bjiffies\s*$Compare|$Compare\s*jiffies\b/) { + WARN("JIFFIES_COMPARISON", + "Comparing jiffies is almost always wrong; prefer time_after, time_before and friends\n" . $herecurr); + } + # warn about #ifdefs in C files # if ($line =~ /^.\s*\#\s*if(|n)def/ && ($realfile =~ /\.c$/)) { # print "#ifdef in C files should be avoided\n"; -- cgit v0.10.2 From 9d7a34a5135d29b840d074ba8fbb9c2fac63e508 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 3 Jul 2013 15:05:26 -0700 Subject: checkpatch: warn on comparisons to get_jiffies_64() Comparing get_jiffies_64() is almost always wrong and time_before64 and time_after64 should be used instead. Warn on any comparison to get_jiffies_64(). Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index c274e1d..f4e247b2 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3305,6 +3305,12 @@ sub process { "Comparing jiffies is almost always wrong; prefer time_after, time_before and friends\n" . $herecurr); } +# check for comparisons of get_jiffies_64() + if ($line =~ /\bget_jiffies_64\s*\(\s*\)\s*$Compare|$Compare\s*get_jiffies_64\s*\(\s*\)/) { + WARN("JIFFIES_COMPARISON", + "Comparing get_jiffies_64() is almost always wrong; prefer time_after64, time_before64 and friends\n" . $herecurr); + } + # warn about #ifdefs in C files # if ($line =~ /^.\s*\#\s*if(|n)def/ && ($realfile =~ /\.c$/)) { # print "#ifdef in C files should be avoided\n"; -- cgit v0.10.2 From 3cc4b1c3f0d283f4bb8d49059bd6df8e7af7558b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 3 Jul 2013 15:05:27 -0700 Subject: checkpatch: reduce false positive rate of "complex macros" Allow "#define foo struct.member" without bleating a warning. This also allows "#define foo bar.baz->qux" and so on. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index f4e247b2..93b8e66 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3041,7 +3041,7 @@ sub process { if ($dstat ne '' && $dstat !~ /^(?:$Ident|-?$Constant),$/ && # 10, // foo(), $dstat !~ /^(?:$Ident|-?$Constant);$/ && # foo(); - $dstat !~ /^[!~-]?(?:$Ident|$Constant)$/ && # 10 // foo() // !foo // ~foo // -foo + $dstat !~ /^[!~-]?(?:$Lval|$Constant)$/ && # 10 // foo() // !foo // ~foo // -foo // foo->bar // foo.bar->baz $dstat !~ /^'X'$/ && # character constants $dstat !~ /$exceptions/ && $dstat !~ /^\.$Ident\s*=/ && # .foo = -- cgit v0.10.2 From c4a62ef9102bfa39f3bb89be2ae1ae11a23ebe28 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 3 Jul 2013 15:05:28 -0700 Subject: checkpatch: add a placeholder to check blank lines before declarations Figure out first how to determine if this is in a struct declaration or in a function body before enabling this. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 93b8e66..4ad4052 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2750,6 +2750,14 @@ sub process { "space required before the open brace '{'\n" . $herecurr); } +## # check for blank lines before declarations +## if ($line =~ /^.\t+$Type\s+$Ident(?:\s*=.*)?;/ && +## $prevrawline =~ /^.\s*$/) { +## WARN("SPACING", +## "No blank lines before declarations\n" . $hereprev); +## } +## + # closing brace should have a space following it when it has anything # on the line if ($line =~ /}(?!(?:,|;|\)))\S/) { -- cgit v0.10.2 From 77b9a53a627491df83a75361440485629c35aa91 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 3 Jul 2013 15:05:29 -0700 Subject: checkpatch: don't warn on blank lines before/after braces as often Check to make sure the blank lines aren't comment lines like: bool foo(bool bar) { /* Don't warn on a leading comment */ return !bar; /* Don't warn on a trailing comment either */ } Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 4ad4052..c43be81 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3258,11 +3258,11 @@ sub process { } # check for unnecessary blank lines around braces - if (($line =~ /^.\s*}\s*$/ && $prevline =~ /^.\s*$/)) { + if (($line =~ /^.\s*}\s*$/ && $prevrawline =~ /^.\s*$/)) { CHK("BRACES", "Blank lines aren't necessary before a close brace '}'\n" . $hereprev); } - if (($line =~ /^.\s*$/ && $prevline =~ /^..*{\s*$/)) { + if (($rawline =~ /^.\s*$/ && $prevline =~ /^..*{\s*$/)) { CHK("BRACES", "Blank lines aren't necessary after an open brace '{'\n" . $hereprev); } -- cgit v0.10.2 From 179f8f40fc3ae7cd49e96b3a7d5182166c36bdab Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 3 Jul 2013 15:05:30 -0700 Subject: checkpatch: add a --strict test for comparison to true/false Comparing to true or false is error prone. Add tests for the various forms of (foo == true) && (false != bar) that are only reported with --strict. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index c43be81..c2d223c 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3588,6 +3588,33 @@ sub process { "Using yield() is generally wrong. See yield() kernel-doc (sched/core.c)\n" . $herecurr); } +# check for comparisons against true and false + if ($line =~ /\+\s*(.*?)\b(true|false|$Lval)\s*(==|\!=)\s*(true|false|$Lval)\b(.*)$/i) { + my $lead = $1; + my $arg = $2; + my $test = $3; + my $otype = $4; + my $trail = $5; + my $op = "!"; + + ($arg, $otype) = ($otype, $arg) if ($arg =~ /^(?:true|false)$/i); + + my $type = lc($otype); + if ($type =~ /^(?:true|false)$/) { + if (("$test" eq "==" && "$type" eq "true") || + ("$test" eq "!=" && "$type" eq "false")) { + $op = ""; + } + + CHK("BOOL_COMPARISON", + "Using comparison to $otype is error prone\n" . $herecurr); + +## maybe suggesting a correct construct would better +## "Using comparison to $otype is error prone. Perhaps use '${lead}${op}${arg}${trail}'\n" . $herecurr); + + } + } + # check for semaphores initialized locked if ($line =~ /^.\s*sema_init.+,\W?0\W?\)/) { WARN("CONSIDER_COMPLETION", -- cgit v0.10.2 From 23f780c90496eb1cc158e862e7035c8468dfa052 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 3 Jul 2013 15:05:31 -0700 Subject: checkpatch: improve "no space after cast" test Some false positives exist on this test. For instance: *va_arg(args, signed char *) = val.s; or memset(foo, 0, sizeof(struct bar *) * baz)); Ignore lines that have an arithmetic operator or assignment after what appears to be a cast to a pointer "(foo *)". Add $Arithmetic convenience variable. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index c2d223c..ab39ceb 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -241,10 +241,11 @@ our $Float = qr{$Float_hex|$Float_dec|$Float_int}; our $Constant = qr{$Float|$Binary|$Hex|$Int}; our $Assignment = qr{\*\=|/=|%=|\+=|-=|<<=|>>=|&=|\^=|\|=|=}; our $Compare = qr{<=|>=|==|!=|<|>}; +our $Arithmetic = qr{\+|-|\*|\/|%}; our $Operators = qr{ <=|>=|==|!=| =>|->|<<|>>|<|>|!|~| - &&|\|\||,|\^|\+\+|--|&|\||\+|-|\*|\/|% + &&|\|\||,|\^|\+\+|--|&|\||$Arithmetic }x; our $NonptrType; @@ -1885,7 +1886,7 @@ sub process { } } - if ($line =~ /^\+.*\*[ \t]*\)[ \t]+/) { + if ($line =~ /^\+.*\*[ \t]*\)[ \t]+(?!$Assignment|$Arithmetic)/) { CHK("SPACING", "No space is necessary after a cast\n" . $hereprev); } -- cgit v0.10.2 From 3705ce5bcc1037b68e9d20f90ab50bc7f64edd00 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 3 Jul 2013 15:05:31 -0700 Subject: checkpatch: create an EXPERIMENTAL --fix option to correct patches Some patches have simple defects in whitespace and formatting that checkpatch could correct automatically. Attempt to do so. Add a --fix option to create a ".EXPERIMENTAL-checkpatch-fixes" file that tries to use normal kernel style for some of these formatting errors. Add warnings against using this file without verifying the changes. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index ab39ceb..9696be5 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -27,6 +27,7 @@ my $summary = 1; my $mailback = 0; my $summary_file = 0; my $show_types = 0; +my $fix = 0; my $root; my %debug; my %ignore_type = (); @@ -63,6 +64,11 @@ Options: is all off) --test-only=WORD report only warnings/errors containing WORD literally + --fix EXPERIMENTAL - may create horrible results + If correctable single-line errors exist, create + ".EXPERIMENTAL-checkpatch-fixes" + with potential errors corrected to the preferred + checkpatch style -h, --help, --version display this help and exit When FILE is - read standard input. @@ -114,7 +120,7 @@ GetOptions( 'summary!' => \$summary, 'mailback!' => \$mailback, 'summary-file!' => \$summary_file, - + 'fix!' => \$fix, 'debug=s' => \%debug, 'test-only=s' => \$tst_only, 'h|help' => \$help, @@ -367,6 +373,7 @@ $chk_signoff = 0 if ($file); my @rawlines = (); my @lines = (); +my @fixed = (); my $vname; for my $filename (@ARGV) { my $FILE; @@ -394,6 +401,7 @@ for my $filename (@ARGV) { } @rawlines = (); @lines = (); + @fixed = (); } exit($exit); @@ -434,7 +442,7 @@ sub parse_email { $comment = $2 if defined $2; $formatted_email =~ s/$address.*$//; $name = $formatted_email; - $name =~ s/^\s+|\s+$//g; + $name = trim($name); $name =~ s/^\"|\"$//g; # If there's a name left after stripping spaces and # leading quotes, and the address doesn't have both @@ -449,9 +457,9 @@ sub parse_email { } } - $name =~ s/^\s+|\s+$//g; + $name = trim($name); $name =~ s/^\"|\"$//g; - $address =~ s/^\s+|\s+$//g; + $address = trim($address); $address =~ s/^\<|\>$//g; if ($name =~ /[^\w \-]/i) { ##has "must quote" chars @@ -467,9 +475,9 @@ sub format_email { my $formatted_email; - $name =~ s/^\s+|\s+$//g; + $name = trim($name); $name =~ s/^\"|\"$//g; - $address =~ s/^\s+|\s+$//g; + $address = trim($address); if ($name =~ /[^\w \-]/i) { ##has "must quote" chars $name =~ s/(?\n"; - my ($from, $to) = ($2, $2); + my ($ident, $from, $to) = ($1, $2, $2); # Should start with a space. $to =~ s/^(\S)/ $1/; @@ -2374,15 +2460,22 @@ sub process { while ($to =~ s/\*\s+\*/\*\*/) { } - #print "from<$from> to<$to>\n"; +## print "1: from<$from> to<$to> ident<$ident>\n"; if ($from ne $to) { - ERROR("POINTER_LOCATION", - "\"(foo$from)\" should be \"(foo$to)\"\n" . $herecurr); + if (ERROR("POINTER_LOCATION", + "\"(foo$from)\" should be \"(foo$to)\"\n" . $herecurr) && + $fix) { + my $sub_from = $ident; + my $sub_to = $ident; + $sub_to =~ s/\Q$from\E/$to/; + $fixed[$linenr - 1] =~ + s@\Q$sub_from\E@$sub_to@; + } } } while ($line =~ m{(\b$NonptrType(\s*(?:$Modifier\b\s*|\*\s*)+)($Ident))}g) { #print "BB<$1>\n"; - my ($from, $to, $ident) = ($2, $2, $3); + my ($match, $from, $to, $ident) = ($1, $2, $2, $3); # Should start with a space. $to =~ s/^(\S)/ $1/; @@ -2394,10 +2487,18 @@ sub process { # Modifiers should have spaces. $to =~ s/(\b$Modifier$)/$1 /; - #print "from<$from> to<$to> ident<$ident>\n"; +## print "2: from<$from> to<$to> ident<$ident>\n"; if ($from ne $to && $ident !~ /^$Modifier$/) { - ERROR("POINTER_LOCATION", - "\"foo${from}bar\" should be \"foo${to}bar\"\n" . $herecurr); + if (ERROR("POINTER_LOCATION", + "\"foo${from}bar\" should be \"foo${to}bar\"\n" . $herecurr) && + $fix) { + + my $sub_from = $match; + my $sub_to = $match; + $sub_to =~ s/\Q$from\E/$to/; + $fixed[$linenr - 1] =~ + s@\Q$sub_from\E@$sub_to@; + } } } @@ -2483,9 +2584,13 @@ sub process { } # missing space after union, struct or enum definition - if ($line =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?(?:\s+$Ident)?[=\{]/) { - WARN("SPACING", - "missing space after $1 definition\n" . $herecurr); + if ($line =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident){1,2}[=\{]/) { + if (WARN("SPACING", + "missing space after $1 definition\n" . $herecurr) && + $fix) { + $fixed[$linenr - 1] =~ + s/^(.\s*(?:typedef\s+)?(?:enum|union|struct)(?:\s+$Ident){1,2})([=\{])/$1 $2/; + } } # check for spacing round square brackets; allowed: @@ -2497,8 +2602,12 @@ sub process { if ($prefix !~ /$Type\s+$/ && ($where != 0 || $prefix !~ /^.\s+$/) && $prefix !~ /[{,]\s+$/) { - ERROR("BRACKET_SPACE", - "space prohibited before open square bracket '['\n" . $herecurr); + if (ERROR("BRACKET_SPACE", + "space prohibited before open square bracket '['\n" . $herecurr) && + $fix) { + $fixed[$linenr - 1] =~ + s/^(\+.*?)\s+\[/$1\[/; + } } } @@ -2515,7 +2624,6 @@ sub process { __attribute__|format|__extension__| asm|__asm__)$/x) { - # cpp #define statements have non-optional spaces, ie # if there is a space between the name and the open # parenthesis it is simply not a parameter group. @@ -2529,19 +2637,30 @@ sub process { } elsif ($ctx =~ /$Type$/) { } else { - WARN("SPACING", - "space prohibited between function name and open parenthesis '('\n" . $herecurr); + if (WARN("SPACING", + "space prohibited between function name and open parenthesis '('\n" . $herecurr) && + $fix) { + $fixed[$linenr - 1] =~ + s/\b$name\s+\(/$name\(/; + } } } # check for whitespace before a non-naked semicolon if ($line =~ /^\+.*\S\s+;/) { - WARN("SPACING", - "space prohibited before semicolon\n" . $herecurr); + if (WARN("SPACING", + "space prohibited before semicolon\n" . $herecurr) && + $fix) { + $fixed[$linenr - 1] =~ + s/^(\+.*\S)\s+;/$1;/; + } } # Check operator spacing. if (!($line=~/\#\s*include/)) { + my $fixed_line = ""; + my $line_fixed = 0; + my $ops = qr{ <<=|>>=|<=|>=|==|!=| \+=|-=|\*=|\/=|%=|\^=|\|=|&=| @@ -2550,11 +2669,30 @@ sub process { \?|: }x; my @elements = split(/($ops|;)/, $opline); + +## print("element count: <" . $#elements . ">\n"); +## foreach my $el (@elements) { +## print("el: <$el>\n"); +## } + + my @fix_elements = (); my $off = 0; + foreach my $el (@elements) { + push(@fix_elements, substr($rawline, $off, length($el))); + $off += length($el); + } + + $off = 0; + my $blank = copy_spacing($opline); for (my $n = 0; $n < $#elements; $n += 2) { + + my $good = $fix_elements[$n] . $fix_elements[$n + 1]; + +## print("n: <$n> good: <$good>\n"); + $off += length($elements[$n]); # Pick up the preceding and succeeding characters. @@ -2611,8 +2749,11 @@ sub process { } elsif ($op eq ';') { if ($ctx !~ /.x[WEBC]/ && $cc !~ /^\\/ && $cc !~ /^;/) { - ERROR("SPACING", - "space required after that '$op' $at\n" . $hereptr); + if (ERROR("SPACING", + "space required after that '$op' $at\n" . $hereptr)) { + $good = trim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " "; + $line_fixed = 1; + } } # // is a comment @@ -2623,15 +2764,24 @@ sub process { # : when part of a bitfield } elsif ($op eq '->' || $opv eq ':B') { if ($ctx =~ /Wx.|.xW/) { - ERROR("SPACING", - "spaces prohibited around that '$op' $at\n" . $hereptr); + if (ERROR("SPACING", + "spaces prohibited around that '$op' $at\n" . $hereptr)) { + $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]); + $line_fixed = 1; + if (defined $fix_elements[$n + 2]) { + $fix_elements[$n + 2] =~ s/^\s+//; + } + } } # , must have a space on the right. } elsif ($op eq ',') { if ($ctx !~ /.x[WEC]/ && $cc !~ /^}/) { - ERROR("SPACING", - "space required after that '$op' $at\n" . $hereptr); + if (ERROR("SPACING", + "space required after that '$op' $at\n" . $hereptr)) { + $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]) . " "; + $line_fixed = 1; + } } # '*' as part of a type definition -- reported already. @@ -2645,34 +2795,58 @@ sub process { $opv eq '*U' || $opv eq '-U' || $opv eq '&U' || $opv eq '&&U') { if ($ctx !~ /[WEBC]x./ && $ca !~ /(?:\)|!|~|\*|-|\&|\||\+\+|\-\-|\{)$/) { - ERROR("SPACING", - "space required before that '$op' $at\n" . $hereptr); + if (ERROR("SPACING", + "space required before that '$op' $at\n" . $hereptr)) { + $good = trim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]); + $line_fixed = 1; + } } if ($op eq '*' && $cc =~/\s*$Modifier\b/) { # A unary '*' may be const } elsif ($ctx =~ /.xW/) { - ERROR("SPACING", - "space prohibited after that '$op' $at\n" . $hereptr); + if (ERROR("SPACING", + "space prohibited after that '$op' $at\n" . $hereptr)) { + $fixed_line =~ s/\s+$//; + $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]); + $line_fixed = 1; + if (defined $fix_elements[$n + 2]) { + $fix_elements[$n + 2] =~ s/^\s+//; + } + } } # unary ++ and unary -- are allowed no space on one side. } elsif ($op eq '++' or $op eq '--') { if ($ctx !~ /[WEOBC]x[^W]/ && $ctx !~ /[^W]x[WOBEC]/) { - ERROR("SPACING", - "space required one side of that '$op' $at\n" . $hereptr); + if (ERROR("SPACING", + "space required one side of that '$op' $at\n" . $hereptr)) { + $fixed_line =~ s/\s+$//; + $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]) . " "; + $line_fixed = 1; + } } if ($ctx =~ /Wx[BE]/ || ($ctx =~ /Wx./ && $cc =~ /^;/)) { - ERROR("SPACING", - "space prohibited before that '$op' $at\n" . $hereptr); + if (ERROR("SPACING", + "space prohibited before that '$op' $at\n" . $hereptr)) { + $fixed_line =~ s/\s+$//; + $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]); + $line_fixed = 1; + } } if ($ctx =~ /ExW/) { - ERROR("SPACING", - "space prohibited after that '$op' $at\n" . $hereptr); + if (ERROR("SPACING", + "space prohibited after that '$op' $at\n" . $hereptr)) { + $fixed_line =~ s/\s+$//; + $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]); + $line_fixed = 1; + if (defined $fix_elements[$n + 2]) { + $fix_elements[$n + 2] =~ s/^\s+//; + } + } } - # << and >> may either have or not have spaces both sides } elsif ($op eq '<<' or $op eq '>>' or $op eq '&' or $op eq '^' or $op eq '|' or @@ -2681,17 +2855,23 @@ sub process { $op eq '%') { if ($ctx =~ /Wx[^WCE]|[^WCE]xW/) { - ERROR("SPACING", - "need consistent spacing around '$op' $at\n" . - $hereptr); + if (ERROR("SPACING", + "need consistent spacing around '$op' $at\n" . $hereptr)) { + $fixed_line =~ s/\s+$//; + $good = trim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " "; + $line_fixed = 1; + } } # A colon needs no spaces before when it is # terminating a case value or a label. } elsif ($opv eq ':C' || $opv eq ':L') { if ($ctx =~ /Wx./) { - ERROR("SPACING", - "space prohibited before that '$op' $at\n" . $hereptr); + if (ERROR("SPACING", + "space prohibited before that '$op' $at\n" . $hereptr)) { + $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]); + $line_fixed = 1; + } } # All the others need spaces both sides. @@ -2714,12 +2894,30 @@ sub process { } if ($ok == 0) { - ERROR("SPACING", - "spaces required around that '$op' $at\n" . $hereptr); + if (ERROR("SPACING", + "spaces required around that '$op' $at\n" . $hereptr)) { + $good = trim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " "; + $good = $fix_elements[$n] . " " . trim($fix_elements[$n + 1]) . " "; + $line_fixed = 1; + } } } $off += length($elements[$n + 1]); + +## print("n: <$n> GOOD: <$good>\n"); + + $fixed_line = $fixed_line . $good; + } + + if (($#elements % 2) == 0) { + $fixed_line = $fixed_line . $fix_elements[$#elements]; } + + if ($fix && $line_fixed && $fixed_line ne $fixed[$linenr - 1]) { + $fixed[$linenr - 1] = $fixed_line; + } + + } # check for multiple assignments @@ -2747,8 +2945,12 @@ sub process { #need space before brace following if, while, etc if (($line =~ /\(.*\){/ && $line !~ /\($Type\){/) || $line =~ /do{/) { - ERROR("SPACING", - "space required before the open brace '{'\n" . $herecurr); + if (ERROR("SPACING", + "space required before the open brace '{'\n" . $herecurr) && + $fix) { + $fixed[$linenr - 1] =~ + s/^(\+.*(?:do|\))){/$1 {/; + } } ## # check for blank lines before declarations @@ -2768,32 +2970,52 @@ sub process { # check spacing on square brackets if ($line =~ /\[\s/ && $line !~ /\[\s*$/) { - ERROR("SPACING", - "space prohibited after that open square bracket '['\n" . $herecurr); + if (ERROR("SPACING", + "space prohibited after that open square bracket '['\n" . $herecurr) && + $fix) { + $fixed[$linenr - 1] =~ + s/\[\s+/\[/; + } } if ($line =~ /\s\]/) { - ERROR("SPACING", - "space prohibited before that close square bracket ']'\n" . $herecurr); + if (ERROR("SPACING", + "space prohibited before that close square bracket ']'\n" . $herecurr) && + $fix) { + $fixed[$linenr - 1] =~ + s/\s+\]/\]/; + } } # check spacing on parentheses if ($line =~ /\(\s/ && $line !~ /\(\s*(?:\\)?$/ && $line !~ /for\s*\(\s+;/) { - ERROR("SPACING", - "space prohibited after that open parenthesis '('\n" . $herecurr); + if (ERROR("SPACING", + "space prohibited after that open parenthesis '('\n" . $herecurr) && + $fix) { + $fixed[$linenr - 1] =~ + s/\(\s+/\(/; + } } if ($line =~ /(\s+)\)/ && $line !~ /^.\s*\)/ && $line !~ /for\s*\(.*;\s+\)/ && $line !~ /:\s+\)/) { - ERROR("SPACING", - "space prohibited before that close parenthesis ')'\n" . $herecurr); + if (ERROR("SPACING", + "space prohibited before that close parenthesis ')'\n" . $herecurr) && + $fix) { + $fixed[$linenr - 1] =~ + s/\s+\)/\)/; + } } #goto labels aren't indented, allow a single space however if ($line=~/^.\s+[A-Za-z\d_]+:(?![0-9]+)/ and !($line=~/^. [A-Za-z\d_]+:/) and !($line=~/^.\s+default:/)) { - WARN("INDENTED_LABEL", - "labels should not be indented\n" . $herecurr); + if (WARN("INDENTED_LABEL", + "labels should not be indented\n" . $herecurr) && + $fix) { + $fixed[$linenr - 1] =~ + s/^(.)\s+/$1/; + } } # Return is not a function. @@ -2830,8 +3052,13 @@ sub process { } # Need a space before open parenthesis after if, while etc - if ($line=~/\b(if|while|for|switch)\(/) { - ERROR("SPACING", "space required before the open parenthesis '('\n" . $herecurr); + if ($line =~ /\b(if|while|for|switch)\(/) { + if (ERROR("SPACING", + "space required before the open parenthesis '('\n" . $herecurr) && + $fix) { + $fixed[$linenr - 1] =~ + s/\b(if|while|for|switch)\(/$1 \(/; + } } # Check for illegal assignment in if conditional -- and check for trailing @@ -3329,8 +3556,13 @@ sub process { # warn about spacing in #ifdefs if ($line =~ /^.\s*\#\s*(ifdef|ifndef|elif)\s\s+/) { - ERROR("SPACING", - "exactly one space required after that #$1\n" . $herecurr); + if (ERROR("SPACING", + "exactly one space required after that #$1\n" . $herecurr) && + $fix) { + $fixed[$linenr - 1] =~ + s/^(.\s*\#\s*(ifdef|ifndef|elif))\s{2,}/$1 /; + } + } # check for spinlock_t definitions without a comment. @@ -3793,6 +4025,40 @@ sub process { print "\n\n"; } + if ($clean == 0 && $fix && "@rawlines" ne "@fixed") { + my $newfile = $filename . ".EXPERIMENTAL-checkpatch-fixes"; + my $linecount = 0; + my $f; + + open($f, '>', $newfile) + or die "$P: Can't open $newfile for write\n"; + foreach my $fixed_line (@fixed) { + $linecount++; + if ($file) { + if ($linecount > 3) { + $fixed_line =~ s/^\+//; + print $f $fixed_line. "\n"; + } + } else { + print $f $fixed_line . "\n"; + } + } + close($f); + + if (!$quiet) { + print << "EOM"; +Wrote EXPERIMENTAL --fix correction(s) to '$newfile' + +Do _NOT_ trust the results written to this file. +Do _NOT_ submit these changes without inspecting them for correctness. + +This EXPERIMENTAL file is simply a convenience to help rewrite patches. +No warranties, expressed or implied... + +EOM + } + } + if ($clean == 1 && $quiet == 0) { print "$vname has no obvious style problems and is ready for submission.\n" } -- cgit v0.10.2 From 786b632622800d73d9b0355c9a79b3f3b5792c6c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 3 Jul 2013 15:05:32 -0700 Subject: checkpatch: move test for space before semicolon after operator spacing Moving this test allows the --fix option to work better. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 9696be5..5989415 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2646,16 +2646,6 @@ sub process { } } -# check for whitespace before a non-naked semicolon - if ($line =~ /^\+.*\S\s+;/) { - if (WARN("SPACING", - "space prohibited before semicolon\n" . $herecurr) && - $fix) { - $fixed[$linenr - 1] =~ - s/^(\+.*\S)\s+;/$1;/; - } - } - # Check operator spacing. if (!($line=~/\#\s*include/)) { my $fixed_line = ""; @@ -2920,6 +2910,16 @@ sub process { } +# check for whitespace before a non-naked semicolon + if ($line =~ /^\+.*\S\s+;/) { + if (WARN("SPACING", + "space prohibited before semicolon\n" . $herecurr) && + $fix) { + 1 while $fixed[$linenr - 1] =~ + s/^(\+.*\S)\s+;/$1;/; + } + } + # check for multiple assignments if ($line =~ /^.\s*$Lval\s*=\s*$Lval\s*=(?!=)/) { CHK("MULTIPLE_ASSIGNMENTS", -- cgit v0.10.2 From 22735ce857a2d9f4e6eec37c36be3fcf9d21d154 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 3 Jul 2013 15:05:33 -0700 Subject: checkpatch: ignore SI unit CamelCase variants like "_uV" Many existing variable names use SI like variants that should be otherwise obvious and acceptable. Whitelist them from the CamelCase message. Signed-off-by: Joe Perches Suggested-by: Phil Carmody Acked-by: Phil Carmody Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 5989415..7e8aa1b 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3195,7 +3195,10 @@ sub process { #CamelCase if ($var !~ /^$Constant$/ && $var =~ /[A-Z][a-z]|[a-z][A-Z]/ && +#Ignore Page variants $var !~ /^(?:Clear|Set|TestClear|TestSet|)Page[A-Z]/ && +#Ignore SI style variants like nS, mV and dB (ie: max_uV, regulator_min_uA_show) + $var !~ /^(?:[a-z_]*?)_?[a-z][A-Z](?:_[a-z_]+)?$/ && !defined $camelcase{$var}) { $camelcase{$var} = 1; CHK("CAMELCASE", -- cgit v0.10.2 From 3445686af721534ac1086a9c6d48b3470dfb6946 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 3 Jul 2013 15:05:34 -0700 Subject: checkpatch: ignore existing CamelCase uses from include/... When using --strict, CamelCase uses are described with CHECK: messages. These CamelCase uses may be acceptable and should not generate these messages when the variable is already defined in a file from the include/... path. So, change checkpatch to read all the .h files in include/... and look for preexisting CamelCase #defines, typedefs and function prototypes. Add these to the existing camelcase hash so that any uses in the patch or file can be ignored. There are currently ~3500 files in include/. It takes about 10 cpu seconds on my little netbook to grep for and preseed these existing uses. That's about 4x the time for a similar git grep. This preseeding is only done once when using --strict and only when there is a CamelCase use found. If a .git directory is found, it uses 'git ls-files include' If not, it uses 'find $root/include -name "*.h" Signed-off-by: Joe Perches Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 7e8aa1b..70e7f7c 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -31,6 +31,7 @@ my $fix = 0; my $root; my %debug; my %ignore_type = (); +my %camelcase = (); my @ignore = (); my $help = 0; my $configuration_file = ".checkpatch.conf"; @@ -349,7 +350,6 @@ sub build_types { } build_types(); - our $Typecast = qr{\s*(\(\s*$NonptrType\s*\)){0,1}\s*}; # Using $balanced_parens, $LvalOrFunc, or $FuncArg @@ -369,6 +369,48 @@ sub deparenthesize { return $string; } +sub seed_camelcase_file { + my ($file) = @_; + + return if (!(-f $file)); + + local $/; + + open(my $include_file, '<', "$file") + or warn "$P: Can't read '$file' $!\n"; + my $text = <$include_file>; + close($include_file); + + my @lines = split('\n', $text); + + foreach my $line (@lines) { + next if ($line !~ /(?:[A-Z][a-z]|[a-z][A-Z])/); + if ($line =~ /^[ \t]*(?:#[ \t]*define|typedef\s+$Type)\s+(\w*(?:[A-Z][a-z]|[a-z][A-Z])\w*)/) { + $camelcase{$1} = 1; + } + elsif ($line =~ /^\s*$Declare\s+(\w*(?:[A-Z][a-z]|[a-z][A-Z])\w*)\s*\(/) { + $camelcase{$1} = 1; + } + } +} + +my $camelcase_seeded = 0; +sub seed_camelcase_includes { + return if ($camelcase_seeded); + + my $files; + if (-d ".git") { + $files = `git ls-files include`; + } else { + $files = `find $root/include -name "*.h"`; + } + my @include_files = split('\n', $files); + foreach my $file (@include_files) { + seed_camelcase_file($file); + } + $camelcase_seeded = 1; +} + $chk_signoff = 0 if ($file); my @rawlines = (); @@ -1448,7 +1490,6 @@ sub process { my %suppress_export; my $suppress_statement = 0; - my %camelcase = (); # Pre-scan the patch sanitizing the lines. # Pre-scan the patch looking for any __setup documentation. @@ -3198,11 +3239,13 @@ sub process { #Ignore Page variants $var !~ /^(?:Clear|Set|TestClear|TestSet|)Page[A-Z]/ && #Ignore SI style variants like nS, mV and dB (ie: max_uV, regulator_min_uA_show) - $var !~ /^(?:[a-z_]*?)_?[a-z][A-Z](?:_[a-z_]+)?$/ && - !defined $camelcase{$var}) { - $camelcase{$var} = 1; - CHK("CAMELCASE", - "Avoid CamelCase: <$var>\n" . $herecurr); + $var !~ /^(?:[a-z_]*?)_?[a-z][A-Z](?:_[a-z_]+)?$/) { + seed_camelcase_includes() if ($check); + if (!defined $camelcase{$var}) { + $camelcase{$var} = 1; + CHK("CAMELCASE", + "Avoid CamelCase: <$var>\n" . $herecurr); + } } } -- cgit v0.10.2 From 7d0b6594e1055e3d4efcc28af11a8e42dd85ded4 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 3 Jul 2013 15:05:35 -0700 Subject: checkpatch: allow longer logging function names The current $logFunction regular expression allows names like dev_warn, e_dbg, netdev_info, etc, but some log functions are now written like e_dev_warn, so allow 1 or 2 word blocks with an underscore before the logging level. Signed-off-by: Jacob Keller Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 70e7f7c..0d94853 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -281,7 +281,7 @@ our $typeTypedefs = qr{(?x: our $logFunctions = qr{(?x: printk(?:_ratelimited|_once|)| - [a-z0-9]+_(?:printk|emerg|alert|crit|err|warning|warn|notice|info|debug|dbg|vdbg|devel|cont|WARN)(?:_ratelimited|_once|)| + (?:[a-z0-9]+_){1,2}(?:printk|emerg|alert|crit|err|warning|warn|notice|info|debug|dbg|vdbg|devel|cont|WARN)(?:_ratelimited|_once|)| WARN(?:_RATELIMIT|_ONCE|)| panic| MODULE_[A-Z_]+ -- cgit v0.10.2 From 351b2a1fe2d06f44b4c06d377744b2ac408b7407 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 3 Jul 2013 15:05:36 -0700 Subject: checkpatch: cache last camelcase hash as .checkpatch-camelcase. Add a file to cache the CamelCase variables found by to reduce the time it takes to scan the include/ directory. Filename is '.checkpatch-camelcase.' and it is created only only if a .git directory exists. is determined by the last non-merge commit id in the include/ path. Reduces checkpatch run time by ~12 cpu seconds on my little netbook. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 0d94853..6afcd12 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -399,7 +399,23 @@ sub seed_camelcase_includes { return if ($camelcase_seeded); my $files; + my $camelcase_git_file = ""; + if (-d ".git") { + my $git_last_include_commit = `git log --no-merges --pretty=format:"%h%n" -1 -- include`; + chomp $git_last_include_commit; + $camelcase_git_file = ".checkpatch-camelcase.$git_last_include_commit"; + if (-f $camelcase_git_file) { + open(my $camelcase_file, '<', "$camelcase_git_file") + or warn "$P: Can't read '$camelcase_git_file' $!\n"; + while (<$camelcase_file>) { + chomp; + $camelcase{$_} = 1; + } + close($camelcase_file); + + return; + } $files = `git ls-files include`; } else { $files = `find $root/include -name "*.h"`; @@ -409,6 +425,16 @@ sub seed_camelcase_includes { seed_camelcase_file($file); } $camelcase_seeded = 1; + + if ($camelcase_git_file ne "") { + unlink glob ".checkpatch-camelcase.*"; + open(my $camelcase_file, '>', "$camelcase_git_file") + or warn "$P: Can't write '$camelcase_git_file' $!\n"; + foreach (sort { lc($a) cmp lc($b) } keys(%camelcase)) { + print $camelcase_file ("$_\n"); + } + close($camelcase_file); + } } $chk_signoff = 0 if ($file); -- cgit v0.10.2 From ff1c8fac88ab8e7dac65236b277908c7c5c7ab09 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 3 Jul 2013 15:05:37 -0700 Subject: init: remove permanent string buffer from do_one_initcall() do_one_initcall() uses a 64 byte string buffer to save a message. This buffer is declared static and is only used at boot up and when a module is loaded. As 64 bytes is very small, and this function has very limited scope, there's no reason to waste permanent memory with this string and not just simply put it on the stack. Signed-off-by: Steven Rostedt Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/init/main.c b/init/main.c index ec54958..f236653 100644 --- a/init/main.c +++ b/init/main.c @@ -655,8 +655,6 @@ static void __init do_ctors(void) bool initcall_debug; core_param(initcall_debug, initcall_debug, bool, 0644); -static char msgbuf[64]; - static int __init_or_module do_one_initcall_debug(initcall_t fn) { ktime_t calltime, delta, rettime; @@ -679,6 +677,7 @@ int __init_or_module do_one_initcall(initcall_t fn) { int count = preempt_count(); int ret; + char msgbuf[64]; if (initcall_debug) ret = do_one_initcall_debug(fn); -- cgit v0.10.2 From ca75b4d8799d46842350596fdc5fce7711610326 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toralf=20F=C3=B6rster?= Date: Wed, 3 Jul 2013 15:05:38 -0700 Subject: insert missing space in printk line of root_delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trivial, but it really looks better. Signed-off-by: Toralf Förster Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/init/do_mounts.c b/init/do_mounts.c index a2b49f2..816014c 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -536,7 +536,7 @@ void __init prepare_namespace(void) int is_floppy; if (root_delay) { - printk(KERN_INFO "Waiting %dsec before mounting root device...\n", + printk(KERN_INFO "Waiting %d sec before mounting root device...\n", root_delay); ssleep(root_delay); } -- cgit v0.10.2 From 10fb46d5f79147620d0afda7d3d51302a1e38191 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 3 Jul 2013 15:05:39 -0700 Subject: kprobes: handle empty/invalid input to debugfs "enabled" file When writing invalid input to 'debug/kprobes/enabled' it'll silently be ignored. Even worse, when writing an empty string to this file, the outcome is purely random as the switch statement will make its decision based on the value of an uninitialized stack variable. Fix this by handling invalid/empty input as error returning -EINVAL. Signed-off-by: Mathias Krause Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: "David S. Miller" Cc: Masami Hiramatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/kprobes.c b/kernel/kprobes.c index bddf3b2..6e33498 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2332,6 +2332,7 @@ static ssize_t write_enabled_file_bool(struct file *file, if (copy_from_user(buf, user_buf, buf_size)) return -EFAULT; + buf[buf_size] = '\0'; switch (buf[0]) { case 'y': case 'Y': @@ -2343,6 +2344,8 @@ static ssize_t write_enabled_file_bool(struct file *file, case '0': disarm_all_kprobes(); break; + default: + return -EINVAL; } return count; -- cgit v0.10.2 From ce0b348edfd803d4c2244449599799775591df3b Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:05:40 -0700 Subject: rtc: rtc-88pm80x: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-88pm80x.c b/drivers/rtc/rtc-88pm80x.c index f3742f3..354c937 100644 --- a/drivers/rtc/rtc-88pm80x.c +++ b/drivers/rtc/rtc-88pm80x.c @@ -345,7 +345,6 @@ out: static int pm80x_rtc_remove(struct platform_device *pdev) { struct pm80x_rtc_info *info = platform_get_drvdata(pdev); - platform_set_drvdata(pdev, NULL); pm80x_free_irq(info->chip, info->irq, info); return 0; } -- cgit v0.10.2 From d8d5290a32d4a698db32f233f11bc2021a803746 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:05:41 -0700 Subject: drivers/rtc/rtc-v3020.c: remove redundant goto Remove a redundant goto statement left over during the conversion of this driver to use devm_* APIs. Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-v3020.c b/drivers/rtc/rtc-v3020.c index 6e0cba8..4ee0e63 100644 --- a/drivers/rtc/rtc-v3020.c +++ b/drivers/rtc/rtc-v3020.c @@ -320,7 +320,7 @@ static int rtc_probe(struct platform_device *pdev) retval = chip->ops->map_io(chip, pdev, pdata); if (retval) - goto err_chip; + return retval; /* Make sure the v3020 expects a communication cycle * by reading 8 times */ @@ -364,7 +364,7 @@ static int rtc_probe(struct platform_device *pdev) err_io: chip->ops->unmap_io(chip); -err_chip: + return retval; } -- cgit v0.10.2 From 3ff2e13ce1b4172bf7d188799d3a106c0e2bb5cd Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:05:42 -0700 Subject: drivers/rtc/interface.c: fix checkpatch errors Fixes the following types of errors: ERROR: "foo* bar" should be "foo *bar" ERROR: else should follow close brace '}' WARNING: braces {} are not necessary for single statement blocks Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 42bd57d..14c1efd 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -109,9 +109,9 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs) err = rtc->ops->set_time(rtc->dev.parent, &new); } - } - else + } else { err = -EINVAL; + } mutex_unlock(&rtc->ops_lock); /* A timer might have just expired */ @@ -367,14 +367,14 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; - if (rtc->aie_timer.enabled) { + if (rtc->aie_timer.enabled) rtc_timer_remove(rtc, &rtc->aie_timer); - } + rtc->aie_timer.node.expires = rtc_tm_to_ktime(alarm->time); rtc->aie_timer.period = ktime_set(0, 0); - if (alarm->enabled) { + if (alarm->enabled) err = rtc_timer_enqueue(rtc, &rtc->aie_timer); - } + mutex_unlock(&rtc->ops_lock); return err; } @@ -891,7 +891,7 @@ again: * * Kernel interface to initializing an rtc_timer. */ -void rtc_timer_init(struct rtc_timer *timer, void (*f)(void* p), void* data) +void rtc_timer_init(struct rtc_timer *timer, void (*f)(void *p), void *data) { timerqueue_init(&timer->node); timer->enabled = 0; @@ -907,7 +907,7 @@ void rtc_timer_init(struct rtc_timer *timer, void (*f)(void* p), void* data) * * Kernel interface to set an rtc_timer */ -int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer* timer, +int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer *timer, ktime_t expires, ktime_t period) { int ret = 0; @@ -930,7 +930,7 @@ int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer* timer, * * Kernel interface to cancel an rtc_timer */ -int rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer* timer) +int rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer *timer) { int ret = 0; mutex_lock(&rtc->ops_lock); -- cgit v0.10.2 From 365c411d812a67ae840c7fadd48a6813355b95c4 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:05:43 -0700 Subject: drivers/rtc/rtc-at32ap700x.c: fix checkpatch error Fixes the following error: ERROR: space required before the open parenthesis '(' Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-at32ap700x.c b/drivers/rtc/rtc-at32ap700x.c index f47fbb5..128896d 100644 --- a/drivers/rtc/rtc-at32ap700x.c +++ b/drivers/rtc/rtc-at32ap700x.c @@ -141,7 +141,7 @@ static int at32_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) spin_lock_irq(&rtc->lock); - if(enabled) { + if (enabled) { if (rtc_readl(rtc, VAL) > rtc->alarm_time) { ret = -EINVAL; goto out; -- cgit v0.10.2 From 8ecc0bf41a8e3ddd40e0fbb7b3045f553e8aad41 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:05:44 -0700 Subject: drivers/rtc/rtc-at91rm9200.c: include Silences the following checkpatch warning: WARNING: Use #include instead of Signed-off-by: Sachin Kamat Cc: Andrew Victor Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index f296f3f..e87a81c 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -31,8 +31,7 @@ #include #include #include - -#include +#include #include "rtc-at91rm9200.h" -- cgit v0.10.2 From 5e8599d21bdd8010e43f407c0f0cbef32f23bea8 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:05:45 -0700 Subject: drivers/rtc/rtc-cmos.c: fix whitespace related errors Fixes the following types of issues: ERROR: space required after that ',' (ctx:VxV) WARNING: please, no spaces at the start of a line Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index f1cb706..a6727d9 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -326,7 +326,7 @@ static void cmos_irq_disable(struct cmos_rtc *cmos, unsigned char mask) static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) { struct cmos_rtc *cmos = dev_get_drvdata(dev); - unsigned char mon, mday, hrs, min, sec, rtc_control; + unsigned char mon, mday, hrs, min, sec, rtc_control; if (!is_valid_irq(cmos->irq)) return -EIO; @@ -691,7 +691,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) /* FIXME: * doesn't know 12-hour mode either. */ - if (is_valid_irq(rtc_irq) && !(rtc_control & RTC_24H)) { + if (is_valid_irq(rtc_irq) && !(rtc_control & RTC_24H)) { dev_warn(dev, "only 24-hr supported\n"); retval = -ENXIO; goto cleanup1; @@ -991,7 +991,7 @@ static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) { cmos_wake_setup(&pnp->dev); - if (pnp_port_start(pnp,0) == 0x70 && !pnp_irq_valid(pnp,0)) + if (pnp_port_start(pnp, 0) == 0x70 && !pnp_irq_valid(pnp, 0)) /* Some machines contain a PNP entry for the RTC, but * don't define the IRQ. It should always be safe to * hardcode it in these cases -- cgit v0.10.2 From 48c48180de5a1292c5ce40d6ad8fc21e7d1c86a8 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:05:46 -0700 Subject: drivers/rtc/rtc-davinci.c: fix whitespace warning Silences the following warning: WARNING: please, no space before tabs Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-davinci.c b/drivers/rtc/rtc-davinci.c index a55048c..be5fc32 100644 --- a/drivers/rtc/rtc-davinci.c +++ b/drivers/rtc/rtc-davinci.c @@ -117,7 +117,7 @@ static DEFINE_SPINLOCK(davinci_rtc_lock); struct davinci_rtc { - struct rtc_device *rtc; + struct rtc_device *rtc; void __iomem *base; resource_size_t pbase; size_t base_size; -- cgit v0.10.2 From 465008fa46dc73c54ee281b02047275539eb8ab4 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:05:47 -0700 Subject: drivers/rtc/rtc-ds1305.c: add missing braces around sizeof Silences the following type of warnings: WARNING: sizeof buf should be sizeof(buf) Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c index bb5f13f..dd6170a 100644 --- a/drivers/rtc/rtc-ds1305.c +++ b/drivers/rtc/rtc-ds1305.c @@ -158,7 +158,7 @@ static int ds1305_alarm_irq_enable(struct device *dev, unsigned int enabled) goto done; buf[1] &= ~DS1305_AEI0; } - err = spi_write_then_read(ds1305->spi, buf, sizeof buf, NULL, 0); + err = spi_write_then_read(ds1305->spi, buf, sizeof(buf), NULL, 0); if (err >= 0) ds1305->ctrl[0] = buf[1]; done: @@ -181,8 +181,8 @@ static int ds1305_get_time(struct device *dev, struct rtc_time *time) /* Use write-then-read to get all the date/time registers * since dma from stack is nonportable */ - status = spi_write_then_read(ds1305->spi, &addr, sizeof addr, - buf, sizeof buf); + status = spi_write_then_read(ds1305->spi, &addr, sizeof(addr), + buf, sizeof(buf)); if (status < 0) return status; @@ -237,7 +237,7 @@ static int ds1305_set_time(struct device *dev, struct rtc_time *time) buf[4], buf[5], buf[6], buf[7]); /* use write-then-read since dma from stack is nonportable */ - return spi_write_then_read(ds1305->spi, buf, sizeof buf, + return spi_write_then_read(ds1305->spi, buf, sizeof(buf), NULL, 0); } @@ -286,8 +286,8 @@ static int ds1305_get_alarm(struct device *dev, struct rtc_wkalrm *alm) * of EFI status is at best fragile anyway (given IRQ handlers). */ addr = DS1305_CONTROL; - status = spi_write_then_read(spi, &addr, sizeof addr, - ds1305->ctrl, sizeof ds1305->ctrl); + status = spi_write_then_read(spi, &addr, sizeof(addr), + ds1305->ctrl, sizeof(ds1305->ctrl)); if (status < 0) return status; @@ -296,8 +296,8 @@ static int ds1305_get_alarm(struct device *dev, struct rtc_wkalrm *alm) /* get and check ALM0 registers */ addr = DS1305_ALM0(DS1305_SEC); - status = spi_write_then_read(spi, &addr, sizeof addr, - buf, sizeof buf); + status = spi_write_then_read(spi, &addr, sizeof(addr), + buf, sizeof(buf)); if (status < 0) return status; @@ -381,7 +381,7 @@ static int ds1305_set_alarm(struct device *dev, struct rtc_wkalrm *alm) "alm0 write", buf[1 + DS1305_SEC], buf[1 + DS1305_MIN], buf[1 + DS1305_HOUR], buf[1 + DS1305_WDAY]); - status = spi_write_then_read(spi, buf, sizeof buf, NULL, 0); + status = spi_write_then_read(spi, buf, sizeof(buf), NULL, 0); if (status < 0) return status; @@ -474,7 +474,7 @@ static void ds1305_work(struct work_struct *work) buf[1] = ds1305->ctrl[0]; buf[2] = 0; - status = spi_write_then_read(spi, buf, sizeof buf, + status = spi_write_then_read(spi, buf, sizeof(buf), NULL, 0); if (status < 0) dev_dbg(&spi->dev, "clear irq --> %d\n", status); @@ -627,8 +627,8 @@ static int ds1305_probe(struct spi_device *spi) /* read and cache control registers */ addr = DS1305_CONTROL; - status = spi_write_then_read(spi, &addr, sizeof addr, - ds1305->ctrl, sizeof ds1305->ctrl); + status = spi_write_then_read(spi, &addr, sizeof(addr), + ds1305->ctrl, sizeof(ds1305->ctrl)); if (status < 0) { dev_dbg(&spi->dev, "can't %s, %d\n", "read", status); @@ -659,7 +659,7 @@ static int ds1305_probe(struct spi_device *spi) buf[0] = DS1305_WRITE | DS1305_CONTROL; buf[1] = ds1305->ctrl[0]; - status = spi_write_then_read(spi, buf, sizeof buf, NULL, 0); + status = spi_write_then_read(spi, buf, sizeof(buf), NULL, 0); dev_dbg(&spi->dev, "clear WP --> %d\n", status); if (status < 0) @@ -713,7 +713,7 @@ static int ds1305_probe(struct spi_device *spi) buf[1] = ds1305->ctrl[0]; buf[2] = ds1305->ctrl[1]; buf[3] = ds1305->ctrl[2]; - status = spi_write_then_read(spi, buf, sizeof buf, NULL, 0); + status = spi_write_then_read(spi, buf, sizeof(buf), NULL, 0); if (status < 0) { dev_dbg(&spi->dev, "can't %s, %d\n", "write", status); @@ -725,8 +725,8 @@ static int ds1305_probe(struct spi_device *spi) /* see if non-Linux software set up AM/PM mode */ addr = DS1305_HOUR; - status = spi_write_then_read(spi, &addr, sizeof addr, - &value, sizeof value); + status = spi_write_then_read(spi, &addr, sizeof(addr), + &value, sizeof(value)); if (status < 0) { dev_dbg(&spi->dev, "read HOUR --> %d\n", status); return status; -- cgit v0.10.2 From adc7b9b68dbd914199867327a0a6113492eda94b Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:05:48 -0700 Subject: drivers/rtc/rtc-ds1374.c: fix spacing related issues Fixes the following types of issues: ERROR: code indent should use tabs where possible WARNING: please, no spaces at the start of a line Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ds1374.c b/drivers/rtc/rtc-ds1374.c index 94366e1..9e6e14f 100644 --- a/drivers/rtc/rtc-ds1374.c +++ b/drivers/rtc/rtc-ds1374.c @@ -65,7 +65,7 @@ struct ds1374 { static struct i2c_driver ds1374_driver; static int ds1374_read_rtc(struct i2c_client *client, u32 *time, - int reg, int nbytes) + int reg, int nbytes) { u8 buf[4]; int ret; @@ -90,7 +90,7 @@ static int ds1374_read_rtc(struct i2c_client *client, u32 *time, } static int ds1374_write_rtc(struct i2c_client *client, u32 time, - int reg, int nbytes) + int reg, int nbytes) { u8 buf[4]; int i; @@ -119,8 +119,7 @@ static int ds1374_check_rtc_status(struct i2c_client *client) if (stat & DS1374_REG_SR_OSF) dev_warn(&client->dev, - "oscillator discontinuity flagged, " - "time unreliable\n"); + "oscillator discontinuity flagged, time unreliable\n"); stat &= ~(DS1374_REG_SR_OSF | DS1374_REG_SR_AF); @@ -363,7 +362,7 @@ static int ds1374_probe(struct i2c_client *client, if (client->irq > 0) { ret = devm_request_irq(&client->dev, client->irq, ds1374_irq, 0, - "ds1374", client); + "ds1374", client); if (ret) { dev_err(&client->dev, "unable to request IRQ\n"); return ret; @@ -373,7 +372,7 @@ static int ds1374_probe(struct i2c_client *client, } ds1374->rtc = devm_rtc_device_register(&client->dev, client->name, - &ds1374_rtc_ops, THIS_MODULE); + &ds1374_rtc_ops, THIS_MODULE); if (IS_ERR(ds1374->rtc)) { dev_err(&client->dev, "unable to register the class device\n"); return PTR_ERR(ds1374->rtc); -- cgit v0.10.2 From 7b2f0053b7dad07e1a18d0e46bc7ef4fb8ac564c Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:05:49 -0700 Subject: drivers/rtc/rtc-ds1511.c: fix issues related to spaces and braces Fixes the following types of issues: WARNING: please, no spaces at the start of a line WARNING: braces {} are not necessary for single statement blocks Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c index 6ce8a99..308a8fe 100644 --- a/drivers/rtc/rtc-ds1511.c +++ b/drivers/rtc/rtc-ds1511.c @@ -104,31 +104,31 @@ static DEFINE_SPINLOCK(ds1511_lock); static __iomem char *ds1511_base; static u32 reg_spacing = 1; - static noinline void +static noinline void rtc_write(uint8_t val, uint32_t reg) { writeb(val, ds1511_base + (reg * reg_spacing)); } - static inline void +static inline void rtc_write_alarm(uint8_t val, enum ds1511reg reg) { rtc_write((val | 0x80), reg); } - static noinline uint8_t +static noinline uint8_t rtc_read(enum ds1511reg reg) { return readb(ds1511_base + (reg * reg_spacing)); } - static inline void +static inline void rtc_disable_update(void) { rtc_write((rtc_read(RTC_CMD) & ~RTC_TE), RTC_CMD); } - static void +static void rtc_enable_update(void) { rtc_write((rtc_read(RTC_CMD) | RTC_TE), RTC_CMD); @@ -145,7 +145,7 @@ rtc_enable_update(void) * just enough code to set the watchdog timer so that it * will reboot the system */ - void +void ds1511_wdog_set(unsigned long deciseconds) { /* @@ -163,7 +163,7 @@ ds1511_wdog_set(unsigned long deciseconds) rtc_write(DS1511_WDE | DS1511_WDS, RTC_CMD); } - void +void ds1511_wdog_disable(void) { /* @@ -191,13 +191,12 @@ static int ds1511_rtc_set_time(struct device *dev, struct rtc_time *rtc_tm) /* * won't have to change this for a while */ - if (rtc_tm->tm_year < 1900) { + if (rtc_tm->tm_year < 1900) rtc_tm->tm_year += 1900; - } - if (rtc_tm->tm_year < 1970) { + if (rtc_tm->tm_year < 1970) return -EINVAL; - } + yrs = rtc_tm->tm_year % 100; cen = rtc_tm->tm_year / 100; mon = rtc_tm->tm_mon + 1; /* tm_mon starts at zero */ @@ -207,17 +206,14 @@ static int ds1511_rtc_set_time(struct device *dev, struct rtc_time *rtc_tm) min = rtc_tm->tm_min; sec = rtc_tm->tm_sec; - if ((mon > 12) || (day == 0)) { + if ((mon > 12) || (day == 0)) return -EINVAL; - } - if (day > rtc_month_days(rtc_tm->tm_mon, rtc_tm->tm_year)) { + if (day > rtc_month_days(rtc_tm->tm_mon, rtc_tm->tm_year)) return -EINVAL; - } - if ((hrs >= 24) || (min >= 60) || (sec >= 60)) { + if ((hrs >= 24) || (min >= 60) || (sec >= 60)) return -EINVAL; - } /* * each register is a different number of valid bits @@ -299,7 +295,7 @@ static int ds1511_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm) * date/hours/mins/secs matches. the ds1511 has many more * permutations, but the kernel doesn't. */ - static void +static void ds1511_rtc_update_alarm(struct rtc_plat_data *pdata) { unsigned long flags; @@ -322,7 +318,7 @@ ds1511_rtc_update_alarm(struct rtc_plat_data *pdata) spin_unlock_irqrestore(&pdata->lock, flags); } - static int +static int ds1511_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) { struct platform_device *pdev = to_platform_device(dev); @@ -335,14 +331,14 @@ ds1511_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) pdata->alrm_hour = alrm->time.tm_hour; pdata->alrm_min = alrm->time.tm_min; pdata->alrm_sec = alrm->time.tm_sec; - if (alrm->enabled) { + if (alrm->enabled) pdata->irqen |= RTC_AF; - } + ds1511_rtc_update_alarm(pdata); return 0; } - static int +static int ds1511_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) { struct platform_device *pdev = to_platform_device(dev); @@ -359,7 +355,7 @@ ds1511_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) return 0; } - static irqreturn_t +static irqreturn_t ds1511_interrupt(int irq, void *dev_id) { struct platform_device *pdev = dev_id; @@ -406,7 +402,7 @@ static const struct rtc_class_ops ds1511_rtc_ops = { .alarm_irq_enable = ds1511_rtc_alarm_irq_enable, }; - static ssize_t +static ssize_t ds1511_nvram_read(struct file *filp, struct kobject *kobj, struct bin_attribute *ba, char *buf, loff_t pos, size_t size) @@ -417,26 +413,26 @@ ds1511_nvram_read(struct file *filp, struct kobject *kobj, * if count is more than one, turn on "burst" mode * turn it off when you're done */ - if (size > 1) { + if (size > 1) rtc_write((rtc_read(RTC_CMD) | DS1511_BME), RTC_CMD); - } - if (pos > DS1511_RAM_MAX) { + + if (pos > DS1511_RAM_MAX) pos = DS1511_RAM_MAX; - } - if (size + pos > DS1511_RAM_MAX + 1) { + + if (size + pos > DS1511_RAM_MAX + 1) size = DS1511_RAM_MAX - pos + 1; - } + rtc_write(pos, DS1511_RAMADDR_LSB); - for (count = 0; size > 0; count++, size--) { + for (count = 0; size > 0; count++, size--) *buf++ = rtc_read(DS1511_RAMDATA); - } - if (count > 1) { + + if (count > 1) rtc_write((rtc_read(RTC_CMD) & ~DS1511_BME), RTC_CMD); - } + return count; } - static ssize_t +static ssize_t ds1511_nvram_write(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t size) @@ -447,22 +443,22 @@ ds1511_nvram_write(struct file *filp, struct kobject *kobj, * if count is more than one, turn on "burst" mode * turn it off when you're done */ - if (size > 1) { + if (size > 1) rtc_write((rtc_read(RTC_CMD) | DS1511_BME), RTC_CMD); - } - if (pos > DS1511_RAM_MAX) { + + if (pos > DS1511_RAM_MAX) pos = DS1511_RAM_MAX; - } - if (size + pos > DS1511_RAM_MAX + 1) { + + if (size + pos > DS1511_RAM_MAX + 1) size = DS1511_RAM_MAX - pos + 1; - } + rtc_write(pos, DS1511_RAMADDR_LSB); - for (count = 0; size > 0; count++, size--) { + for (count = 0; size > 0; count++, size--) rtc_write(*buf++, DS1511_RAMDATA); - } - if (count > 1) { + + if (count > 1) rtc_write((rtc_read(RTC_CMD) & ~DS1511_BME), RTC_CMD); - } + return count; } @@ -484,9 +480,9 @@ static int ds1511_rtc_probe(struct platform_device *pdev) int ret = 0; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { + if (!res) return -ENODEV; - } + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); if (!pdata) return -ENOMEM; @@ -518,9 +514,8 @@ static int ds1511_rtc_probe(struct platform_device *pdev) /* * check for a dying bat-tree */ - if (rtc_read(RTC_CMD1) & DS1511_BLF1) { + if (rtc_read(RTC_CMD1) & DS1511_BLF1) dev_warn(&pdev->dev, "voltage-low detected.\n"); - } spin_lock_init(&pdata->lock); platform_set_drvdata(pdev, pdata); -- cgit v0.10.2 From 97dd89693d1bc30060a5096c3c92274931e66f45 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:05:50 -0700 Subject: drivers/rtc/rtc-ds3234.c: fix whitespace issue Fixes the following warning: WARNING: please, no space before tabs Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ds3234.c b/drivers/rtc/rtc-ds3234.c index ba98c0e..67f1a80 100644 --- a/drivers/rtc/rtc-ds3234.c +++ b/drivers/rtc/rtc-ds3234.c @@ -73,7 +73,7 @@ static int ds3234_read_time(struct device *dev, struct rtc_time *dt) dt->tm_wday = bcd2bin(buf[3]) - 1; /* 0 = Sun */ dt->tm_mday = bcd2bin(buf[4]); dt->tm_mon = bcd2bin(buf[5] & 0x1f) - 1; /* 0 = Jan */ - dt->tm_year = bcd2bin(buf[6] & 0xff) + 100; /* Assume 20YY */ + dt->tm_year = bcd2bin(buf[6] & 0xff) + 100; /* Assume 20YY */ return rtc_valid_tm(dt); } -- cgit v0.10.2 From c865e9220da35d484fb44c9f2c291593637d139c Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:05:51 -0700 Subject: drivers/rtc/rtc-fm3130.c: fix whitespace related issue Silences the following checkpatch warning: WARNING: please, no space before tabs Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-fm3130.c b/drivers/rtc/rtc-fm3130.c index 2835fb6..8f053da 100644 --- a/drivers/rtc/rtc-fm3130.c +++ b/drivers/rtc/rtc-fm3130.c @@ -47,7 +47,7 @@ struct fm3130 { u8 reg_addr_time; - u8 reg_addr_alarm; + u8 reg_addr_alarm; u8 regs[15]; struct i2c_msg msg[4]; struct i2c_client *client; -- cgit v0.10.2 From e46527d289a8e7dbdf9249c64a5245cc7a7ee699 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:05:52 -0700 Subject: drivers/rtc/rtc-m41t80.c: fix spacing related issue Silences the following checkpatch warning: WARNING: space prohibited before semicolon Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index 89674b5..a5248aa 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -168,7 +168,7 @@ static int m41t80_set_datetime(struct i2c_client *client, struct rtc_time *tm) buf[M41T80_REG_MIN] = bin2bcd(tm->tm_min) | (buf[M41T80_REG_MIN] & ~0x7f); buf[M41T80_REG_HOUR] = - bin2bcd(tm->tm_hour) | (buf[M41T80_REG_HOUR] & ~0x3f) ; + bin2bcd(tm->tm_hour) | (buf[M41T80_REG_HOUR] & ~0x3f); buf[M41T80_REG_WDAY] = (tm->tm_wday & 0x07) | (buf[M41T80_REG_WDAY] & ~0x07); buf[M41T80_REG_DAY] = -- cgit v0.10.2 From 83246a168d65a28f98bf71bb4e267dc5f131b12c Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:05:54 -0700 Subject: drivers/rtc/rtc-max6902.c: remove unwanted spaces Silences the following type of warnings: WARNING: space prohibited between function name and open parenthesis '(' Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-max6902.c b/drivers/rtc/rtc-max6902.c index e3aea00..e9dd56c 100644 --- a/drivers/rtc/rtc-max6902.c +++ b/drivers/rtc/rtc-max6902.c @@ -159,7 +159,7 @@ static struct spi_driver max6902_driver = { module_spi_driver(max6902_driver); -MODULE_DESCRIPTION ("max6902 spi RTC driver"); -MODULE_AUTHOR ("Raphael Assenat"); -MODULE_LICENSE ("GPL"); +MODULE_DESCRIPTION("max6902 spi RTC driver"); +MODULE_AUTHOR("Raphael Assenat"); +MODULE_LICENSE("GPL"); MODULE_ALIAS("spi:rtc-max6902"); -- cgit v0.10.2 From cdf5f4ac63785e48d93137566d003d6c839b1259 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:05:55 -0700 Subject: drivers/rtc/rtc-max77686.c: remove space before semicolon Fixes the following warning: WARNING: space prohibited before semicolon Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c index 771812d..441c5c2 100644 --- a/drivers/rtc/rtc-max77686.c +++ b/drivers/rtc/rtc-max77686.c @@ -119,7 +119,7 @@ static int max77686_rtc_tm_to_data(struct rtc_time *tm, u8 *data) data[RTC_WEEKDAY] = 1 << tm->tm_wday; data[RTC_DATE] = tm->tm_mday; data[RTC_MONTH] = tm->tm_mon + 1; - data[RTC_YEAR] = tm->tm_year > 100 ? (tm->tm_year - 100) : 0 ; + data[RTC_YEAR] = tm->tm_year > 100 ? (tm->tm_year - 100) : 0; if (tm->tm_year < 100) { pr_warn("%s: MAX77686 RTC cannot handle the year %d." -- cgit v0.10.2 From f887a9de5522487debc7601595f4ef791572c044 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:05:56 -0700 Subject: drivers/rtc/rtc-max8997.c: remove space before semicolon Fixes the following warning: WARNING: space prohibited before semicolon Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-max8997.c b/drivers/rtc/rtc-max8997.c index dacf48d..1683904 100644 --- a/drivers/rtc/rtc-max8997.c +++ b/drivers/rtc/rtc-max8997.c @@ -104,7 +104,7 @@ static int max8997_rtc_tm_to_data(struct rtc_time *tm, u8 *data) data[RTC_WEEKDAY] = 1 << tm->tm_wday; data[RTC_DATE] = tm->tm_mday; data[RTC_MONTH] = tm->tm_mon + 1; - data[RTC_YEAR] = tm->tm_year > 100 ? (tm->tm_year - 100) : 0 ; + data[RTC_YEAR] = tm->tm_year > 100 ? (tm->tm_year - 100) : 0; if (tm->tm_year < 100) { pr_warn("%s: MAX8997 RTC cannot handle the year %d." -- cgit v0.10.2 From 07d27f2df56e6b8937fba13909f4eb5685211bb4 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:05:57 -0700 Subject: drivers/rtc/rtc-mpc5121.c: remove space before tab WARNING: please, no space before tabs Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-mpc5121.c b/drivers/rtc/rtc-mpc5121.c index bdcc608..213006b 100644 --- a/drivers/rtc/rtc-mpc5121.c +++ b/drivers/rtc/rtc-mpc5121.c @@ -68,7 +68,7 @@ struct mpc5121_rtc_regs { u32 target_time; /* RTC + 0x20 */ /* * actual_time: - * readonly time since VBAT_RTC was last connected + * readonly time since VBAT_RTC was last connected */ u32 actual_time; /* RTC + 0x24 */ u32 keep_alive; /* RTC + 0x28 */ -- cgit v0.10.2 From 2600f7154b5ab257d3e5c0a2990d5d951758ec78 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:05:58 -0700 Subject: drivers/rtc/rtc-msm6242.c: use pr_warn pr_warn is preferred to pr_warning. Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-msm6242.c b/drivers/rtc/rtc-msm6242.c index 771f86a..f0e9893 100644 --- a/drivers/rtc/rtc-msm6242.c +++ b/drivers/rtc/rtc-msm6242.c @@ -111,8 +111,8 @@ static void msm6242_lock(struct msm6242_priv *priv) } if (!cnt) - pr_warning("msm6242: timed out waiting for RTC (0x%x)\n", - msm6242_read(priv, MSM6242_CD)); + pr_warn("msm6242: timed out waiting for RTC (0x%x)\n", + msm6242_read(priv, MSM6242_CD)); } static void msm6242_unlock(struct msm6242_priv *priv) -- cgit v0.10.2 From 4a8282d028b69b94ce86c3860be43a7608a0fd2c Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:05:59 -0700 Subject: drivers/rtc/rtc-mxc.c: fix checkpatch error Fixes the following error: ERROR: spaces required around that '>=' (ctx:WxV) Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c index 9a3895b..bf00d6d 100644 --- a/drivers/rtc/rtc-mxc.c +++ b/drivers/rtc/rtc-mxc.c @@ -436,7 +436,7 @@ static int mxc_rtc_probe(struct platform_device *pdev) pdata->irq = -1; } - if (pdata->irq >=0) + if (pdata->irq >= 0) device_init_wakeup(&pdev->dev, 1); rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &mxc_rtc_ops, -- cgit v0.10.2 From 4b30c9fca71b5f74fb53992791778b8c8ddf0ac5 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:00 -0700 Subject: drivers/rtc/rtc-omap.c: include instead of Use #include instead of as pointed out by checkpatch. Signed-off-by: Sachin Kamat Cc: George G. Davis Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index b0ba3fc..6f6ac03 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -23,9 +23,7 @@ #include #include #include - -#include - +#include /* The OMAP1 RTC is a year/month/day/hours/minutes/seconds BCD clock * with century-range alarm matching, driven by the 32kHz clock. -- cgit v0.10.2 From 369015fbd40655f86384a3e7d5452f9c61eb26e9 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:01 -0700 Subject: drivers/rtc/rtc-pcf2123.c: remove space before tabs Silences the following checkpatch warning: WARNING: please, no space before tabs Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-pcf2123.c b/drivers/rtc/rtc-pcf2123.c index 796a6c5..b2a78a0 100644 --- a/drivers/rtc/rtc-pcf2123.c +++ b/drivers/rtc/rtc-pcf2123.c @@ -18,11 +18,11 @@ * should look something like: * * static struct spi_board_info ek_spi_devices[] = { - * ... - * { - * .modalias = "rtc-pcf2123", - * .chip_select = 1, - * .controller_data = (void *)AT91_PIN_PA10, + * ... + * { + * .modalias = "rtc-pcf2123", + * .chip_select = 1, + * .controller_data = (void *)AT91_PIN_PA10, * .max_speed_hz = 1000 * 1000, * .mode = SPI_CS_HIGH, * .bus_num = 0, -- cgit v0.10.2 From d1bda80afd36c5eeb79a8f5a53c9b3e23350d3a9 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:02 -0700 Subject: drivers/rtc/rtc-pcf8583.c: move assignment outside if condition Fixes the following checkpatch error: ERROR: do not use assignment in if condition Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-pcf8583.c b/drivers/rtc/rtc-pcf8583.c index 95886dc..9971f7f 100644 --- a/drivers/rtc/rtc-pcf8583.c +++ b/drivers/rtc/rtc-pcf8583.c @@ -188,7 +188,8 @@ static int pcf8583_rtc_read_time(struct device *dev, struct rtc_time *tm) dev_warn(dev, "resetting control %02x -> %02x\n", ctrl, new_ctrl); - if ((err = pcf8583_set_ctrl(client, &new_ctrl)) < 0) + err = pcf8583_set_ctrl(client, &new_ctrl); + if (err < 0) return err; } -- cgit v0.10.2 From 91b80e4c3887fc26aa6c2c398b45478a09fac561 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:03 -0700 Subject: drivers/rtc/rtc-rs5c313.c: include instead of Use #include instead of as pointed out by checkpatch. Signed-off-by: Sachin Kamat Cc: Nobuhiro Iwamatsu Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-rs5c313.c b/drivers/rtc/rtc-rs5c313.c index 8089fc6..b603cc4 100644 --- a/drivers/rtc/rtc-rs5c313.c +++ b/drivers/rtc/rtc-rs5c313.c @@ -47,7 +47,7 @@ #include #include #include -#include +#include #define DRV_NAME "rs5c313" #define DRV_VERSION "1.13" -- cgit v0.10.2 From 675090fa84368d66f46acb521a3eff5a4318d9ff Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:04 -0700 Subject: drivers/rtc/rtc-rs5c313.c: fix spacing related issues Fixes the following types of checkpatch issues: WARNING: please, no space before tabs ERROR: space prohibited after that open parenthesis '(' ERROR: space prohibited before that close parenthesis ')' ERROR: need consistent spacing around '>>' (ctx:VxW) Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-rs5c313.c b/drivers/rtc/rtc-rs5c313.c index b603cc4..6af0f1f 100644 --- a/drivers/rtc/rtc-rs5c313.c +++ b/drivers/rtc/rtc-rs5c313.c @@ -50,7 +50,7 @@ #include #define DRV_NAME "rs5c313" -#define DRV_VERSION "1.13" +#define DRV_VERSION "1.13" #ifdef CONFIG_SH_LANDISK /*****************************************************/ @@ -301,7 +301,7 @@ static int rs5c313_rtc_set_time(struct device *dev, struct rtc_time *tm) rs5c313_write_reg(RS5C313_ADDR_SEC10, (data >> 4)); data = bin2bcd(tm->tm_min); - rs5c313_write_reg(RS5C313_ADDR_MIN, data ); + rs5c313_write_reg(RS5C313_ADDR_MIN, data); rs5c313_write_reg(RS5C313_ADDR_MIN10, (data >> 4)); data = bin2bcd(tm->tm_hour); @@ -310,7 +310,7 @@ static int rs5c313_rtc_set_time(struct device *dev, struct rtc_time *tm) data = bin2bcd(tm->tm_mday); rs5c313_write_reg(RS5C313_ADDR_DAY, data); - rs5c313_write_reg(RS5C313_ADDR_DAY10, (data>> 4)); + rs5c313_write_reg(RS5C313_ADDR_DAY10, (data >> 4)); data = bin2bcd(tm->tm_mon + 1); rs5c313_write_reg(RS5C313_ADDR_MON, data); @@ -349,9 +349,9 @@ static void rs5c313_check_xstp_bit(void) } memset(&tm, 0, sizeof(struct rtc_time)); - tm.tm_mday = 1; - tm.tm_mon = 1 - 1; - tm.tm_year = 2000 - 1900; + tm.tm_mday = 1; + tm.tm_mon = 1 - 1; + tm.tm_year = 2000 - 1900; rs5c313_rtc_set_time(NULL, &tm); pr_err("invalid value, resetting to 1 Jan 2000\n"); @@ -388,7 +388,7 @@ static struct platform_driver rs5c313_rtc_platform_driver = { .name = DRV_NAME, .owner = THIS_MODULE, }, - .probe = rs5c313_rtc_probe, + .probe = rs5c313_rtc_probe, .remove = rs5c313_rtc_remove, }; @@ -408,7 +408,7 @@ static int __init rs5c313_rtc_init(void) static void __exit rs5c313_rtc_exit(void) { - platform_driver_unregister( &rs5c313_rtc_platform_driver ); + platform_driver_unregister(&rs5c313_rtc_platform_driver); } module_init(rs5c313_rtc_init); -- cgit v0.10.2 From de2edf32f174529bf172e6535f9e33d51240886a Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:05 -0700 Subject: drivers/rtc/rtc-v3020.c: fix spacing issues Fixes the following type of issues: WARNING: please, no space before tabs Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-v3020.c b/drivers/rtc/rtc-v3020.c index 4ee0e63..d07d898 100644 --- a/drivers/rtc/rtc-v3020.c +++ b/drivers/rtc/rtc-v3020.c @@ -16,7 +16,7 @@ * - Use the generic rtc class * * ??-???-2004: Someone at Compulab - * - Initial driver creation. + * - Initial driver creation. * */ #include @@ -278,13 +278,13 @@ static int v3020_set_time(struct device *dev, struct rtc_time *dt) dev_dbg(dev, "tm_year: %i\n", dt->tm_year); /* Write all the values to ram... */ - v3020_set_reg(chip, V3020_SECONDS, bin2bcd(dt->tm_sec)); - v3020_set_reg(chip, V3020_MINUTES, bin2bcd(dt->tm_min)); - v3020_set_reg(chip, V3020_HOURS, bin2bcd(dt->tm_hour)); + v3020_set_reg(chip, V3020_SECONDS, bin2bcd(dt->tm_sec)); + v3020_set_reg(chip, V3020_MINUTES, bin2bcd(dt->tm_min)); + v3020_set_reg(chip, V3020_HOURS, bin2bcd(dt->tm_hour)); v3020_set_reg(chip, V3020_MONTH_DAY, bin2bcd(dt->tm_mday)); - v3020_set_reg(chip, V3020_MONTH, bin2bcd(dt->tm_mon + 1)); - v3020_set_reg(chip, V3020_WEEK_DAY, bin2bcd(dt->tm_wday)); - v3020_set_reg(chip, V3020_YEAR, bin2bcd(dt->tm_year % 100)); + v3020_set_reg(chip, V3020_MONTH, bin2bcd(dt->tm_mon + 1)); + v3020_set_reg(chip, V3020_WEEK_DAY, bin2bcd(dt->tm_wday)); + v3020_set_reg(chip, V3020_YEAR, bin2bcd(dt->tm_year % 100)); /* ...and set the clock. */ v3020_set_reg(chip, V3020_CMD_RAM2CLOCK, 0); -- cgit v0.10.2 From 0218bcf658d0faf98582856b7176e25bdf3fd129 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:06 -0700 Subject: drivers/rtc/rtc-vr41xx.c: fix spacing issues Fixes the following types of issues: ERROR: code indent should use tabs where possible Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c index f91be04..b940c23 100644 --- a/drivers/rtc/rtc-vr41xx.c +++ b/drivers/rtc/rtc-vr41xx.c @@ -103,7 +103,7 @@ static inline unsigned long read_elapsed_second(void) second_mid = rtc1_read(ETIMEMREG); second_high = rtc1_read(ETIMEHREG); } while (first_low != second_low || first_mid != second_mid || - first_high != second_high); + first_high != second_high); return (first_high << 17) | (first_mid << 1) | (first_low >> 15); } @@ -154,7 +154,7 @@ static int vr41xx_rtc_set_time(struct device *dev, struct rtc_time *time) epoch_sec = mktime(epoch, 1, 1, 0, 0, 0); current_sec = mktime(time->tm_year + 1900, time->tm_mon + 1, time->tm_mday, - time->tm_hour, time->tm_min, time->tm_sec); + time->tm_hour, time->tm_min, time->tm_sec); write_elapsed_second(current_sec - epoch_sec); @@ -186,7 +186,7 @@ static int vr41xx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *wkalrm) struct rtc_time *time = &wkalrm->time; alarm_sec = mktime(time->tm_year + 1900, time->tm_mon + 1, time->tm_mday, - time->tm_hour, time->tm_min, time->tm_sec); + time->tm_hour, time->tm_min, time->tm_sec); spin_lock_irq(&rtc_lock); @@ -334,7 +334,7 @@ static int rtc_probe(struct platform_device *pdev) } retval = request_irq(aie_irq, elapsedtime_interrupt, 0, - "elapsed_time", pdev); + "elapsed_time", pdev); if (retval < 0) goto err_device_unregister; @@ -343,7 +343,7 @@ static int rtc_probe(struct platform_device *pdev) goto err_free_irq; retval = request_irq(pie_irq, rtclong1_interrupt, 0, - "rtclong1", pdev); + "rtclong1", pdev); if (retval < 0) goto err_free_irq; -- cgit v0.10.2 From 66e3f10c993845e0118f4232911b40068ad91886 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:07 -0700 Subject: drivers/rtc/rtc-x1205.c: fix checkpatch issues Fixes the following types of issues: ERROR: do not use assignment in if condition ERROR: open brace '{' following struct go on the same line ERROR: else should follow close brace '}' WARNING: please, no space before tabs Signed-off-by: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-x1205.c b/drivers/rtc/rtc-x1205.c index fa9b067..365dc65 100644 --- a/drivers/rtc/rtc-x1205.c +++ b/drivers/rtc/rtc-x1205.c @@ -4,7 +4,7 @@ * Copyright 2005 Alessandro Zummo * * please send all reports to: - * Karen Spearel + * Karen Spearel * Alessandro Zummo * * based on a lot of other RTC drivers. @@ -215,12 +215,14 @@ static int x1205_set_datetime(struct i2c_client *client, struct rtc_time *tm, buf[i] |= 0x80; /* this sequence is required to unlock the chip */ - if ((xfer = i2c_master_send(client, wel, 3)) != 3) { + xfer = i2c_master_send(client, wel, 3); + if (xfer != 3) { dev_err(&client->dev, "%s: wel - %d\n", __func__, xfer); return -EIO; } - if ((xfer = i2c_master_send(client, rwel, 3)) != 3) { + xfer = i2c_master_send(client, rwel, 3); + if (xfer != 3) { dev_err(&client->dev, "%s: rwel - %d\n", __func__, xfer); return -EIO; } @@ -269,7 +271,8 @@ static int x1205_set_datetime(struct i2c_client *client, struct rtc_time *tm, } /* disable further writes */ - if ((xfer = i2c_master_send(client, diswe, 3)) != 3) { + xfer = i2c_master_send(client, diswe, 3); + if (xfer != 3) { dev_err(&client->dev, "%s: diswe - %d\n", __func__, xfer); return -EIO; } @@ -375,8 +378,7 @@ static int x1205_get_atrim(struct i2c_client *client, int *trim) return 0; } -struct x1205_limit -{ +struct x1205_limit { unsigned char reg, mask, min, max; }; @@ -430,7 +432,8 @@ static int x1205_validate_client(struct i2c_client *client) }, }; - if ((xfer = i2c_transfer(client->adapter, msgs, 2)) != 2) { + xfer = i2c_transfer(client->adapter, msgs, 2); + if (xfer != 2) { dev_err(&client->dev, "%s: could not read register %x\n", __func__, probe_zero_pattern[i]); @@ -467,7 +470,8 @@ static int x1205_validate_client(struct i2c_client *client) }, }; - if ((xfer = i2c_transfer(client->adapter, msgs, 2)) != 2) { + xfer = i2c_transfer(client->adapter, msgs, 2); + if (xfer != 2) { dev_err(&client->dev, "%s: could not read register %x\n", __func__, probe_limits_pattern[i].reg); @@ -548,10 +552,12 @@ static int x1205_rtc_proc(struct device *dev, struct seq_file *seq) { int err, dtrim, atrim; - if ((err = x1205_get_dtrim(to_i2c_client(dev), &dtrim)) == 0) + err = x1205_get_dtrim(to_i2c_client(dev), &dtrim); + if (!err) seq_printf(seq, "digital_trim\t: %d ppm\n", dtrim); - if ((err = x1205_get_atrim(to_i2c_client(dev), &atrim)) == 0) + err = x1205_get_atrim(to_i2c_client(dev), &atrim); + if (!err) seq_printf(seq, "analog_trim\t: %d.%02d pF\n", atrim / 1000, atrim % 1000); return 0; @@ -639,7 +645,8 @@ static int x1205_probe(struct i2c_client *client, i2c_set_clientdata(client, rtc); /* Check for power failures and eventually enable the osc */ - if ((err = x1205_get_status(client, &sr)) == 0) { + err = x1205_get_status(client, &sr); + if (!err) { if (sr & X1205_SR_RTCF) { dev_err(&client->dev, "power failure detected, " @@ -647,9 +654,9 @@ static int x1205_probe(struct i2c_client *client, udelay(50); x1205_fix_osc(client); } - } - else + } else { dev_err(&client->dev, "couldn't read status\n"); + } err = x1205_sysfs_register(&client->dev); if (err) -- cgit v0.10.2 From 90da0c0c1ee7f8fc5100de0f97dbb7c6796dc42f Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:08 -0700 Subject: rtc: rtc-88pm860x: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-88pm860x.c b/drivers/rtc/rtc-88pm860x.c index 0f2b91b..4e30c85 100644 --- a/drivers/rtc/rtc-88pm860x.c +++ b/drivers/rtc/rtc-88pm860x.c @@ -418,7 +418,6 @@ static int pm860x_rtc_remove(struct platform_device *pdev) pm860x_set_bits(info->i2c, PM8607_MEAS_EN2, MEAS2_VRTC, 0); #endif /* VRTC_CALIBRATION */ - platform_set_drvdata(pdev, NULL); return 0; } -- cgit v0.10.2 From 0ffb0c065707123ae19b3a8b37a3662261c421b5 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:09 -0700 Subject: rtc: rtc-ab3100: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Cc: Lars-Peter Clausen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ab3100.c b/drivers/rtc/rtc-ab3100.c index 47a4f2c..572208d 100644 --- a/drivers/rtc/rtc-ab3100.c +++ b/drivers/rtc/rtc-ab3100.c @@ -242,7 +242,6 @@ static int __init ab3100_rtc_probe(struct platform_device *pdev) static int __exit ab3100_rtc_remove(struct platform_device *pdev) { - platform_set_drvdata(pdev, NULL); return 0; } -- cgit v0.10.2 From 431296a4cf0e9c951457cf3f6786760f63dee7f5 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:10 -0700 Subject: rtc: rtc-ab8500: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Cc: Srinidhi Kasagar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ab8500.c b/drivers/rtc/rtc-ab8500.c index 63cfa31..c5b62d4 100644 --- a/drivers/rtc/rtc-ab8500.c +++ b/drivers/rtc/rtc-ab8500.c @@ -451,8 +451,6 @@ static int ab8500_rtc_remove(struct platform_device *pdev) { ab8500_sysfs_rtc_unregister(&pdev->dev); - platform_set_drvdata(pdev, NULL); - return 0; } -- cgit v0.10.2 From 46aee0c7829794cdc18ba96d4d0fb3e3cd646577 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:11 -0700 Subject: rtc: rtc-at32ap700x: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-at32ap700x.c b/drivers/rtc/rtc-at32ap700x.c index 128896d..3161ab5 100644 --- a/drivers/rtc/rtc-at32ap700x.c +++ b/drivers/rtc/rtc-at32ap700x.c @@ -212,23 +212,20 @@ static int __init at32_rtc_probe(struct platform_device *pdev) regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!regs) { dev_dbg(&pdev->dev, "no mmio resource defined\n"); - ret = -ENXIO; - goto out; + return -ENXIO; } irq = platform_get_irq(pdev, 0); if (irq <= 0) { dev_dbg(&pdev->dev, "could not get irq\n"); - ret = -ENXIO; - goto out; + return -ENXIO; } rtc->irq = irq; rtc->regs = devm_ioremap(&pdev->dev, regs->start, resource_size(regs)); if (!rtc->regs) { - ret = -ENOMEM; dev_dbg(&pdev->dev, "could not map I/O memory\n"); - goto out; + return -ENOMEM; } spin_lock_init(&rtc->lock); @@ -249,7 +246,7 @@ static int __init at32_rtc_probe(struct platform_device *pdev) "rtc", rtc); if (ret) { dev_dbg(&pdev->dev, "could not request irq %d\n", irq); - goto out; + return ret; } platform_set_drvdata(pdev, rtc); @@ -258,8 +255,7 @@ static int __init at32_rtc_probe(struct platform_device *pdev) &at32_rtc_ops, THIS_MODULE); if (IS_ERR(rtc->rtc)) { dev_dbg(&pdev->dev, "could not register rtc device\n"); - ret = PTR_ERR(rtc->rtc); - goto out; + return PTR_ERR(rtc->rtc); } device_init_wakeup(&pdev->dev, 1); @@ -268,18 +264,12 @@ static int __init at32_rtc_probe(struct platform_device *pdev) (unsigned long)rtc->regs, rtc->irq); return 0; - -out: - platform_set_drvdata(pdev, NULL); - return ret; } static int __exit at32_rtc_remove(struct platform_device *pdev) { device_init_wakeup(&pdev->dev, 0); - platform_set_drvdata(pdev, NULL); - return 0; } -- cgit v0.10.2 From 3d772d34d18d6eeccb6cb39bbac1be932d063b0e Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:11 -0700 Subject: rtc: rtc-at91rm9200: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index e87a81c..7418926 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -438,7 +438,6 @@ static int __exit at91_rtc_remove(struct platform_device *pdev) rtc_device_unregister(rtc); iounmap(at91_rtc_regs); - platform_set_drvdata(pdev, NULL); return 0; } -- cgit v0.10.2 From 61cc483a6fbfbda9bcfa91b13cf062c851fec972 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:12 -0700 Subject: rtc: rtc-at91sam9: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c index b60a34c..309b8b3 100644 --- a/drivers/rtc/rtc-at91sam9.c +++ b/drivers/rtc/rtc-at91sam9.c @@ -324,16 +324,14 @@ static int at91_rtc_probe(struct platform_device *pdev) rtc->rtt = devm_ioremap(&pdev->dev, r->start, resource_size(r)); if (!rtc->rtt) { dev_err(&pdev->dev, "failed to map registers, aborting.\n"); - ret = -ENOMEM; - goto fail; + return -ENOMEM; } rtc->gpbr = devm_ioremap(&pdev->dev, r_gpbr->start, resource_size(r_gpbr)); if (!rtc->gpbr) { dev_err(&pdev->dev, "failed to map gpbr registers, aborting.\n"); - ret = -ENOMEM; - goto fail; + return -ENOMEM; } mr = rtt_readl(rtc, MR); @@ -350,17 +348,15 @@ static int at91_rtc_probe(struct platform_device *pdev) rtc->rtcdev = devm_rtc_device_register(&pdev->dev, pdev->name, &at91_rtc_ops, THIS_MODULE); - if (IS_ERR(rtc->rtcdev)) { - ret = PTR_ERR(rtc->rtcdev); - goto fail; - } + if (IS_ERR(rtc->rtcdev)) + return PTR_ERR(rtc->rtcdev); /* register irq handler after we know what name we'll use */ ret = devm_request_irq(&pdev->dev, rtc->irq, at91_rtc_interrupt, IRQF_SHARED, dev_name(&rtc->rtcdev->dev), rtc); if (ret) { dev_dbg(&pdev->dev, "can't share IRQ %d?\n", rtc->irq); - goto fail; + return ret; } /* NOTE: sam9260 rev A silicon has a ROM bug which resets the @@ -374,10 +370,6 @@ static int at91_rtc_probe(struct platform_device *pdev) dev_name(&rtc->rtcdev->dev)); return 0; - -fail: - platform_set_drvdata(pdev, NULL); - return ret; } /* @@ -391,7 +383,6 @@ static int at91_rtc_remove(struct platform_device *pdev) /* disable all interrupts */ rtt_writel(rtc, MR, mr & ~(AT91_RTT_ALMIEN | AT91_RTT_RTTINCIEN)); - platform_set_drvdata(pdev, NULL); return 0; } -- cgit v0.10.2 From 0e3dee0e76e605249dc8eb68fda08b17188fc5b9 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:13 -0700 Subject: rtc: rtc-au1xxx: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-au1xxx.c b/drivers/rtc/rtc-au1xxx.c index 7995abc..7c6e7bb 100644 --- a/drivers/rtc/rtc-au1xxx.c +++ b/drivers/rtc/rtc-au1xxx.c @@ -118,8 +118,6 @@ out_err: static int au1xtoy_rtc_remove(struct platform_device *pdev) { - platform_set_drvdata(pdev, NULL); - return 0; } -- cgit v0.10.2 From 22c87d633309364158911173ffaeb8de7aca6d96 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:14 -0700 Subject: rtc: rtc-bfin: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Acked-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-bfin.c b/drivers/rtc/rtc-bfin.c index ad44ec5..0c53f45 100644 --- a/drivers/rtc/rtc-bfin.c +++ b/drivers/rtc/rtc-bfin.c @@ -391,7 +391,6 @@ static int bfin_rtc_remove(struct platform_device *pdev) struct device *dev = &pdev->dev; bfin_rtc_reset(dev, 0); - platform_set_drvdata(pdev, NULL); return 0; } -- cgit v0.10.2 From a8e23d7b50c9e90beca30a7eba333fec1dd88130 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:15 -0700 Subject: rtc: rtc-bq4802: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-bq4802.c b/drivers/rtc/rtc-bq4802.c index af28867..b919168 100644 --- a/drivers/rtc/rtc-bq4802.c +++ b/drivers/rtc/rtc-bq4802.c @@ -188,8 +188,6 @@ out: static int bq4802_remove(struct platform_device *pdev) { - platform_set_drvdata(pdev, NULL); - return 0; } -- cgit v0.10.2 From 7fcbf5fd5eb74eea9e136adc1e6364a8e7a98134 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:16 -0700 Subject: rtc: rtc-coh901331: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Acked-by: Linus Walleij Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-coh901331.c b/drivers/rtc/rtc-coh901331.c index ad6863a..c05b202 100644 --- a/drivers/rtc/rtc-coh901331.c +++ b/drivers/rtc/rtc-coh901331.c @@ -154,10 +154,8 @@ static int __exit coh901331_remove(struct platform_device *pdev) { struct coh901331_port *rtap = dev_get_drvdata(&pdev->dev); - if (rtap) { + if (rtap) clk_unprepare(rtap->clk); - platform_set_drvdata(pdev, NULL); - } return 0; } @@ -220,7 +218,6 @@ static int __init coh901331_probe(struct platform_device *pdev) return 0; out_no_rtc: - platform_set_drvdata(pdev, NULL); clk_unprepare(rtap->clk); return ret; } -- cgit v0.10.2 From 90566e1d35ceb4a83e007b25711afa6fcf615ed4 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:17 -0700 Subject: rtc: rtc-da9052: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-da9052.c b/drivers/rtc/rtc-da9052.c index 7286b27..a61ab7f 100644 --- a/drivers/rtc/rtc-da9052.c +++ b/drivers/rtc/rtc-da9052.c @@ -257,8 +257,6 @@ static int da9052_rtc_probe(struct platform_device *pdev) static int da9052_rtc_remove(struct platform_device *pdev) { - platform_set_drvdata(pdev, NULL); - return 0; } -- cgit v0.10.2 From 37dd5ffc35f41d85c1b8b9e1012c9fb891af3c05 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:18 -0700 Subject: rtc: rtc-da9055: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-da9055.c b/drivers/rtc/rtc-da9055.c index 73858ca..df4cf16 100644 --- a/drivers/rtc/rtc-da9055.c +++ b/drivers/rtc/rtc-da9055.c @@ -317,8 +317,6 @@ err_rtc: static int da9055_rtc_remove(struct platform_device *pdev) { - platform_set_drvdata(pdev, NULL); - return 0; } -- cgit v0.10.2 From 438831fc6ee66a34f27522ef3f0f9eac4b4da8bc Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:19 -0700 Subject: rtc: rtc-davinci: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-davinci.c b/drivers/rtc/rtc-davinci.c index be5fc32..24677ef8 100644 --- a/drivers/rtc/rtc-davinci.c +++ b/drivers/rtc/rtc-davinci.c @@ -526,10 +526,9 @@ static int __init davinci_rtc_probe(struct platform_device *pdev) davinci_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &davinci_rtc_ops, THIS_MODULE); if (IS_ERR(davinci_rtc->rtc)) { - ret = PTR_ERR(davinci_rtc->rtc); dev_err(dev, "unable to register RTC device, err %d\n", ret); - goto fail1; + return PTR_ERR(davinci_rtc->rtc); } rtcif_write(davinci_rtc, PRTCIF_INTFLG_RTCSS, PRTCIF_INTFLG); @@ -543,7 +542,7 @@ static int __init davinci_rtc_probe(struct platform_device *pdev) 0, "davinci_rtc", davinci_rtc); if (ret < 0) { dev_err(dev, "unable to register davinci RTC interrupt\n"); - goto fail1; + return ret; } /* Enable interrupts */ @@ -556,10 +555,6 @@ static int __init davinci_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 0); return 0; - -fail1: - platform_set_drvdata(pdev, NULL); - return ret; } static int __exit davinci_rtc_remove(struct platform_device *pdev) @@ -570,8 +565,6 @@ static int __exit davinci_rtc_remove(struct platform_device *pdev) rtcif_write(davinci_rtc, 0, PRTCIF_INTEN); - platform_set_drvdata(pdev, NULL); - return 0; } -- cgit v0.10.2 From 531d56fe7d4277367c8fb2c637f92915bca459d2 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:20 -0700 Subject: rtc: rtc-dm355evm: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-dm355evm.c b/drivers/rtc/rtc-dm355evm.c index 1e1ca63..1855581 100644 --- a/drivers/rtc/rtc-dm355evm.c +++ b/drivers/rtc/rtc-dm355evm.c @@ -141,7 +141,6 @@ static int dm355evm_rtc_probe(struct platform_device *pdev) static int dm355evm_rtc_remove(struct platform_device *pdev) { - platform_set_drvdata(pdev, NULL); return 0; } -- cgit v0.10.2 From 844b8a6855bf08808fb211c4923dbd5062eeb0ad Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:21 -0700 Subject: rtc: rtc-ds1302: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c index d139543..1f8ee63 100644 --- a/drivers/rtc/rtc-ds1302.c +++ b/drivers/rtc/rtc-ds1302.c @@ -236,8 +236,6 @@ static int __init ds1302_rtc_probe(struct platform_device *pdev) static int __exit ds1302_rtc_remove(struct platform_device *pdev) { - platform_set_drvdata(pdev, NULL); - return 0; } -- cgit v0.10.2 From 7b32b631fb30ef3982d1bdcbb9242873029b46aa Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:21 -0700 Subject: rtc: rtc-ep93xx: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c index 5807b77..549b3c3 100644 --- a/drivers/rtc/rtc-ep93xx.c +++ b/drivers/rtc/rtc-ep93xx.c @@ -167,7 +167,6 @@ static int ep93xx_rtc_probe(struct platform_device *pdev) return 0; exit: - platform_set_drvdata(pdev, NULL); pdev->dev.platform_data = NULL; return err; } @@ -175,7 +174,6 @@ exit: static int ep93xx_rtc_remove(struct platform_device *pdev) { sysfs_remove_group(&pdev->dev.kobj, &ep93xx_rtc_sysfs_files); - platform_set_drvdata(pdev, NULL); pdev->dev.platform_data = NULL; return 0; -- cgit v0.10.2 From 2ce5413d9ef3cf177b27edfaf7d7a3b056834fea Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:22 -0700 Subject: rtc: rtc-jz4740: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-jz4740.c b/drivers/rtc/rtc-jz4740.c index 1e48686..c6573585 100644 --- a/drivers/rtc/rtc-jz4740.c +++ b/drivers/rtc/rtc-jz4740.c @@ -287,7 +287,6 @@ err_free_irq: err_unregister_rtc: rtc_device_unregister(rtc->rtc); err_iounmap: - platform_set_drvdata(pdev, NULL); iounmap(rtc->base); err_release_mem_region: release_mem_region(rtc->mem->start, resource_size(rtc->mem)); @@ -310,8 +309,6 @@ static int jz4740_rtc_remove(struct platform_device *pdev) kfree(rtc); - platform_set_drvdata(pdev, NULL); - return 0; } -- cgit v0.10.2 From 70bfde832c772688cb12734b297d387c1037b994 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:23 -0700 Subject: rtc: rtc-lp8788: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-lp8788.c b/drivers/rtc/rtc-lp8788.c index 9853ac1..76a82dc 100644 --- a/drivers/rtc/rtc-lp8788.c +++ b/drivers/rtc/rtc-lp8788.c @@ -314,8 +314,6 @@ static int lp8788_rtc_probe(struct platform_device *pdev) static int lp8788_rtc_remove(struct platform_device *pdev) { - platform_set_drvdata(pdev, NULL); - return 0; } -- cgit v0.10.2 From 99c3e1c59ad96c6de3e59452f5183edc4d634f16 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:24 -0700 Subject: rtc: rtc-lpc32xx: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-lpc32xx.c b/drivers/rtc/rtc-lpc32xx.c index 787550d..8276ae9 100644 --- a/drivers/rtc/rtc-lpc32xx.c +++ b/drivers/rtc/rtc-lpc32xx.c @@ -277,7 +277,6 @@ static int lpc32xx_rtc_probe(struct platform_device *pdev) &lpc32xx_rtc_ops, THIS_MODULE); if (IS_ERR(rtc->rtc)) { dev_err(&pdev->dev, "Can't get RTC\n"); - platform_set_drvdata(pdev, NULL); return PTR_ERR(rtc->rtc); } @@ -306,8 +305,6 @@ static int lpc32xx_rtc_remove(struct platform_device *pdev) if (rtc->irq >= 0) device_init_wakeup(&pdev->dev, 0); - platform_set_drvdata(pdev, NULL); - return 0; } -- cgit v0.10.2 From 55d66d286d069f3cb65a0aaec9bf0889821a915a Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:25 -0700 Subject: rtc: rtc-ls1x: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ls1x.c b/drivers/rtc/rtc-ls1x.c index db82f91..e913f38 100644 --- a/drivers/rtc/rtc-ls1x.c +++ b/drivers/rtc/rtc-ls1x.c @@ -187,8 +187,6 @@ err: static int ls1x_rtc_remove(struct platform_device *pdev) { - platform_set_drvdata(pdev, NULL); - return 0; } -- cgit v0.10.2 From 5f371582180572f06f74dcac55c54d2072e672bc Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:26 -0700 Subject: rtc: rtc-m48t59: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-m48t59.c b/drivers/rtc/rtc-m48t59.c index 130f29a..d4d31fa 100644 --- a/drivers/rtc/rtc-m48t59.c +++ b/drivers/rtc/rtc-m48t59.c @@ -513,7 +513,6 @@ static int m48t59_rtc_remove(struct platform_device *pdev) iounmap(m48t59->ioaddr); if (m48t59->irq != NO_IRQ) free_irq(m48t59->irq, &pdev->dev); - platform_set_drvdata(pdev, NULL); kfree(m48t59); return 0; } -- cgit v0.10.2 From 28e1de09421b0e21dffbf2186b5ef14c1c8e033c Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:27 -0700 Subject: rtc: rtc-max8925: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-max8925.c b/drivers/rtc/rtc-max8925.c index 7c90f4e..981b654 100644 --- a/drivers/rtc/rtc-max8925.c +++ b/drivers/rtc/rtc-max8925.c @@ -268,7 +268,7 @@ static int max8925_rtc_probe(struct platform_device *pdev) if (ret < 0) { dev_err(chip->dev, "Failed to request IRQ: #%d: %d\n", info->irq, ret); - goto err; + return ret; } dev_set_drvdata(&pdev->dev, info); @@ -282,13 +282,10 @@ static int max8925_rtc_probe(struct platform_device *pdev) ret = PTR_ERR(info->rtc_dev); if (IS_ERR(info->rtc_dev)) { dev_err(&pdev->dev, "Failed to register RTC device: %d\n", ret); - goto err; + return ret; } return 0; -err: - platform_set_drvdata(pdev, NULL); - return ret; } static int max8925_rtc_remove(struct platform_device *pdev) -- cgit v0.10.2 From 4802f224a50d841786f17a1d27dd48dc149a1e33 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:28 -0700 Subject: rtc: rtc-max8998: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-max8998.c b/drivers/rtc/rtc-max8998.c index d5af7ba..738eece 100644 --- a/drivers/rtc/rtc-max8998.c +++ b/drivers/rtc/rtc-max8998.c @@ -274,7 +274,7 @@ static int max8998_rtc_probe(struct platform_device *pdev) if (IS_ERR(info->rtc_dev)) { ret = PTR_ERR(info->rtc_dev); dev_err(&pdev->dev, "Failed to register RTC device: %d\n", ret); - goto out_rtc; + return ret; } ret = devm_request_threaded_irq(&pdev->dev, info->irq, NULL, @@ -292,10 +292,6 @@ static int max8998_rtc_probe(struct platform_device *pdev) } return 0; - -out_rtc: - platform_set_drvdata(pdev, NULL); - return ret; } static int max8998_rtc_remove(struct platform_device *pdev) -- cgit v0.10.2 From d8f447877b1cd40267a66c6d9fcd11464c859346 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:29 -0700 Subject: rtc: rtc-mc13xxx: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c index 7a8ed27..77ea989 100644 --- a/drivers/rtc/rtc-mc13xxx.c +++ b/drivers/rtc/rtc-mc13xxx.c @@ -370,8 +370,6 @@ err_reset_irq_status: err_reset_irq_request: mc13xxx_unlock(mc13xxx); - - platform_set_drvdata(pdev, NULL); } return ret; @@ -389,8 +387,6 @@ static int __exit mc13xxx_rtc_remove(struct platform_device *pdev) mc13xxx_unlock(priv->mc13xxx); - platform_set_drvdata(pdev, NULL); - return 0; } -- cgit v0.10.2 From e3aa7526fca757312732f88cd16f7a73a496b80d Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:29 -0700 Subject: rtc: rtc-msm6242: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-msm6242.c b/drivers/rtc/rtc-msm6242.c index f0e9893..c014fa2 100644 --- a/drivers/rtc/rtc-msm6242.c +++ b/drivers/rtc/rtc-msm6242.c @@ -199,7 +199,6 @@ static int __init msm6242_rtc_probe(struct platform_device *pdev) struct resource *res; struct msm6242_priv *priv; struct rtc_device *rtc; - int error; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) @@ -216,17 +215,11 @@ static int __init msm6242_rtc_probe(struct platform_device *pdev) rtc = devm_rtc_device_register(&pdev->dev, "rtc-msm6242", &msm6242_rtc_ops, THIS_MODULE); - if (IS_ERR(rtc)) { - error = PTR_ERR(rtc); - goto out_unmap; - } + if (IS_ERR(rtc)) + return PTR_ERR(rtc); priv->rtc = rtc; return 0; - -out_unmap: - platform_set_drvdata(pdev, NULL); - return error; } static int __exit msm6242_rtc_remove(struct platform_device *pdev) -- cgit v0.10.2 From d2f3a3984be5b154714a16aeafe195883b4cd233 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:30 -0700 Subject: rtc: rtc-mxc: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c index bf00d6d..ab87bac 100644 --- a/drivers/rtc/rtc-mxc.c +++ b/drivers/rtc/rtc-mxc.c @@ -443,15 +443,13 @@ static int mxc_rtc_probe(struct platform_device *pdev) THIS_MODULE); if (IS_ERR(rtc)) { ret = PTR_ERR(rtc); - goto exit_clr_drvdata; + goto exit_put_clk; } pdata->rtc = rtc; return 0; -exit_clr_drvdata: - platform_set_drvdata(pdev, NULL); exit_put_clk: clk_disable_unprepare(pdata->clk); @@ -465,7 +463,6 @@ static int mxc_rtc_remove(struct platform_device *pdev) struct rtc_plat_data *pdata = platform_get_drvdata(pdev); clk_disable_unprepare(pdata->clk); - platform_set_drvdata(pdev, NULL); return 0; } -- cgit v0.10.2 From 6d4a38cbbb311dc93c0f66a702f7d0ae5c5b4965 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:31 -0700 Subject: rtc: rtc-nuc900: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Acked-by: Wan Zongshun Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-nuc900.c b/drivers/rtc/rtc-nuc900.c index d592e2f..994659b 100644 --- a/drivers/rtc/rtc-nuc900.c +++ b/drivers/rtc/rtc-nuc900.c @@ -262,8 +262,6 @@ static int __init nuc900_rtc_probe(struct platform_device *pdev) static int __exit nuc900_rtc_remove(struct platform_device *pdev) { - platform_set_drvdata(pdev, NULL); - return 0; } -- cgit v0.10.2 From a1c805a9bdc55b244706b911e842d0649990953b Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:32 -0700 Subject: rtc: rtc-pcap: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-pcap.c b/drivers/rtc/rtc-pcap.c index 539a90b..40b5c63 100644 --- a/drivers/rtc/rtc-pcap.c +++ b/drivers/rtc/rtc-pcap.c @@ -156,10 +156,8 @@ static int __init pcap_rtc_probe(struct platform_device *pdev) pcap_rtc->rtc = devm_rtc_device_register(&pdev->dev, "pcap", &pcap_rtc_ops, THIS_MODULE); - if (IS_ERR(pcap_rtc->rtc)) { - err = PTR_ERR(pcap_rtc->rtc); - goto fail; - } + if (IS_ERR(pcap_rtc->rtc)) + return PTR_ERR(pcap_rtc->rtc); timer_irq = pcap_to_irq(pcap_rtc->pcap, PCAP_IRQ_1HZ); alarm_irq = pcap_to_irq(pcap_rtc->pcap, PCAP_IRQ_TODA); @@ -167,17 +165,14 @@ static int __init pcap_rtc_probe(struct platform_device *pdev) err = devm_request_irq(&pdev->dev, timer_irq, pcap_rtc_irq, 0, "RTC Timer", pcap_rtc); if (err) - goto fail; + return err; err = devm_request_irq(&pdev->dev, alarm_irq, pcap_rtc_irq, 0, "RTC Alarm", pcap_rtc); if (err) - goto fail; + return err; return 0; -fail: - platform_set_drvdata(pdev, NULL); - return err; } static int __exit pcap_rtc_remove(struct platform_device *pdev) -- cgit v0.10.2 From 253262a570e377bcf36c8d2643c6402644d7d363 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:33 -0700 Subject: rtc: rtc-pm8xxx: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c index f1a6557..14ee860 100644 --- a/drivers/rtc/rtc-pm8xxx.c +++ b/drivers/rtc/rtc-pm8xxx.c @@ -480,7 +480,6 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) fail_req_irq: rtc_device_unregister(rtc_dd->rtc); fail_rtc_enable: - platform_set_drvdata(pdev, NULL); kfree(rtc_dd); return rc; } @@ -492,7 +491,6 @@ static int pm8xxx_rtc_remove(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 0); free_irq(rtc_dd->rtc_alarm_irq, rtc_dd); rtc_device_unregister(rtc_dd->rtc); - platform_set_drvdata(pdev, NULL); kfree(rtc_dd); return 0; -- cgit v0.10.2 From fb9b525e7930822e6fbe6db1cce931686545410a Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:34 -0700 Subject: rtc: rtc-s3c: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 0b495e8..7afd373 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -421,8 +421,6 @@ static void s3c_rtc_enable(struct platform_device *pdev, int en) static int s3c_rtc_remove(struct platform_device *dev) { - platform_set_drvdata(dev, NULL); - s3c_rtc_setaie(&dev->dev, 0); clk_unprepare(rtc_clk); @@ -549,23 +547,20 @@ static int s3c_rtc_probe(struct platform_device *pdev) 0, "s3c2410-rtc alarm", rtc); if (ret) { dev_err(&pdev->dev, "IRQ%d error %d\n", s3c_rtc_alarmno, ret); - goto err_alarm_irq; + goto err_nortc; } ret = devm_request_irq(&pdev->dev, s3c_rtc_tickno, s3c_rtc_tickirq, 0, "s3c2410-rtc tick", rtc); if (ret) { dev_err(&pdev->dev, "IRQ%d error %d\n", s3c_rtc_tickno, ret); - goto err_alarm_irq; + goto err_nortc; } clk_disable(rtc_clk); return 0; - err_alarm_irq: - platform_set_drvdata(pdev, NULL); - err_nortc: s3c_rtc_enable(pdev, 0); clk_disable_unprepare(rtc_clk); -- cgit v0.10.2 From 66600bbef9c74c49b2a020a04897af86202b8d15 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:35 -0700 Subject: rtc: rtc-sa1100: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c index 0060560..0f7adeb 100644 --- a/drivers/rtc/rtc-sa1100.c +++ b/drivers/rtc/rtc-sa1100.c @@ -249,7 +249,7 @@ static int sa1100_rtc_probe(struct platform_device *pdev) ret = clk_prepare_enable(info->clk); if (ret) - goto err_enable_clk; + return ret; /* * According to the manual we should be able to let RTTR be zero * and then a default diviser for a 32.768KHz clock is used. @@ -303,8 +303,6 @@ static int sa1100_rtc_probe(struct platform_device *pdev) return 0; err_dev: clk_disable_unprepare(info->clk); -err_enable_clk: - platform_set_drvdata(pdev, NULL); return ret; } @@ -312,10 +310,8 @@ static int sa1100_rtc_remove(struct platform_device *pdev) { struct sa1100_rtc *info = platform_get_drvdata(pdev); - if (info) { + if (info) clk_disable_unprepare(info->clk); - platform_set_drvdata(pdev, NULL); - } return 0; } -- cgit v0.10.2 From f50c8bf73f74c304c213371de35f4a4d7fafcaf9 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:36 -0700 Subject: rtc: rtc-sh: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index 8d5bd2e..cb2f839 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -770,8 +770,6 @@ static int __exit sh_rtc_remove(struct platform_device *pdev) clk_disable(rtc->clk); clk_put(rtc->clk); - platform_set_drvdata(pdev, NULL); - kfree(rtc); return 0; -- cgit v0.10.2 From 1cc622358745261dcfe4a7ebb5c37a5145fb3ee3 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:36 -0700 Subject: rtc: rtc-spear: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Acked-by: Viresh Kumar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-spear.c b/drivers/rtc/rtc-spear.c index 574359c..c492cf0 100644 --- a/drivers/rtc/rtc-spear.c +++ b/drivers/rtc/rtc-spear.c @@ -417,7 +417,6 @@ static int spear_rtc_probe(struct platform_device *pdev) return 0; err_disable_clock: - platform_set_drvdata(pdev, NULL); clk_disable_unprepare(config->clk); return status; -- cgit v0.10.2 From dfd2a17809b787dea44eec76ab9f3de8b7fcb8d6 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:37 -0700 Subject: rtc: rtc-stmp3xxx: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-stmp3xxx.c b/drivers/rtc/rtc-stmp3xxx.c index 483ce08..90a3e86 100644 --- a/drivers/rtc/rtc-stmp3xxx.c +++ b/drivers/rtc/rtc-stmp3xxx.c @@ -225,7 +225,6 @@ static int stmp3xxx_rtc_remove(struct platform_device *pdev) writel(STMP3XXX_RTC_CTRL_ALARM_IRQ_EN, rtc_data->io + STMP3XXX_RTC_CTRL_CLR); - platform_set_drvdata(pdev, NULL); return 0; } @@ -274,25 +273,19 @@ static int stmp3xxx_rtc_probe(struct platform_device *pdev) rtc_data->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &stmp3xxx_rtc_ops, THIS_MODULE); - if (IS_ERR(rtc_data->rtc)) { - err = PTR_ERR(rtc_data->rtc); - goto out; - } + if (IS_ERR(rtc_data->rtc)) + return PTR_ERR(rtc_data->rtc); err = devm_request_irq(&pdev->dev, rtc_data->irq_alarm, stmp3xxx_rtc_interrupt, 0, "RTC alarm", &pdev->dev); if (err) { dev_err(&pdev->dev, "Cannot claim IRQ%d\n", rtc_data->irq_alarm); - goto out; + return err; } stmp3xxx_wdt_register(pdev); return 0; - -out: - platform_set_drvdata(pdev, NULL); - return err; } #ifdef CONFIG_PM_SLEEP -- cgit v0.10.2 From ca5f4389a81bdceb151ba7f806a6a807d0c8893f Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:38 -0700 Subject: rtc: rtc-twl: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c index b2eab34..52843d0 100644 --- a/drivers/rtc/rtc-twl.c +++ b/drivers/rtc/rtc-twl.c @@ -556,7 +556,6 @@ static int twl_rtc_remove(struct platform_device *pdev) free_irq(irq, rtc); rtc_device_unregister(rtc); - platform_set_drvdata(pdev, NULL); return 0; } -- cgit v0.10.2 From fedd5f49618f11dd08f99c4106c5fc43951ba4b0 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:39 -0700 Subject: rtc: rtc-vr41xx: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c index b940c23..3b5b4fa 100644 --- a/drivers/rtc/rtc-vr41xx.c +++ b/drivers/rtc/rtc-vr41xx.c @@ -381,8 +381,6 @@ static int rtc_remove(struct platform_device *pdev) if (rtc) rtc_device_unregister(rtc); - platform_set_drvdata(pdev, NULL); - free_irq(aie_irq, pdev); free_irq(pie_irq, pdev); if (rtc1_base) -- cgit v0.10.2 From 221ba4db9de567d84b891f8dad60d5efdea63af1 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:40 -0700 Subject: rtc: rtc-vt8500: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Acked-by: Tony Prisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-vt8500.c b/drivers/rtc/rtc-vt8500.c index d89efee..c2d6331 100644 --- a/drivers/rtc/rtc-vt8500.c +++ b/drivers/rtc/rtc-vt8500.c @@ -282,8 +282,6 @@ static int vt8500_rtc_remove(struct platform_device *pdev) /* Disable alarm matching */ writel(0, vt8500_rtc->regbase + VT8500_RTC_IS); - platform_set_drvdata(pdev, NULL); - return 0; } -- cgit v0.10.2 From d314fa3d27ad43c31ca1da91c7592d0df2461b86 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:41 -0700 Subject: rtc: rtc-m48t86: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-m48t86.c b/drivers/rtc/rtc-m48t86.c index 33a91c4..d1fe7f5 100644 --- a/drivers/rtc/rtc-m48t86.c +++ b/drivers/rtc/rtc-m48t86.c @@ -168,8 +168,6 @@ static int m48t86_rtc_probe(struct platform_device *dev) static int m48t86_rtc_remove(struct platform_device *dev) { - platform_set_drvdata(dev, NULL); - return 0; } -- cgit v0.10.2 From 7f1c2e824e10439ef716f2e0bda7609b0474d0f8 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:42 -0700 Subject: rtc: rtc-puv3: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Acked-by: Guan Xuetao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-puv3.c b/drivers/rtc/rtc-puv3.c index 72f4371..402732c 100644 --- a/drivers/rtc/rtc-puv3.c +++ b/drivers/rtc/rtc-puv3.c @@ -224,7 +224,6 @@ static int puv3_rtc_remove(struct platform_device *dev) { struct rtc_device *rtc = platform_get_drvdata(dev); - platform_set_drvdata(dev, NULL); rtc_device_unregister(rtc); puv3_rtc_setpie(&dev->dev, 0); -- cgit v0.10.2 From a81de2076c32f2b02c454cfb16fdba63050c30b7 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:43 -0700 Subject: rtc: rtc-rp5c01: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-rp5c01.c b/drivers/rtc/rtc-rp5c01.c index 873c689..89d0736 100644 --- a/drivers/rtc/rtc-rp5c01.c +++ b/drivers/rtc/rtc-rp5c01.c @@ -251,21 +251,15 @@ static int __init rp5c01_rtc_probe(struct platform_device *dev) rtc = devm_rtc_device_register(&dev->dev, "rtc-rp5c01", &rp5c01_rtc_ops, THIS_MODULE); - if (IS_ERR(rtc)) { - error = PTR_ERR(rtc); - goto out; - } + if (IS_ERR(rtc)) + return PTR_ERR(rtc); priv->rtc = rtc; error = sysfs_create_bin_file(&dev->dev.kobj, &priv->nvram_attr); if (error) - goto out; + return error; return 0; - -out: - platform_set_drvdata(dev, NULL); - return error; } static int __exit rp5c01_rtc_remove(struct platform_device *dev) -- cgit v0.10.2 From 364589e359267b9878790db19c6b7130bcb7ac6b Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:06:44 -0700 Subject: rtc: rtc-tile: remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure, since commit 0998d063100 ("device-core: Ensure drvdata = NULL when no driver is bound"). Thus, it is not needed to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-tile.c b/drivers/rtc/rtc-tile.c index fc3dee9..b3bb32c 100644 --- a/drivers/rtc/rtc-tile.c +++ b/drivers/rtc/rtc-tile.c @@ -96,8 +96,6 @@ static int tile_rtc_probe(struct platform_device *dev) */ static int tile_rtc_remove(struct platform_device *dev) { - platform_set_drvdata(dev, NULL); - return 0; } -- cgit v0.10.2 From 29ecd78c0fd6ee05f2c6b07b23823a6ae43c13ff Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 3 Jul 2013 15:06:45 -0700 Subject: drivers/rtc/rtc-rv3029c2.c: fix disabling AIE irq In the disable AIE irq code path, current code passes "1" to enable parameter of rv3029c2_rtc_i2c_alarm_set_irq(). Thus it does not disable AIE irq. Signed-off-by: Axel Lin Acked-by: Heiko Schocher Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-rv3029c2.c b/drivers/rtc/rtc-rv3029c2.c index 5032c24..9100a34 100644 --- a/drivers/rtc/rtc-rv3029c2.c +++ b/drivers/rtc/rtc-rv3029c2.c @@ -310,7 +310,7 @@ static int rv3029c2_rtc_i2c_set_alarm(struct i2c_client *client, dev_dbg(&client->dev, "alarm IRQ armed\n"); } else { /* disable AIE irq */ - ret = rv3029c2_rtc_i2c_alarm_set_irq(client, 1); + ret = rv3029c2_rtc_i2c_alarm_set_irq(client, 0); if (ret) return ret; -- cgit v0.10.2 From d43fcab6b29b818a627501b21628967b5396b1f9 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:46 -0700 Subject: drivers/rtc/rtc-m48t86.c: remove empty function After the switch to devm_ functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-m48t86.c b/drivers/rtc/rtc-m48t86.c index d1fe7f5..2d30314 100644 --- a/drivers/rtc/rtc-m48t86.c +++ b/drivers/rtc/rtc-m48t86.c @@ -166,18 +166,12 @@ static int m48t86_rtc_probe(struct platform_device *dev) return 0; } -static int m48t86_rtc_remove(struct platform_device *dev) -{ - return 0; -} - static struct platform_driver m48t86_rtc_platform_driver = { .driver = { .name = "rtc-m48t86", .owner = THIS_MODULE, }, .probe = m48t86_rtc_probe, - .remove = m48t86_rtc_remove, }; module_platform_driver(m48t86_rtc_platform_driver); -- cgit v0.10.2 From afdce3014002968fe5c3d97d36b71583337c5f06 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:47 -0700 Subject: drivers/rtc/rtc-tile.c: remove empty function After the switch to devm_ functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-tile.c b/drivers/rtc/rtc-tile.c index b3bb32c..ff9632e 100644 --- a/drivers/rtc/rtc-tile.c +++ b/drivers/rtc/rtc-tile.c @@ -91,21 +91,12 @@ static int tile_rtc_probe(struct platform_device *dev) return 0; } -/* - * Device cleanup routine. - */ -static int tile_rtc_remove(struct platform_device *dev) -{ - return 0; -} - static struct platform_driver tile_rtc_platform_driver = { .driver = { .name = "rtc-tile", .owner = THIS_MODULE, }, .probe = tile_rtc_probe, - .remove = tile_rtc_remove, }; /* -- cgit v0.10.2 From 8bd941e7942e3f8bdb68df19e9c7fff57397a52d Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:48 -0700 Subject: drivers/rtc/rtc-nuc900.c: remove empty function After the switch to devm_ functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Wan ZongShun Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-nuc900.c b/drivers/rtc/rtc-nuc900.c index 994659b..22861c5 100644 --- a/drivers/rtc/rtc-nuc900.c +++ b/drivers/rtc/rtc-nuc900.c @@ -260,13 +260,7 @@ static int __init nuc900_rtc_probe(struct platform_device *pdev) return 0; } -static int __exit nuc900_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - static struct platform_driver nuc900_rtc_driver = { - .remove = __exit_p(nuc900_rtc_remove), .driver = { .name = "nuc900-rtc", .owner = THIS_MODULE, -- cgit v0.10.2 From e7d5a628f2e6c8153922b20099cba0b7c35ac2ae Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:49 -0700 Subject: drivers/rtc/rtc-msm6242.c: remove empty function After the switch to devm_ functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-msm6242.c b/drivers/rtc/rtc-msm6242.c index c014fa2..426cb51 100644 --- a/drivers/rtc/rtc-msm6242.c +++ b/drivers/rtc/rtc-msm6242.c @@ -222,17 +222,11 @@ static int __init msm6242_rtc_probe(struct platform_device *pdev) return 0; } -static int __exit msm6242_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - static struct platform_driver msm6242_rtc_driver = { .driver = { .name = "rtc-msm6242", .owner = THIS_MODULE, }, - .remove = __exit_p(msm6242_rtc_remove), }; module_platform_driver_probe(msm6242_rtc_driver, msm6242_rtc_probe); -- cgit v0.10.2 From 2fbbdb11f098d2c03677d1eb730bd55de18faa7a Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:50 -0700 Subject: drivers/rtc/rtc-max8998.c: remove empty function After the switch to devm_ functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Minkyu Kang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-max8998.c b/drivers/rtc/rtc-max8998.c index 738eece..5388336 100644 --- a/drivers/rtc/rtc-max8998.c +++ b/drivers/rtc/rtc-max8998.c @@ -294,11 +294,6 @@ static int max8998_rtc_probe(struct platform_device *pdev) return 0; } -static int max8998_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - static const struct platform_device_id max8998_rtc_id[] = { { "max8998-rtc", TYPE_MAX8998 }, { "lp3974-rtc", TYPE_LP3974 }, @@ -311,7 +306,6 @@ static struct platform_driver max8998_rtc_driver = { .owner = THIS_MODULE, }, .probe = max8998_rtc_probe, - .remove = max8998_rtc_remove, .id_table = max8998_rtc_id, }; -- cgit v0.10.2 From ce06cc82d7cb5445d0c01c980433ada4ab0f2847 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:51 -0700 Subject: drivers/rtc/rtc-max8925.c: remove empty function After the switch to devm_ functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Haojian Zhuang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-max8925.c b/drivers/rtc/rtc-max8925.c index 981b654..951d1a7 100644 --- a/drivers/rtc/rtc-max8925.c +++ b/drivers/rtc/rtc-max8925.c @@ -288,11 +288,6 @@ static int max8925_rtc_probe(struct platform_device *pdev) return 0; } -static int max8925_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - #ifdef CONFIG_PM_SLEEP static int max8925_rtc_suspend(struct device *dev) { @@ -323,7 +318,6 @@ static struct platform_driver max8925_rtc_driver = { .pm = &max8925_rtc_pm_ops, }, .probe = max8925_rtc_probe, - .remove = max8925_rtc_remove, }; module_platform_driver(max8925_rtc_driver); -- cgit v0.10.2 From 7f9833cdf642ade8a99d7979a7128b9288234d95 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:51 -0700 Subject: drivers/rtc/rtc-ls1x.c: remove empty function After the switch to devm_ functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: zhao zhang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ls1x.c b/drivers/rtc/rtc-ls1x.c index e913f38..682ecb0 100644 --- a/drivers/rtc/rtc-ls1x.c +++ b/drivers/rtc/rtc-ls1x.c @@ -185,17 +185,11 @@ err: return ret; } -static int ls1x_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - static struct platform_driver ls1x_rtc_driver = { .driver = { .name = "ls1x-rtc", .owner = THIS_MODULE, }, - .remove = ls1x_rtc_remove, .probe = ls1x_rtc_probe, }; -- cgit v0.10.2 From ffeb40515971c9860ff671bb074689db15e18831 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:53 -0700 Subject: drivers/rtc/rtc-lp8788.c: remove empty function After the switch to devm_ functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Acked-by: Milo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-lp8788.c b/drivers/rtc/rtc-lp8788.c index 76a82dc..4ff6c73 100644 --- a/drivers/rtc/rtc-lp8788.c +++ b/drivers/rtc/rtc-lp8788.c @@ -312,14 +312,8 @@ static int lp8788_rtc_probe(struct platform_device *pdev) return 0; } -static int lp8788_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - static struct platform_driver lp8788_rtc_driver = { .probe = lp8788_rtc_probe, - .remove = lp8788_rtc_remove, .driver = { .name = LP8788_DEV_RTC, .owner = THIS_MODULE, -- cgit v0.10.2 From 013f91e7bd98d6adb3b84763e2ab8ff86e64445f Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:54 -0700 Subject: drivers/rtc/rtc-ds1302.c: remove empty function After the switch to devm_ functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c index 1f8ee63..7533b72 100644 --- a/drivers/rtc/rtc-ds1302.c +++ b/drivers/rtc/rtc-ds1302.c @@ -234,17 +234,11 @@ static int __init ds1302_rtc_probe(struct platform_device *pdev) return 0; } -static int __exit ds1302_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - static struct platform_driver ds1302_platform_driver = { .driver = { .name = DRV_NAME, .owner = THIS_MODULE, }, - .remove = __exit_p(ds1302_rtc_remove), }; module_platform_driver_probe(ds1302_platform_driver, ds1302_rtc_probe); -- cgit v0.10.2 From 277048aa6a32ecd8a21241737287ff377a5501e0 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:55 -0700 Subject: drivers/rtc/rtc-dm355evm.c: remove empty function After the switch to devm_ functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-dm355evm.c b/drivers/rtc/rtc-dm355evm.c index 1855581..1aca083 100644 --- a/drivers/rtc/rtc-dm355evm.c +++ b/drivers/rtc/rtc-dm355evm.c @@ -139,18 +139,12 @@ static int dm355evm_rtc_probe(struct platform_device *pdev) return 0; } -static int dm355evm_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - /* * I2C is used to talk to the MSP430, but this platform device is * exposed by an MFD driver that manages I2C communications. */ static struct platform_driver rtc_dm355evm_driver = { .probe = dm355evm_rtc_probe, - .remove = dm355evm_rtc_remove, .driver = { .owner = THIS_MODULE, .name = "rtc-dm355evm", -- cgit v0.10.2 From 67d326fa716f1521d435f18a4adae83a767cde69 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:56 -0700 Subject: drivers/rtc/rtc-da9055.c: remove empty function After the switch to devm_ functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: David Dajun Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-da9055.c b/drivers/rtc/rtc-da9055.c index df4cf16..e00642b 100644 --- a/drivers/rtc/rtc-da9055.c +++ b/drivers/rtc/rtc-da9055.c @@ -315,11 +315,6 @@ err_rtc: } -static int da9055_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - #ifdef CONFIG_PM /* Turn off the alarm if it should not be a wake source. */ static int da9055_rtc_suspend(struct device *dev) @@ -392,7 +387,6 @@ static const struct dev_pm_ops da9055_rtc_pm_ops = { static struct platform_driver da9055_rtc_driver = { .probe = da9055_rtc_probe, - .remove = da9055_rtc_remove, .driver = { .name = "da9055-rtc", .owner = THIS_MODULE, -- cgit v0.10.2 From 8c0a4273306d2bf95d2448ff457847d7c59bec52 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:56 -0700 Subject: drivers/rtc/rtc-da9052.c: remove empty function After the switch to devm_ functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: David Dajun Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-da9052.c b/drivers/rtc/rtc-da9052.c index a61ab7f..2f7fdd7 100644 --- a/drivers/rtc/rtc-da9052.c +++ b/drivers/rtc/rtc-da9052.c @@ -255,14 +255,8 @@ static int da9052_rtc_probe(struct platform_device *pdev) return 0; } -static int da9052_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - static struct platform_driver da9052_rtc_driver = { .probe = da9052_rtc_probe, - .remove = da9052_rtc_remove, .driver = { .name = "da9052-rtc", .owner = THIS_MODULE, -- cgit v0.10.2 From d6c817586a9a5e1454c4e2f16f6f9968508168b6 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:57 -0700 Subject: drivers/rtc/rtc-bq4802.c: remove empty function After the switch to devm_ functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-bq4802.c b/drivers/rtc/rtc-bq4802.c index b919168..fc0ff87 100644 --- a/drivers/rtc/rtc-bq4802.c +++ b/drivers/rtc/rtc-bq4802.c @@ -186,11 +186,6 @@ out: } -static int bq4802_remove(struct platform_device *pdev) -{ - return 0; -} - /* work with hotplug and coldplug */ MODULE_ALIAS("platform:rtc-bq4802"); @@ -200,7 +195,6 @@ static struct platform_driver bq4802_driver = { .owner = THIS_MODULE, }, .probe = bq4802_probe, - .remove = bq4802_remove, }; module_platform_driver(bq4802_driver); -- cgit v0.10.2 From b3ecafd97be890fcfef5b2165a98c8eea01133c0 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:58 -0700 Subject: drivers/rtc/rtc-au1xxx.c: remove empty function After the switch to devm_ functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Manuel Lauss Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-au1xxx.c b/drivers/rtc/rtc-au1xxx.c index 7c6e7bb..ed526a1 100644 --- a/drivers/rtc/rtc-au1xxx.c +++ b/drivers/rtc/rtc-au1xxx.c @@ -116,17 +116,11 @@ out_err: return ret; } -static int au1xtoy_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - static struct platform_driver au1xrtc_driver = { .driver = { .name = "rtc-au1xxx", .owner = THIS_MODULE, }, - .remove = au1xtoy_rtc_remove, }; module_platform_driver_probe(au1xrtc_driver, au1xtoy_rtc_probe); -- cgit v0.10.2 From a2c0b85945e75901906d0aa82ef9a0bea96f85ce Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:06:59 -0700 Subject: drivers/rtc/rtc-ab3100.c: remove empty function After the switch to devm_ functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Acked-by: Linus Walleij Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ab3100.c b/drivers/rtc/rtc-ab3100.c index 572208d..ff43534 100644 --- a/drivers/rtc/rtc-ab3100.c +++ b/drivers/rtc/rtc-ab3100.c @@ -240,17 +240,11 @@ static int __init ab3100_rtc_probe(struct platform_device *pdev) return 0; } -static int __exit ab3100_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - static struct platform_driver ab3100_rtc_driver = { .driver = { .name = "ab3100-rtc", .owner = THIS_MODULE, }, - .remove = __exit_p(ab3100_rtc_remove), }; module_platform_driver_probe(ab3100_rtc_driver, ab3100_rtc_probe); -- cgit v0.10.2 From 7e3c741abdae964b8f8aa9f46cf51143e2d0f686 Mon Sep 17 00:00:00 2001 From: Alexander Holler Date: Wed, 3 Jul 2013 15:07:00 -0700 Subject: rtc: rtc-hid-sensor-time: allow full years (16bit) in HID reports The draft for HID-sensors (HUTRR39) currently doesn't define the range for the attribute year. Asking one of the authors revealed that full years (e.g. 2013 instead of just 13) were meant. So we now allow both, 8 bit and 16 bit values for the attribute year and assuming full years when the value is 16 bits wide. We will still support 8 bit values until the specification gets final (and maybe defines a way to set the time too). Signed-off-by: Alexander Holler Cc: Alessandro Zummo Cc: Lars-Peter Clausen Cc: Jonathan Cameron Cc: Jiri Kosina Cc: John Stultz Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c index 6302450..b8c1b10 100644 --- a/drivers/rtc/rtc-hid-sensor-time.c +++ b/drivers/rtc/rtc-hid-sensor-time.c @@ -85,10 +85,13 @@ static int hid_time_capture_sample(struct hid_sensor_hub_device *hsdev, switch (usage_id) { case HID_USAGE_SENSOR_TIME_YEAR: - time_buf->tm_year = *(u8 *)raw_data; - if (time_buf->tm_year < 70) - /* assume we are in 1970...2069 */ - time_buf->tm_year += 100; + if (raw_len == 1) { + time_buf->tm_year = *(u8 *)raw_data; + if (time_buf->tm_year < 70) + /* assume we are in 1970...2069 */ + time_buf->tm_year += 100; + } else + time_buf->tm_year = *(u16 *)raw_data-1900; break; case HID_USAGE_SENSOR_TIME_MONTH: /* sensor sending the month as 1-12, we need 0-11 */ @@ -151,11 +154,27 @@ static int hid_time_parse_report(struct platform_device *pdev, return -EINVAL; } if (time_state->info[i].size != 1) { - dev_err(&pdev->dev, - "attribute '%s' not 8 bits wide!\n", + /* + * The draft for HID-sensors (HUTRR39) currently + * doesn't define the range for the year attribute. + * Therefor we support both 8 bit (0-99) and 16 bit + * (full) as size for the year. + */ + if (time_state->info[i].attrib_id != + HID_USAGE_SENSOR_TIME_YEAR) { + dev_err(&pdev->dev, + "attribute '%s' not 8 bits wide!\n", hid_time_attrib_name( time_state->info[i].attrib_id)); - return -EINVAL; + return -EINVAL; + } + if (time_state->info[i].size != 2) { + dev_err(&pdev->dev, + "attribute '%s' not 8 or 16 bits wide!\n", + hid_time_attrib_name( + time_state->info[i].attrib_id)); + return -EINVAL; + } } if (time_state->info[i].units != HID_USAGE_SENSOR_UNITS_NOT_SPECIFIED && -- cgit v0.10.2 From c67d96e317510e63e8a24d3a5742dd47b21c3340 Mon Sep 17 00:00:00 2001 From: Alexander Holler Date: Wed, 3 Jul 2013 15:07:02 -0700 Subject: rtc: rtc-hid-sensor-time: allow 16 and 32 bit values for all attributes. There is no real reason to not support 16 or 32 bit values too. Signed-off-by: Alexander Holler Cc: Alessandro Zummo Cc: Lars-Peter Clausen Cc: Jonathan Cameron Cc: Jiri Kosina Cc: John Stultz Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c index b8c1b10..7273b01 100644 --- a/drivers/rtc/rtc-hid-sensor-time.c +++ b/drivers/rtc/rtc-hid-sensor-time.c @@ -76,6 +76,20 @@ static int hid_time_proc_event(struct hid_sensor_hub_device *hsdev, return 0; } +static u32 hid_time_value(size_t raw_len, char *raw_data) +{ + switch (raw_len) { + case 1: + return *(u8 *)raw_data; + case 2: + return *(u16 *)raw_data; + case 4: + return *(u32 *)raw_data; + default: + return (u32)(~0U); /* 0xff... or -1 to denote an error */ + } +} + static int hid_time_capture_sample(struct hid_sensor_hub_device *hsdev, unsigned usage_id, size_t raw_len, char *raw_data, void *priv) @@ -85,29 +99,35 @@ static int hid_time_capture_sample(struct hid_sensor_hub_device *hsdev, switch (usage_id) { case HID_USAGE_SENSOR_TIME_YEAR: + /* + * The draft for HID-sensors (HUTRR39) currently doesn't define + * the range for the year attribute. Therefor we support + * 8 bit (0-99) and 16 or 32 bits (full) as size for the year. + */ if (raw_len == 1) { time_buf->tm_year = *(u8 *)raw_data; if (time_buf->tm_year < 70) /* assume we are in 1970...2069 */ time_buf->tm_year += 100; } else - time_buf->tm_year = *(u16 *)raw_data-1900; + time_buf->tm_year = + (int)hid_time_value(raw_len, raw_data)-1900; break; case HID_USAGE_SENSOR_TIME_MONTH: - /* sensor sending the month as 1-12, we need 0-11 */ - time_buf->tm_mon = *(u8 *)raw_data-1; + /* sensors are sending the month as 1-12, we need 0-11 */ + time_buf->tm_mon = (int)hid_time_value(raw_len, raw_data)-1; break; case HID_USAGE_SENSOR_TIME_DAY: - time_buf->tm_mday = *(u8 *)raw_data; + time_buf->tm_mday = (int)hid_time_value(raw_len, raw_data); break; case HID_USAGE_SENSOR_TIME_HOUR: - time_buf->tm_hour = *(u8 *)raw_data; + time_buf->tm_hour = (int)hid_time_value(raw_len, raw_data); break; case HID_USAGE_SENSOR_TIME_MINUTE: - time_buf->tm_min = *(u8 *)raw_data; + time_buf->tm_min = (int)hid_time_value(raw_len, raw_data); break; case HID_USAGE_SENSOR_TIME_SECOND: - time_buf->tm_sec = *(u8 *)raw_data; + time_buf->tm_sec = (int)hid_time_value(raw_len, raw_data); break; default: return -EINVAL; @@ -153,28 +173,13 @@ static int hid_time_parse_report(struct platform_device *pdev, "not all needed attributes inside the same report!\n"); return -EINVAL; } - if (time_state->info[i].size != 1) { - /* - * The draft for HID-sensors (HUTRR39) currently - * doesn't define the range for the year attribute. - * Therefor we support both 8 bit (0-99) and 16 bit - * (full) as size for the year. - */ - if (time_state->info[i].attrib_id != - HID_USAGE_SENSOR_TIME_YEAR) { - dev_err(&pdev->dev, - "attribute '%s' not 8 bits wide!\n", - hid_time_attrib_name( - time_state->info[i].attrib_id)); - return -EINVAL; - } - if (time_state->info[i].size != 2) { - dev_err(&pdev->dev, - "attribute '%s' not 8 or 16 bits wide!\n", + if (time_state->info[i].size == 3 || + time_state->info[i].size > 4) { + dev_err(&pdev->dev, + "attribute '%s' not 8, 16 or 32 bits wide!\n", hid_time_attrib_name( time_state->info[i].attrib_id)); - return -EINVAL; - } + return -EINVAL; } if (time_state->info[i].units != HID_USAGE_SENSOR_UNITS_NOT_SPECIFIED && -- cgit v0.10.2 From 1df0a4711f6e93c1073dd82f6e7905748842e2b3 Mon Sep 17 00:00:00 2001 From: Bernie Thompson Date: Wed, 3 Jul 2013 15:07:03 -0700 Subject: rtc: add ability to push out an existing wakealarm using sysfs This adds the ability for the rtc sysfs code to handle += characters at the beginning of a wakealarm setting string. This will allow the user to attempt to push out an existing wakealarm by a provided amount. In the case that the += characters are provided but the alarm is not active -EINVAL is returned. his is useful, at least for my purposes in suspend/resume testing. The basic test goes something like: 1. Set a wake alarm from userspace 5 seconds in the future 2. Start the suspend process (echo mem > /sys/power/state) 3. After ~2.5 seconds if userspace is still running (using another thread to check this), move the wake alarm 5 more seconds If the "move" involves an unset of the wakealarm then there's a period of time where the system is midway through suspending but has no wake alarm. It will get stuck. We'd rather not remove the "move" since the idea is to avoid a cancelled suspend when the alarm fires _during_ suspend. It is difficult for the test to tell the difference between a suspend that was cancelled because the alarm fired too early and a suspend that was Signed-off-by: Bernie Thompson Cc: Alessandro Zummo Cc: Doug Anderson Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt index 32aa400..596b60c 100644 --- a/Documentation/rtc.txt +++ b/Documentation/rtc.txt @@ -153,9 +153,10 @@ since_epoch: The number of seconds since the epoch according to the RTC time: RTC-provided time wakealarm: The time at which the clock will generate a system wakeup event. This is a one shot wakeup event, so must be reset - after wake if a daily wakeup is required. Format is either - seconds since the epoch or, if there's a leading +, seconds - in the future. + after wake if a daily wakeup is required. Format is seconds since + the epoch by default, or if there's a leading +, seconds in the + future, or if there is a leading +=, seconds ahead of the current + alarm. IOCTL INTERFACE --------------- diff --git a/drivers/rtc/rtc-sysfs.c b/drivers/rtc/rtc-sysfs.c index b70e2bb..4b26f86 100644 --- a/drivers/rtc/rtc-sysfs.c +++ b/drivers/rtc/rtc-sysfs.c @@ -164,6 +164,7 @@ rtc_sysfs_set_wakealarm(struct device *dev, struct device_attribute *attr, { ssize_t retval; unsigned long now, alarm; + unsigned long push = 0; struct rtc_wkalrm alm; struct rtc_device *rtc = to_rtc_device(dev); char *buf_ptr; @@ -180,13 +181,17 @@ rtc_sysfs_set_wakealarm(struct device *dev, struct device_attribute *attr, buf_ptr = (char *)buf; if (*buf_ptr == '+') { buf_ptr++; - adjust = 1; + if (*buf_ptr == '=') { + buf_ptr++; + push = 1; + } else + adjust = 1; } alarm = simple_strtoul(buf_ptr, NULL, 0); if (adjust) { alarm += now; } - if (alarm > now) { + if (alarm > now || push) { /* Avoid accidentally clobbering active alarms; we can't * entirely prevent that here, without even the minimal * locking from the /dev/rtcN api. @@ -194,9 +199,14 @@ rtc_sysfs_set_wakealarm(struct device *dev, struct device_attribute *attr, retval = rtc_read_alarm(rtc, &alm); if (retval < 0) return retval; - if (alm.enabled) - return -EBUSY; - + if (alm.enabled) { + if (push) { + rtc_tm_to_time(&alm.time, &push); + alarm += push; + } else + return -EBUSY; + } else if (push) + return -EINVAL; alm.enabled = 1; } else { alm.enabled = 0; -- cgit v0.10.2 From 812f147853a14812b260620b19e0764cdeb71b4c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 3 Jul 2013 15:07:04 -0700 Subject: drivers/rtc/rtc-vr41xx.c: fix error return code in rtc_probe() Fix to return -EBUSY in the platform irq get error handling case instead of 0, as done elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c index 3b5b4fa..54e104e 100644 --- a/drivers/rtc/rtc-vr41xx.c +++ b/drivers/rtc/rtc-vr41xx.c @@ -339,8 +339,10 @@ static int rtc_probe(struct platform_device *pdev) goto err_device_unregister; pie_irq = platform_get_irq(pdev, 1); - if (pie_irq <= 0) + if (pie_irq <= 0) { + retval = -EBUSY; goto err_free_irq; + } retval = request_irq(pie_irq, rtclong1_interrupt, 0, "rtclong1", pdev); -- cgit v0.10.2 From edca66d2ceae6ba426b9250ffcad5bca820b146f Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:07:05 -0700 Subject: rtc: rtc-ds1307: use devm_*() functions Use devm_*() functions to make cleanup paths simpler. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index b53992a..ca18fd1 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -683,7 +683,7 @@ static int ds1307_probe(struct i2c_client *client, && !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) return -EIO; - ds1307 = kzalloc(sizeof(struct ds1307), GFP_KERNEL); + ds1307 = devm_kzalloc(&client->dev, sizeof(struct ds1307), GFP_KERNEL); if (!ds1307) return -ENOMEM; @@ -715,7 +715,7 @@ static int ds1307_probe(struct i2c_client *client, if (tmp != 2) { dev_dbg(&client->dev, "read error %d\n", tmp); err = -EIO; - goto exit_free; + goto exit; } /* oscillator off? turn it on, so clock can tick. */ @@ -754,7 +754,7 @@ static int ds1307_probe(struct i2c_client *client, if (tmp != 2) { dev_dbg(&client->dev, "read error %d\n", tmp); err = -EIO; - goto exit_free; + goto exit; } /* oscillator off? turn it on, so clock can tick. */ @@ -798,7 +798,7 @@ static int ds1307_probe(struct i2c_client *client, if (tmp != 2) { dev_dbg(&client->dev, "read error %d\n", tmp); err = -EIO; - goto exit_free; + goto exit; } /* correct hour */ @@ -826,7 +826,7 @@ read_rtc: if (tmp != 8) { dev_dbg(&client->dev, "read error %d\n", tmp); err = -EIO; - goto exit_free; + goto exit; } /* @@ -868,7 +868,7 @@ read_rtc: if (tmp < 0) { dev_dbg(&client->dev, "read error %d\n", tmp); err = -EIO; - goto exit_free; + goto exit; } /* oscillator fault? clear flag, and warn */ @@ -927,13 +927,13 @@ read_rtc: bin2bcd(tmp)); } - ds1307->rtc = rtc_device_register(client->name, &client->dev, + ds1307->rtc = devm_rtc_device_register(&client->dev, client->name, &ds13xx_rtc_ops, THIS_MODULE); if (IS_ERR(ds1307->rtc)) { err = PTR_ERR(ds1307->rtc); dev_err(&client->dev, "unable to register the class device\n"); - goto exit_free; + goto exit; } if (want_irq) { @@ -942,7 +942,7 @@ read_rtc: if (err) { dev_err(&client->dev, "unable to request IRQ!\n"); - goto exit_irq; + goto exit; } device_set_wakeup_capable(&client->dev, 1); @@ -951,11 +951,12 @@ read_rtc: } if (chip->nvram_size) { - ds1307->nvram = kzalloc(sizeof(struct bin_attribute), - GFP_KERNEL); + ds1307->nvram = devm_kzalloc(&client->dev, + sizeof(struct bin_attribute), + GFP_KERNEL); if (!ds1307->nvram) { err = -ENOMEM; - goto exit_nvram; + goto exit; } ds1307->nvram->attr.name = "nvram"; ds1307->nvram->attr.mode = S_IRUGO | S_IWUSR; @@ -965,21 +966,15 @@ read_rtc: ds1307->nvram->size = chip->nvram_size; ds1307->nvram_offset = chip->nvram_offset; err = sysfs_create_bin_file(&client->dev.kobj, ds1307->nvram); - if (err) { - kfree(ds1307->nvram); - goto exit_nvram; - } + if (err) + goto exit; set_bit(HAS_NVRAM, &ds1307->flags); dev_info(&client->dev, "%zu bytes nvram\n", ds1307->nvram->size); } return 0; -exit_nvram: -exit_irq: - rtc_device_unregister(ds1307->rtc); -exit_free: - kfree(ds1307); +exit: return err; } @@ -992,13 +987,9 @@ static int ds1307_remove(struct i2c_client *client) cancel_work_sync(&ds1307->work); } - if (test_and_clear_bit(HAS_NVRAM, &ds1307->flags)) { + if (test_and_clear_bit(HAS_NVRAM, &ds1307->flags)) sysfs_remove_bin_file(&client->dev.kobj, ds1307->nvram); - kfree(ds1307->nvram); - } - rtc_device_unregister(ds1307->rtc); - kfree(ds1307); return 0; } -- cgit v0.10.2 From c08ac4894c2079531f0310b1e9b7aba7a5b8b9be Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:07:06 -0700 Subject: rtc: rtc-jz4740: use devm_*() functions Use devm_*() functions to make cleanup paths simpler. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-jz4740.c b/drivers/rtc/rtc-jz4740.c index c6573585..1b126d2 100644 --- a/drivers/rtc/rtc-jz4740.c +++ b/drivers/rtc/rtc-jz4740.c @@ -14,6 +14,7 @@ * */ +#include #include #include #include @@ -216,37 +217,34 @@ static int jz4740_rtc_probe(struct platform_device *pdev) struct jz4740_rtc *rtc; uint32_t scratchpad; - rtc = kzalloc(sizeof(*rtc), GFP_KERNEL); + rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL); if (!rtc) return -ENOMEM; rtc->irq = platform_get_irq(pdev, 0); if (rtc->irq < 0) { - ret = -ENOENT; dev_err(&pdev->dev, "Failed to get platform irq\n"); - goto err_free; + return -ENOENT; } rtc->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!rtc->mem) { - ret = -ENOENT; dev_err(&pdev->dev, "Failed to get platform mmio memory\n"); - goto err_free; + return -ENOENT; } - rtc->mem = request_mem_region(rtc->mem->start, resource_size(rtc->mem), - pdev->name); + rtc->mem = devm_request_mem_region(&pdev->dev, rtc->mem->start, + resource_size(rtc->mem), pdev->name); if (!rtc->mem) { - ret = -EBUSY; dev_err(&pdev->dev, "Failed to request mmio memory region\n"); - goto err_free; + return -EBUSY; } - rtc->base = ioremap_nocache(rtc->mem->start, resource_size(rtc->mem)); + rtc->base = devm_ioremap_nocache(&pdev->dev, rtc->mem->start, + resource_size(rtc->mem)); if (!rtc->base) { - ret = -EBUSY; dev_err(&pdev->dev, "Failed to ioremap mmio memory\n"); - goto err_release_mem_region; + return -EBUSY; } spin_lock_init(&rtc->lock); @@ -255,19 +253,19 @@ static int jz4740_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 1); - rtc->rtc = rtc_device_register(pdev->name, &pdev->dev, &jz4740_rtc_ops, - THIS_MODULE); + rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, + &jz4740_rtc_ops, THIS_MODULE); if (IS_ERR(rtc->rtc)) { ret = PTR_ERR(rtc->rtc); dev_err(&pdev->dev, "Failed to register rtc device: %d\n", ret); - goto err_iounmap; + return ret; } - ret = request_irq(rtc->irq, jz4740_rtc_irq, 0, + ret = devm_request_irq(&pdev->dev, rtc->irq, jz4740_rtc_irq, 0, pdev->name, rtc); if (ret) { dev_err(&pdev->dev, "Failed to request rtc irq: %d\n", ret); - goto err_unregister_rtc; + return ret; } scratchpad = jz4740_rtc_reg_read(rtc, JZ_REG_RTC_SCRATCHPAD); @@ -276,43 +274,13 @@ static int jz4740_rtc_probe(struct platform_device *pdev) ret = jz4740_rtc_reg_write(rtc, JZ_REG_RTC_SEC, 0); if (ret) { dev_err(&pdev->dev, "Could not write write to RTC registers\n"); - goto err_free_irq; + return ret; } } return 0; - -err_free_irq: - free_irq(rtc->irq, rtc); -err_unregister_rtc: - rtc_device_unregister(rtc->rtc); -err_iounmap: - iounmap(rtc->base); -err_release_mem_region: - release_mem_region(rtc->mem->start, resource_size(rtc->mem)); -err_free: - kfree(rtc); - - return ret; } -static int jz4740_rtc_remove(struct platform_device *pdev) -{ - struct jz4740_rtc *rtc = platform_get_drvdata(pdev); - - free_irq(rtc->irq, rtc); - - rtc_device_unregister(rtc->rtc); - - iounmap(rtc->base); - release_mem_region(rtc->mem->start, resource_size(rtc->mem)); - - kfree(rtc); - - return 0; -} - - #ifdef CONFIG_PM static int jz4740_rtc_suspend(struct device *dev) { @@ -344,7 +312,6 @@ static const struct dev_pm_ops jz4740_pm_ops = { static struct platform_driver jz4740_rtc_driver = { .probe = jz4740_rtc_probe, - .remove = jz4740_rtc_remove, .driver = { .name = "jz4740-rtc", .owner = THIS_MODULE, -- cgit v0.10.2 From 073bf424602992427e3dbf83a1dca47ed119326a Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:07:07 -0700 Subject: rtc: rtc-mpc5121: use devm_*() functions Use devm_*() functions to make cleanup paths simpler. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-mpc5121.c b/drivers/rtc/rtc-mpc5121.c index 213006b..4c02497 100644 --- a/drivers/rtc/rtc-mpc5121.c +++ b/drivers/rtc/rtc-mpc5121.c @@ -312,15 +312,14 @@ static int mpc5121_rtc_probe(struct platform_device *op) struct mpc5121_rtc_data *rtc; int err = 0; - rtc = kzalloc(sizeof(*rtc), GFP_KERNEL); + rtc = devm_kzalloc(&op->dev, sizeof(*rtc), GFP_KERNEL); if (!rtc) return -ENOMEM; rtc->regs = of_iomap(op->dev.of_node, 0); if (!rtc->regs) { dev_err(&op->dev, "%s: couldn't map io space\n", __func__); - err = -ENOSYS; - goto out_free; + return -ENOSYS; } device_init_wakeup(&op->dev, 1); @@ -354,10 +353,10 @@ static int mpc5121_rtc_probe(struct platform_device *op) out_be32(&rtc->regs->keep_alive, ka); } - rtc->rtc = rtc_device_register("mpc5121-rtc", &op->dev, + rtc->rtc = devm_rtc_device_register(&op->dev, "mpc5121-rtc", &mpc5121_rtc_ops, THIS_MODULE); } else { - rtc->rtc = rtc_device_register("mpc5200-rtc", &op->dev, + rtc->rtc = devm_rtc_device_register(&op->dev, "mpc5200-rtc", &mpc5200_rtc_ops, THIS_MODULE); } @@ -377,8 +376,6 @@ out_dispose2: out_dispose: irq_dispose_mapping(rtc->irq); iounmap(rtc->regs); -out_free: - kfree(rtc); return err; } @@ -392,14 +389,11 @@ static int mpc5121_rtc_remove(struct platform_device *op) out_8(®s->alm_enable, 0); out_8(®s->int_enable, in_8(®s->int_enable) & ~0x1); - rtc_device_unregister(rtc->rtc); iounmap(rtc->regs); free_irq(rtc->irq, &op->dev); free_irq(rtc->irq_periodic, &op->dev); irq_dispose_mapping(rtc->irq); irq_dispose_mapping(rtc->irq_periodic); - dev_set_drvdata(&op->dev, NULL); - kfree(rtc); return 0; } -- cgit v0.10.2 From 19b8d8875fd5f231135a55bfe53337859ec73a4a Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:07:08 -0700 Subject: rtc: rtc-m48t59: use devm_*() functions Use devm_*() functions to make cleanup paths simpler. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-m48t59.c b/drivers/rtc/rtc-m48t59.c index d4d31fa..fcb0329 100644 --- a/drivers/rtc/rtc-m48t59.c +++ b/drivers/rtc/rtc-m48t59.c @@ -409,7 +409,8 @@ static int m48t59_rtc_probe(struct platform_device *pdev) } else if (res->flags & IORESOURCE_MEM) { /* we are memory-mapped */ if (!pdata) { - pdata = kzalloc(sizeof(*pdata), GFP_KERNEL); + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), + GFP_KERNEL); if (!pdata) return -ENOMEM; /* Ensure we only kmalloc platform data once */ @@ -425,7 +426,7 @@ static int m48t59_rtc_probe(struct platform_device *pdev) pdata->read_byte = m48t59_mem_readb; } - m48t59 = kzalloc(sizeof(*m48t59), GFP_KERNEL); + m48t59 = devm_kzalloc(&pdev->dev, sizeof(*m48t59), GFP_KERNEL); if (!m48t59) return -ENOMEM; @@ -433,9 +434,10 @@ static int m48t59_rtc_probe(struct platform_device *pdev) if (!m48t59->ioaddr) { /* ioaddr not mapped externally */ - m48t59->ioaddr = ioremap(res->start, resource_size(res)); + m48t59->ioaddr = devm_ioremap(&pdev->dev, res->start, + resource_size(res)); if (!m48t59->ioaddr) - goto out; + return ret; } /* Try to get irq number. We also can work in @@ -446,10 +448,11 @@ static int m48t59_rtc_probe(struct platform_device *pdev) m48t59->irq = NO_IRQ; if (m48t59->irq != NO_IRQ) { - ret = request_irq(m48t59->irq, m48t59_rtc_interrupt, - IRQF_SHARED, "rtc-m48t59", &pdev->dev); + ret = devm_request_irq(&pdev->dev, m48t59->irq, + m48t59_rtc_interrupt, IRQF_SHARED, + "rtc-m48t59", &pdev->dev); if (ret) - goto out; + return ret; } switch (pdata->type) { case M48T59RTC_TYPE_M48T59: @@ -469,51 +472,29 @@ static int m48t59_rtc_probe(struct platform_device *pdev) break; default: dev_err(&pdev->dev, "Unknown RTC type\n"); - ret = -ENODEV; - goto out; + return -ENODEV; } spin_lock_init(&m48t59->lock); platform_set_drvdata(pdev, m48t59); - m48t59->rtc = rtc_device_register(name, &pdev->dev, ops, THIS_MODULE); - if (IS_ERR(m48t59->rtc)) { - ret = PTR_ERR(m48t59->rtc); - goto out; - } + m48t59->rtc = devm_rtc_device_register(&pdev->dev, name, ops, + THIS_MODULE); + if (IS_ERR(m48t59->rtc)) + return PTR_ERR(m48t59->rtc); m48t59_nvram_attr.size = pdata->offset; ret = sysfs_create_bin_file(&pdev->dev.kobj, &m48t59_nvram_attr); - if (ret) { - rtc_device_unregister(m48t59->rtc); - goto out; - } + if (ret) + return ret; return 0; - -out: - if (m48t59->irq != NO_IRQ) - free_irq(m48t59->irq, &pdev->dev); - if (m48t59->ioaddr) - iounmap(m48t59->ioaddr); - kfree(m48t59); - return ret; } static int m48t59_rtc_remove(struct platform_device *pdev) { - struct m48t59_private *m48t59 = platform_get_drvdata(pdev); - struct m48t59_plat_data *pdata = pdev->dev.platform_data; - sysfs_remove_bin_file(&pdev->dev.kobj, &m48t59_nvram_attr); - if (!IS_ERR(m48t59->rtc)) - rtc_device_unregister(m48t59->rtc); - if (m48t59->ioaddr && !pdata->ioaddr) - iounmap(m48t59->ioaddr); - if (m48t59->irq != NO_IRQ) - free_irq(m48t59->irq, &pdev->dev); - kfree(m48t59); return 0; } -- cgit v0.10.2 From c417299ce7d77521225f2aff471d3f7ee5f34b1e Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:07:09 -0700 Subject: rtc: rtc-pm8xxx: use devm_*() functions Use devm_*() functions to make cleanup paths simpler. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c index 14ee860..03f8f75 100644 --- a/drivers/rtc/rtc-pm8xxx.c +++ b/drivers/rtc/rtc-pm8xxx.c @@ -395,7 +395,7 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) if (pdata != NULL) rtc_write_enable = pdata->rtc_write_enable; - rtc_dd = kzalloc(sizeof(*rtc_dd), GFP_KERNEL); + rtc_dd = devm_kzalloc(&pdev->dev, sizeof(*rtc_dd), GFP_KERNEL); if (rtc_dd == NULL) { dev_err(&pdev->dev, "Unable to allocate memory!\n"); return -ENOMEM; @@ -407,16 +407,14 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) rtc_dd->rtc_alarm_irq = platform_get_irq(pdev, 0); if (rtc_dd->rtc_alarm_irq < 0) { dev_err(&pdev->dev, "Alarm IRQ resource absent!\n"); - rc = -ENXIO; - goto fail_rtc_enable; + return -ENXIO; } rtc_resource = platform_get_resource_byname(pdev, IORESOURCE_IO, "pmic_rtc_base"); if (!(rtc_resource && rtc_resource->start)) { dev_err(&pdev->dev, "RTC IO resource absent!\n"); - rc = -ENXIO; - goto fail_rtc_enable; + return -ENXIO; } rtc_dd->rtc_base = rtc_resource->start; @@ -432,7 +430,7 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) rc = pm8xxx_read_wrapper(rtc_dd, &ctrl_reg, rtc_dd->rtc_base, 1); if (rc < 0) { dev_err(&pdev->dev, "RTC control register read failed!\n"); - goto fail_rtc_enable; + return rc; } if (!(ctrl_reg & PM8xxx_RTC_ENABLE)) { @@ -442,7 +440,7 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) if (rc < 0) { dev_err(&pdev->dev, "Write to RTC control register " "failed\n"); - goto fail_rtc_enable; + return rc; } } @@ -453,13 +451,12 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc_dd); /* Register the RTC device */ - rtc_dd->rtc = rtc_device_register("pm8xxx_rtc", &pdev->dev, + rtc_dd->rtc = devm_rtc_device_register(&pdev->dev, "pm8xxx_rtc", &pm8xxx_rtc_ops, THIS_MODULE); if (IS_ERR(rtc_dd->rtc)) { dev_err(&pdev->dev, "%s: RTC registration failed (%ld)\n", __func__, PTR_ERR(rtc_dd->rtc)); - rc = PTR_ERR(rtc_dd->rtc); - goto fail_rtc_enable; + return PTR_ERR(rtc_dd->rtc); } /* Request the alarm IRQ */ @@ -468,7 +465,7 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) "pm8xxx_rtc_alarm", rtc_dd); if (rc < 0) { dev_err(&pdev->dev, "Request IRQ failed (%d)\n", rc); - goto fail_req_irq; + return rc; } device_init_wakeup(&pdev->dev, 1); @@ -476,12 +473,6 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) dev_dbg(&pdev->dev, "Probe success !!\n"); return 0; - -fail_req_irq: - rtc_device_unregister(rtc_dd->rtc); -fail_rtc_enable: - kfree(rtc_dd); - return rc; } static int pm8xxx_rtc_remove(struct platform_device *pdev) @@ -490,8 +481,6 @@ static int pm8xxx_rtc_remove(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 0); free_irq(rtc_dd->rtc_alarm_irq, rtc_dd); - rtc_device_unregister(rtc_dd->rtc); - kfree(rtc_dd); return 0; } -- cgit v0.10.2 From 618c300305c6ee7c71a1493da97c48c24205268c Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:07:09 -0700 Subject: rtc: rtc-pxa: use devm_*() functions Use devm_*() functions to make cleanup paths simpler. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c index ed037ae..a355f2b 100644 --- a/drivers/rtc/rtc-pxa.c +++ b/drivers/rtc/rtc-pxa.c @@ -324,37 +324,35 @@ static int __init pxa_rtc_probe(struct platform_device *pdev) int ret; u32 rttr; - pxa_rtc = kzalloc(sizeof(struct pxa_rtc), GFP_KERNEL); + pxa_rtc = devm_kzalloc(dev, sizeof(*pxa_rtc), GFP_KERNEL); if (!pxa_rtc) return -ENOMEM; spin_lock_init(&pxa_rtc->lock); platform_set_drvdata(pdev, pxa_rtc); - ret = -ENXIO; pxa_rtc->ress = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!pxa_rtc->ress) { dev_err(dev, "No I/O memory resource defined\n"); - goto err_ress; + return -ENXIO; } pxa_rtc->irq_1Hz = platform_get_irq(pdev, 0); if (pxa_rtc->irq_1Hz < 0) { dev_err(dev, "No 1Hz IRQ resource defined\n"); - goto err_ress; + return -ENXIO; } pxa_rtc->irq_Alrm = platform_get_irq(pdev, 1); if (pxa_rtc->irq_Alrm < 0) { dev_err(dev, "No alarm IRQ resource defined\n"); - goto err_ress; + return -ENXIO; } pxa_rtc_open(dev); - ret = -ENOMEM; - pxa_rtc->base = ioremap(pxa_rtc->ress->start, + pxa_rtc->base = devm_ioremap(dev, pxa_rtc->ress->start, resource_size(pxa_rtc->ress)); if (!pxa_rtc->base) { - dev_err(&pdev->dev, "Unable to map pxa RTC I/O memory\n"); - goto err_map; + dev_err(dev, "Unable to map pxa RTC I/O memory\n"); + return -ENOMEM; } /* @@ -370,41 +368,24 @@ static int __init pxa_rtc_probe(struct platform_device *pdev) rtsr_clear_bits(pxa_rtc, RTSR_PIALE | RTSR_RDALE1 | RTSR_HZE); - pxa_rtc->rtc = rtc_device_register("pxa-rtc", &pdev->dev, &pxa_rtc_ops, - THIS_MODULE); - ret = PTR_ERR(pxa_rtc->rtc); + pxa_rtc->rtc = devm_rtc_device_register(&pdev->dev, "pxa-rtc", + &pxa_rtc_ops, THIS_MODULE); if (IS_ERR(pxa_rtc->rtc)) { + ret = PTR_ERR(pxa_rtc->rtc); dev_err(dev, "Failed to register RTC device -> %d\n", ret); - goto err_rtc_reg; + return ret; } device_init_wakeup(dev, 1); return 0; - -err_rtc_reg: - iounmap(pxa_rtc->base); -err_ress: -err_map: - kfree(pxa_rtc); - return ret; } static int __exit pxa_rtc_remove(struct platform_device *pdev) { - struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev); - struct device *dev = &pdev->dev; - pxa_rtc_release(dev); - - rtc_device_unregister(pxa_rtc->rtc); - - spin_lock_irq(&pxa_rtc->lock); - iounmap(pxa_rtc->base); - spin_unlock_irq(&pxa_rtc->lock); - - kfree(pxa_rtc); + pxa_rtc_release(dev); return 0; } -- cgit v0.10.2 From fac42b414a42e54f327ead67fa1804078b0db7be Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:07:10 -0700 Subject: rtc: rtc-rx8025: use devm_*() functions Use devm_*() functions to make cleanup paths simpler. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c index 0722d36..8fa23ea 100644 --- a/drivers/rtc/rtc-rx8025.c +++ b/drivers/rtc/rtc-rx8025.c @@ -549,7 +549,7 @@ static int rx8025_probe(struct i2c_client *client, goto errout; } - rx8025 = kzalloc(sizeof(*rx8025), GFP_KERNEL); + rx8025 = devm_kzalloc(&client->dev, sizeof(*rx8025), GFP_KERNEL); if (!rx8025) { dev_err(&adapter->dev, "failed to alloc memory\n"); err = -ENOMEM; @@ -562,7 +562,7 @@ static int rx8025_probe(struct i2c_client *client, err = rx8025_init_client(client, &need_reset); if (err) - goto errout_free; + goto errout; if (need_reset) { struct rtc_time tm; @@ -572,12 +572,12 @@ static int rx8025_probe(struct i2c_client *client, rx8025_set_time(&client->dev, &tm); } - rx8025->rtc = rtc_device_register(client->name, &client->dev, + rx8025->rtc = devm_rtc_device_register(&client->dev, client->name, &rx8025_rtc_ops, THIS_MODULE); if (IS_ERR(rx8025->rtc)) { err = PTR_ERR(rx8025->rtc); dev_err(&client->dev, "unable to register the class device\n"); - goto errout_free; + goto errout; } if (client->irq > 0) { @@ -586,7 +586,7 @@ static int rx8025_probe(struct i2c_client *client, 0, "rx8025", client); if (err) { dev_err(&client->dev, "unable to request IRQ\n"); - goto errout_reg; + goto errout; } } @@ -603,12 +603,6 @@ errout_irq: if (client->irq > 0) free_irq(client->irq, client); -errout_reg: - rtc_device_unregister(rx8025->rtc); - -errout_free: - kfree(rx8025); - errout: dev_err(&adapter->dev, "probing for rx8025 failed\n"); return err; @@ -629,8 +623,6 @@ static int rx8025_remove(struct i2c_client *client) } rx8025_sysfs_unregister(&client->dev); - rtc_device_unregister(rx8025->rtc); - kfree(rx8025); return 0; } -- cgit v0.10.2 From 0209affa6eefb4bb8288f8a0e678b1b36facabd5 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:07:11 -0700 Subject: rtc: rtc-sh: use devm_*() functions Use devm_*() functions to make cleanup paths simpler. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index cb2f839..6d87e26 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -593,7 +593,7 @@ static int __init sh_rtc_probe(struct platform_device *pdev) char clk_name[6]; int clk_id, ret; - rtc = kzalloc(sizeof(struct sh_rtc), GFP_KERNEL); + rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL); if (unlikely(!rtc)) return -ENOMEM; @@ -602,9 +602,8 @@ static int __init sh_rtc_probe(struct platform_device *pdev) /* get periodic/carry/alarm irqs */ ret = platform_get_irq(pdev, 0); if (unlikely(ret <= 0)) { - ret = -ENOENT; dev_err(&pdev->dev, "No IRQ resource\n"); - goto err_badres; + return -ENOENT; } rtc->periodic_irq = ret; @@ -613,24 +612,21 @@ static int __init sh_rtc_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_IO, 0); if (unlikely(res == NULL)) { - ret = -ENOENT; dev_err(&pdev->dev, "No IO resource\n"); - goto err_badres; + return -ENOENT; } rtc->regsize = resource_size(res); - rtc->res = request_mem_region(res->start, rtc->regsize, pdev->name); - if (unlikely(!rtc->res)) { - ret = -EBUSY; - goto err_badres; - } + rtc->res = devm_request_mem_region(&pdev->dev, res->start, + rtc->regsize, pdev->name); + if (unlikely(!rtc->res)) + return -EBUSY; - rtc->regbase = ioremap_nocache(rtc->res->start, rtc->regsize); - if (unlikely(!rtc->regbase)) { - ret = -EINVAL; - goto err_badmap; - } + rtc->regbase = devm_ioremap_nocache(&pdev->dev, rtc->res->start, + rtc->regsize); + if (unlikely(!rtc->regbase)) + return -EINVAL; clk_id = pdev->id; /* With a single device, the clock id is still "rtc0" */ @@ -639,7 +635,7 @@ static int __init sh_rtc_probe(struct platform_device *pdev) snprintf(clk_name, sizeof(clk_name), "rtc%d", clk_id); - rtc->clk = clk_get(&pdev->dev, clk_name); + rtc->clk = devm_clk_get(&pdev->dev, clk_name); if (IS_ERR(rtc->clk)) { /* * No error handling for rtc->clk intentionally, not all @@ -665,8 +661,8 @@ static int __init sh_rtc_probe(struct platform_device *pdev) if (rtc->carry_irq <= 0) { /* register shared periodic/carry/alarm irq */ - ret = request_irq(rtc->periodic_irq, sh_rtc_shared, - 0, "sh-rtc", rtc); + ret = devm_request_irq(&pdev->dev, rtc->periodic_irq, + sh_rtc_shared, 0, "sh-rtc", rtc); if (unlikely(ret)) { dev_err(&pdev->dev, "request IRQ failed with %d, IRQ %d\n", ret, @@ -675,8 +671,8 @@ static int __init sh_rtc_probe(struct platform_device *pdev) } } else { /* register periodic/carry/alarm irqs */ - ret = request_irq(rtc->periodic_irq, sh_rtc_periodic, - 0, "sh-rtc period", rtc); + ret = devm_request_irq(&pdev->dev, rtc->periodic_irq, + sh_rtc_periodic, 0, "sh-rtc period", rtc); if (unlikely(ret)) { dev_err(&pdev->dev, "request period IRQ failed with %d, IRQ %d\n", @@ -684,24 +680,21 @@ static int __init sh_rtc_probe(struct platform_device *pdev) goto err_unmap; } - ret = request_irq(rtc->carry_irq, sh_rtc_interrupt, - 0, "sh-rtc carry", rtc); + ret = devm_request_irq(&pdev->dev, rtc->carry_irq, + sh_rtc_interrupt, 0, "sh-rtc carry", rtc); if (unlikely(ret)) { dev_err(&pdev->dev, "request carry IRQ failed with %d, IRQ %d\n", ret, rtc->carry_irq); - free_irq(rtc->periodic_irq, rtc); goto err_unmap; } - ret = request_irq(rtc->alarm_irq, sh_rtc_alarm, - 0, "sh-rtc alarm", rtc); + ret = devm_request_irq(&pdev->dev, rtc->alarm_irq, + sh_rtc_alarm, 0, "sh-rtc alarm", rtc); if (unlikely(ret)) { dev_err(&pdev->dev, "request alarm IRQ failed with %d, IRQ %d\n", ret, rtc->alarm_irq); - free_irq(rtc->carry_irq, rtc); - free_irq(rtc->periodic_irq, rtc); goto err_unmap; } } @@ -714,13 +707,10 @@ static int __init sh_rtc_probe(struct platform_device *pdev) sh_rtc_setaie(&pdev->dev, 0); sh_rtc_setcie(&pdev->dev, 0); - rtc->rtc_dev = rtc_device_register("sh", &pdev->dev, + rtc->rtc_dev = devm_rtc_device_register(&pdev->dev, "sh", &sh_rtc_ops, THIS_MODULE); if (IS_ERR(rtc->rtc_dev)) { ret = PTR_ERR(rtc->rtc_dev); - free_irq(rtc->periodic_irq, rtc); - free_irq(rtc->carry_irq, rtc); - free_irq(rtc->alarm_irq, rtc); goto err_unmap; } @@ -737,12 +727,6 @@ static int __init sh_rtc_probe(struct platform_device *pdev) err_unmap: clk_disable(rtc->clk); - clk_put(rtc->clk); - iounmap(rtc->regbase); -err_badmap: - release_mem_region(rtc->res->start, rtc->regsize); -err_badres: - kfree(rtc); return ret; } @@ -751,26 +735,12 @@ static int __exit sh_rtc_remove(struct platform_device *pdev) { struct sh_rtc *rtc = platform_get_drvdata(pdev); - rtc_device_unregister(rtc->rtc_dev); sh_rtc_irq_set_state(&pdev->dev, 0); sh_rtc_setaie(&pdev->dev, 0); sh_rtc_setcie(&pdev->dev, 0); - free_irq(rtc->periodic_irq, rtc); - - if (rtc->carry_irq > 0) { - free_irq(rtc->carry_irq, rtc); - free_irq(rtc->alarm_irq, rtc); - } - - iounmap(rtc->regbase); - release_mem_region(rtc->res->start, rtc->regsize); - clk_disable(rtc->clk); - clk_put(rtc->clk); - - kfree(rtc); return 0; } -- cgit v0.10.2 From 3a02893f4e70725089b838053e916cc026ed93af Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:07:12 -0700 Subject: rtc: rtc-coh901331: use platform_{get,set}_drvdata() Use the wrapper functions for getting and setting the driver data using platform_device instead of using dev_{get,set}_drvdata() with &pdev->dev, so we can directly pass a struct platform_device. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-coh901331.c b/drivers/rtc/rtc-coh901331.c index c05b202..73f1575 100644 --- a/drivers/rtc/rtc-coh901331.c +++ b/drivers/rtc/rtc-coh901331.c @@ -152,7 +152,7 @@ static struct rtc_class_ops coh901331_ops = { static int __exit coh901331_remove(struct platform_device *pdev) { - struct coh901331_port *rtap = dev_get_drvdata(&pdev->dev); + struct coh901331_port *rtap = platform_get_drvdata(pdev); if (rtap) clk_unprepare(rtap->clk); @@ -264,7 +264,7 @@ static SIMPLE_DEV_PM_OPS(coh901331_pm_ops, coh901331_suspend, coh901331_resume); static void coh901331_shutdown(struct platform_device *pdev) { - struct coh901331_port *rtap = dev_get_drvdata(&pdev->dev); + struct coh901331_port *rtap = platform_get_drvdata(pdev); clk_enable(rtap->clk); writel(0, rtap->virtbase + COH901331_IRQ_MASK); -- cgit v0.10.2 From e1c2f989e691e8e124b57fd7ba4e15e7873b91e5 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:07:13 -0700 Subject: rtc: rtc-rc5t583: use platform_{get,set}_drvdata() Use the wrapper functions for getting and setting the driver data using platform_device instead of using dev_{get,set}_drvdata() with &pdev->dev, so we can directly pass a struct platform_device. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-rc5t583.c b/drivers/rtc/rtc-rc5t583.c index 8eabcf5..e53e9b1 100644 --- a/drivers/rtc/rtc-rc5t583.c +++ b/drivers/rtc/rtc-rc5t583.c @@ -273,7 +273,7 @@ static int rc5t583_rtc_probe(struct platform_device *pdev) */ static int rc5t583_rtc_remove(struct platform_device *pdev) { - struct rc5t583_rtc *rc5t583_rtc = dev_get_drvdata(&pdev->dev); + struct rc5t583_rtc *rc5t583_rtc = platform_get_drvdata(pdev); rc5t583_rtc_alarm_irq_enable(&rc5t583_rtc->rtc->dev, 0); return 0; -- cgit v0.10.2 From 52c1b7a74c6d1cebac4402beaa7c59bf5ea4b433 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:14 -0700 Subject: drivers/rtc/rtc-bq32k.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-bq32k.c b/drivers/rtc/rtc-bq32k.c index fea78bc..c74bf0d 100644 --- a/drivers/rtc/rtc-bq32k.c +++ b/drivers/rtc/rtc-bq32k.c @@ -163,11 +163,6 @@ static int bq32k_probe(struct i2c_client *client, return 0; } -static int bq32k_remove(struct i2c_client *client) -{ - return 0; -} - static const struct i2c_device_id bq32k_id[] = { { "bq32000", 0 }, { } @@ -180,7 +175,6 @@ static struct i2c_driver bq32k_driver = { .owner = THIS_MODULE, }, .probe = bq32k_probe, - .remove = bq32k_remove, .id_table = bq32k_id, }; -- cgit v0.10.2 From 0df2c54e8e3e7befb7a484d55d781b4e6ba17d83 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:15 -0700 Subject: drivers/rtc/rtc-ds1216.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Thomas Bogendoerfer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ds1216.c b/drivers/rtc/rtc-ds1216.c index c7702b7..1831c84 100644 --- a/drivers/rtc/rtc-ds1216.c +++ b/drivers/rtc/rtc-ds1216.c @@ -167,17 +167,11 @@ static int __init ds1216_rtc_probe(struct platform_device *pdev) return 0; } -static int __exit ds1216_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - static struct platform_driver ds1216_rtc_platform_driver = { .driver = { .name = "rtc-ds1216", .owner = THIS_MODULE, }, - .remove = __exit_p(ds1216_rtc_remove), }; static int __init ds1216_rtc_init(void) -- cgit v0.10.2 From b943abd37ea2209acb0076f2a2fc590a053d15c1 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:16 -0700 Subject: drivers/rtc/rtc-ds1286.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Thomas Bogendoerfer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ds1286.c b/drivers/rtc/rtc-ds1286.c index 398c96a..50e109b7 100644 --- a/drivers/rtc/rtc-ds1286.c +++ b/drivers/rtc/rtc-ds1286.c @@ -353,18 +353,12 @@ static int ds1286_probe(struct platform_device *pdev) return 0; } -static int ds1286_remove(struct platform_device *pdev) -{ - return 0; -} - static struct platform_driver ds1286_platform_driver = { .driver = { .name = "rtc-ds1286", .owner = THIS_MODULE, }, .probe = ds1286_probe, - .remove = ds1286_remove, }; module_platform_driver(ds1286_platform_driver); -- cgit v0.10.2 From 53ac70aa4dbfa08005768aafeb1420ca101f0642 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:17 -0700 Subject: drivers/rtc/rtc-ds1672.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ds1672.c b/drivers/rtc/rtc-ds1672.c index 3fc2a47..18e2d84 100644 --- a/drivers/rtc/rtc-ds1672.c +++ b/drivers/rtc/rtc-ds1672.c @@ -153,11 +153,6 @@ static const struct rtc_class_ops ds1672_rtc_ops = { .set_mmss = ds1672_rtc_set_mmss, }; -static int ds1672_remove(struct i2c_client *client) -{ - return 0; -} - static int ds1672_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -210,7 +205,6 @@ static struct i2c_driver ds1672_driver = { .name = "rtc-ds1672", }, .probe = &ds1672_probe, - .remove = &ds1672_remove, .id_table = ds1672_id, }; -- cgit v0.10.2 From 940bca371b53eb842299cd71f55880fec229885e Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:17 -0700 Subject: drivers/rtc/rtc-ds3234.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Dennis Aberilla Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ds3234.c b/drivers/rtc/rtc-ds3234.c index 67f1a80..4c9ba53 100644 --- a/drivers/rtc/rtc-ds3234.c +++ b/drivers/rtc/rtc-ds3234.c @@ -156,18 +156,12 @@ static int ds3234_probe(struct spi_device *spi) return 0; } -static int ds3234_remove(struct spi_device *spi) -{ - return 0; -} - static struct spi_driver ds3234_driver = { .driver = { .name = "ds3234", .owner = THIS_MODULE, }, .probe = ds3234_probe, - .remove = ds3234_remove, }; module_spi_driver(ds3234_driver); -- cgit v0.10.2 From f3828d5dd14af4c1d0a6ae4bf50895eefcf54f64 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:18 -0700 Subject: drivers/rtc/rtc-ds1390.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Mark Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ds1390.c b/drivers/rtc/rtc-ds1390.c index 289af41..be9d8c0 100644 --- a/drivers/rtc/rtc-ds1390.c +++ b/drivers/rtc/rtc-ds1390.c @@ -154,18 +154,12 @@ static int ds1390_probe(struct spi_device *spi) return res; } -static int ds1390_remove(struct spi_device *spi) -{ - return 0; -} - static struct spi_driver ds1390_driver = { .driver = { .name = "rtc-ds1390", .owner = THIS_MODULE, }, .probe = ds1390_probe, - .remove = ds1390_remove, }; module_spi_driver(ds1390_driver); -- cgit v0.10.2 From e160419a05bc6c0948f646d46b2e91741a03ecf1 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:19 -0700 Subject: drivers/rtc/rtc-efi.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: dann frazier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c index b3c8c0b..797aa02 100644 --- a/drivers/rtc/rtc-efi.c +++ b/drivers/rtc/rtc-efi.c @@ -201,17 +201,11 @@ static int __init efi_rtc_probe(struct platform_device *dev) return 0; } -static int __exit efi_rtc_remove(struct platform_device *dev) -{ - return 0; -} - static struct platform_driver efi_rtc_driver = { .driver = { .name = "rtc-efi", .owner = THIS_MODULE, }, - .remove = __exit_p(efi_rtc_remove), }; module_platform_driver_probe(efi_rtc_driver, efi_rtc_probe); -- cgit v0.10.2 From 2a7a145e787a900cca5939c52d2df78c20b7b4de Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:20 -0700 Subject: drivers/rtc/rtc-em3027.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-em3027.c b/drivers/rtc/rtc-em3027.c index 3f9eb57..fccf366 100644 --- a/drivers/rtc/rtc-em3027.c +++ b/drivers/rtc/rtc-em3027.c @@ -131,11 +131,6 @@ static int em3027_probe(struct i2c_client *client, return 0; } -static int em3027_remove(struct i2c_client *client) -{ - return 0; -} - static struct i2c_device_id em3027_id[] = { { "em3027", 0 }, { } @@ -146,7 +141,6 @@ static struct i2c_driver em3027_driver = { .name = "rtc-em3027", }, .probe = &em3027_probe, - .remove = &em3027_remove, .id_table = em3027_id, }; -- cgit v0.10.2 From 0814023b1c8b5f1f8ead68e0df161bcdedf83c1f Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:21 -0700 Subject: drivers/rtc/rtc-fm3130.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Sergey Lapin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-fm3130.c b/drivers/rtc/rtc-fm3130.c index 8f053da..83c3b30 100644 --- a/drivers/rtc/rtc-fm3130.c +++ b/drivers/rtc/rtc-fm3130.c @@ -520,18 +520,12 @@ exit_free: return err; } -static int fm3130_remove(struct i2c_client *client) -{ - return 0; -} - static struct i2c_driver fm3130_driver = { .driver = { .name = "rtc-fm3130", .owner = THIS_MODULE, }, .probe = fm3130_probe, - .remove = fm3130_remove, .id_table = fm3130_id, }; -- cgit v0.10.2 From 2e5526fcd8762398fbef1eec3ae59849437f9765 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:22 -0700 Subject: drivers/rtc/rtc-isl12022.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Roman Fietze Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-isl12022.c b/drivers/rtc/rtc-isl12022.c index a1bbbb8..6f2de99 100644 --- a/drivers/rtc/rtc-isl12022.c +++ b/drivers/rtc/rtc-isl12022.c @@ -273,11 +273,6 @@ static int isl12022_probe(struct i2c_client *client, return 0; } -static int isl12022_remove(struct i2c_client *client) -{ - return 0; -} - static const struct i2c_device_id isl12022_id[] = { { "isl12022", 0 }, { } @@ -289,7 +284,6 @@ static struct i2c_driver isl12022_driver = { .name = "rtc-isl12022", }, .probe = isl12022_probe, - .remove = isl12022_remove, .id_table = isl12022_id, }; -- cgit v0.10.2 From d95c616352e19e735f6d1578f64f586a87eb2004 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:23 -0700 Subject: drivers/rtc/rtc-m41t93.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Nikolaus Voss Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-m41t93.c b/drivers/rtc/rtc-m41t93.c index 9707d36..4698c7e 100644 --- a/drivers/rtc/rtc-m41t93.c +++ b/drivers/rtc/rtc-m41t93.c @@ -194,19 +194,12 @@ static int m41t93_probe(struct spi_device *spi) return 0; } - -static int m41t93_remove(struct spi_device *spi) -{ - return 0; -} - static struct spi_driver m41t93_driver = { .driver = { .name = "rtc-m41t93", .owner = THIS_MODULE, }, .probe = m41t93_probe, - .remove = m41t93_remove, }; module_spi_driver(m41t93_driver); -- cgit v0.10.2 From 40ac8729ea4cc383d62046620f33b205a28391ba Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:24 -0700 Subject: drivers/rtc/rtc-m48t35.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Thomas Bogendoerfer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-m48t35.c b/drivers/rtc/rtc-m48t35.c index 3744424..25678d4 100644 --- a/drivers/rtc/rtc-m48t35.c +++ b/drivers/rtc/rtc-m48t35.c @@ -180,18 +180,12 @@ static int m48t35_probe(struct platform_device *pdev) return 0; } -static int m48t35_remove(struct platform_device *pdev) -{ - return 0; -} - static struct platform_driver m48t35_platform_driver = { .driver = { .name = "rtc-m48t35", .owner = THIS_MODULE, }, .probe = m48t35_probe, - .remove = m48t35_remove, }; module_platform_driver(m48t35_platform_driver); -- cgit v0.10.2 From c9e379f5af5b9e2db7f1713fa55bf27db771686a Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:25 -0700 Subject: drivers/rtc/rtc-generic.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Kyle McMartin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-generic.c b/drivers/rtc/rtc-generic.c index 06279ce..9b6725e 100644 --- a/drivers/rtc/rtc-generic.c +++ b/drivers/rtc/rtc-generic.c @@ -48,17 +48,11 @@ static int __init generic_rtc_probe(struct platform_device *dev) return 0; } -static int __exit generic_rtc_remove(struct platform_device *dev) -{ - return 0; -} - static struct platform_driver generic_rtc_driver = { .driver = { .name = "rtc-generic", .owner = THIS_MODULE, }, - .remove = __exit_p(generic_rtc_remove), }; module_platform_driver_probe(generic_rtc_driver, generic_rtc_probe); -- cgit v0.10.2 From a29310497038410ad17ff5ec257d3c34568b96d4 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:26 -0700 Subject: drivers/rtc/rtc-m41t94.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Kim B. Heino Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-m41t94.c b/drivers/rtc/rtc-m41t94.c index 7454ef0..8d800b1 100644 --- a/drivers/rtc/rtc-m41t94.c +++ b/drivers/rtc/rtc-m41t94.c @@ -134,18 +134,12 @@ static int m41t94_probe(struct spi_device *spi) return 0; } -static int m41t94_remove(struct spi_device *spi) -{ - return 0; -} - static struct spi_driver m41t94_driver = { .driver = { .name = "rtc-m41t94", .owner = THIS_MODULE, }, .probe = m41t94_probe, - .remove = m41t94_remove, }; module_spi_driver(m41t94_driver); -- cgit v0.10.2 From 060f55902057b9a46ee5d78a10f4d1371fca2838 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:27 -0700 Subject: drivers/rtc/rtc-max6902.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-max6902.c b/drivers/rtc/rtc-max6902.c index e9dd56c..ac3f419 100644 --- a/drivers/rtc/rtc-max6902.c +++ b/drivers/rtc/rtc-max6902.c @@ -143,18 +143,12 @@ static int max6902_probe(struct spi_device *spi) return 0; } -static int max6902_remove(struct spi_device *spi) -{ - return 0; -} - static struct spi_driver max6902_driver = { .driver = { .name = "rtc-max6902", .owner = THIS_MODULE, }, .probe = max6902_probe, - .remove = max6902_remove, }; module_spi_driver(max6902_driver); -- cgit v0.10.2 From a1396d9f127952a09a40c8cd3285ff9d062282a0 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:28 -0700 Subject: drivers/rtc/rtc-max6900.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Dale Farnsworth Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-max6900.c b/drivers/rtc/rtc-max6900.c index 8669d6d..55969b1 100644 --- a/drivers/rtc/rtc-max6900.c +++ b/drivers/rtc/rtc-max6900.c @@ -212,11 +212,6 @@ static int max6900_rtc_set_time(struct device *dev, struct rtc_time *tm) return max6900_i2c_set_time(to_i2c_client(dev), tm); } -static int max6900_remove(struct i2c_client *client) -{ - return 0; -} - static const struct rtc_class_ops max6900_rtc_ops = { .read_time = max6900_rtc_read_time, .set_time = max6900_rtc_set_time, @@ -252,7 +247,6 @@ static struct i2c_driver max6900_driver = { .name = "rtc-max6900", }, .probe = max6900_probe, - .remove = max6900_remove, .id_table = max6900_id, }; -- cgit v0.10.2 From 17d9e04c62c2980bf36c846541282204b22c1bf2 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:29 -0700 Subject: drivers/rtc/rtc-max8907.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-max8907.c b/drivers/rtc/rtc-max8907.c index 86afb79..8e45b3c 100644 --- a/drivers/rtc/rtc-max8907.c +++ b/drivers/rtc/rtc-max8907.c @@ -213,18 +213,12 @@ static int max8907_rtc_probe(struct platform_device *pdev) return ret; } -static int max8907_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - static struct platform_driver max8907_rtc_driver = { .driver = { .name = "max8907-rtc", .owner = THIS_MODULE, }, .probe = max8907_rtc_probe, - .remove = max8907_rtc_remove, }; module_platform_driver(max8907_rtc_driver); -- cgit v0.10.2 From eda328fbe3e1f82a12d56ffae3112086e2f1c578 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:30 -0700 Subject: drivers/rtc/rtc-max77686.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Chiwoong Byun Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c index 441c5c2..9915cb9 100644 --- a/drivers/rtc/rtc-max77686.c +++ b/drivers/rtc/rtc-max77686.c @@ -567,11 +567,6 @@ err_rtc: return ret; } -static int max77686_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - static void max77686_rtc_shutdown(struct platform_device *pdev) { #ifdef MAX77686_RTC_WTSR_SMPL @@ -610,7 +605,6 @@ static struct platform_driver max77686_rtc_driver = { .owner = THIS_MODULE, }, .probe = max77686_rtc_probe, - .remove = max77686_rtc_remove, .shutdown = max77686_rtc_shutdown, .id_table = rtc_id, }; -- cgit v0.10.2 From f7de2a1d6db0c510c1f23363325ee59cf84be548 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:31 -0700 Subject: drivers/rtc/rtc-max8997.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-max8997.c b/drivers/rtc/rtc-max8997.c index 1683904..0777c01 100644 --- a/drivers/rtc/rtc-max8997.c +++ b/drivers/rtc/rtc-max8997.c @@ -507,11 +507,6 @@ err_out: return ret; } -static int max8997_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - static void max8997_rtc_shutdown(struct platform_device *pdev) { struct max8997_rtc_info *info = platform_get_drvdata(pdev); @@ -531,7 +526,6 @@ static struct platform_driver max8997_rtc_driver = { .owner = THIS_MODULE, }, .probe = max8997_rtc_probe, - .remove = max8997_rtc_remove, .shutdown = max8997_rtc_shutdown, .id_table = rtc_id, }; -- cgit v0.10.2 From af99ef9ccc60c2ba1c43604259fee75eaa932c82 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:32 -0700 Subject: drivers/rtc/rtc-pcf8523.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Acked-by: Thierry Reding Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c index 305c951..5c8f822 100644 --- a/drivers/rtc/rtc-pcf8523.c +++ b/drivers/rtc/rtc-pcf8523.c @@ -317,11 +317,6 @@ static int pcf8523_probe(struct i2c_client *client, return 0; } -static int pcf8523_remove(struct i2c_client *client) -{ - return 0; -} - static const struct i2c_device_id pcf8523_id[] = { { "pcf8523", 0 }, { } @@ -343,7 +338,6 @@ static struct i2c_driver pcf8523_driver = { .of_match_table = of_match_ptr(pcf8523_of_match), }, .probe = pcf8523_probe, - .remove = pcf8523_remove, .id_table = pcf8523_id, }; module_i2c_driver(pcf8523_driver); -- cgit v0.10.2 From b1fb593b782eeab2cf4282f17d58a4939dbef126 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:33 -0700 Subject: drivers/rtc/rtc-pcf8563.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index 97b354a..9ea2d07 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -269,11 +269,6 @@ static int pcf8563_probe(struct i2c_client *client, return 0; } -static int pcf8563_remove(struct i2c_client *client) -{ - return 0; -} - static const struct i2c_device_id pcf8563_id[] = { { "pcf8563", 0 }, { "rtc8564", 0 }, @@ -296,7 +291,6 @@ static struct i2c_driver pcf8563_driver = { .of_match_table = of_match_ptr(pcf8563_of_match), }, .probe = pcf8563_probe, - .remove = pcf8563_remove, .id_table = pcf8563_id, }; -- cgit v0.10.2 From 54aa095ffcc52f9e81bc84159f4af223730d3681 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:34 -0700 Subject: drivers/rtc/rtc-pcf8583.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-pcf8583.c b/drivers/rtc/rtc-pcf8583.c index 9971f7f..ab740cd 100644 --- a/drivers/rtc/rtc-pcf8583.c +++ b/drivers/rtc/rtc-pcf8583.c @@ -290,11 +290,6 @@ static int pcf8583_probe(struct i2c_client *client, return 0; } -static int pcf8583_remove(struct i2c_client *client) -{ - return 0; -} - static const struct i2c_device_id pcf8583_id[] = { { "pcf8583", 0 }, { } @@ -307,7 +302,6 @@ static struct i2c_driver pcf8583_driver = { .owner = THIS_MODULE, }, .probe = pcf8583_probe, - .remove = pcf8583_remove, .id_table = pcf8583_id, }; -- cgit v0.10.2 From 164b8649186c1746750d063c53b71c42646fe337 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:35 -0700 Subject: drivers/rtc/rtc-ps3.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Acked-by: Geoff Levand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ps3.c b/drivers/rtc/rtc-ps3.c index 4bb825b..554ada5 100644 --- a/drivers/rtc/rtc-ps3.c +++ b/drivers/rtc/rtc-ps3.c @@ -71,17 +71,11 @@ static int __init ps3_rtc_probe(struct platform_device *dev) return 0; } -static int __exit ps3_rtc_remove(struct platform_device *dev) -{ - return 0; -} - static struct platform_driver ps3_rtc_driver = { .driver = { .name = "rtc-ps3", .owner = THIS_MODULE, }, - .remove = __exit_p(ps3_rtc_remove), }; module_platform_driver_probe(ps3_rtc_driver, ps3_rtc_probe); -- cgit v0.10.2 From a87b0f802bbb89c83c1b7c921c3752e646827a22 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:36 -0700 Subject: drivers/rtc/rtc-rs5c313.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Nobuhiro Iwamatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-rs5c313.c b/drivers/rtc/rtc-rs5c313.c index 6af0f1f..68f7856 100644 --- a/drivers/rtc/rtc-rs5c313.c +++ b/drivers/rtc/rtc-rs5c313.c @@ -378,18 +378,12 @@ static int rs5c313_rtc_probe(struct platform_device *pdev) return 0; } -static int rs5c313_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - static struct platform_driver rs5c313_rtc_platform_driver = { .driver = { .name = DRV_NAME, .owner = THIS_MODULE, }, .probe = rs5c313_rtc_probe, - .remove = rs5c313_rtc_remove, }; static int __init rs5c313_rtc_init(void) -- cgit v0.10.2 From 832ccd3fad7cadf9dbba8e28591be20a74b805a4 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:36 -0700 Subject: drivers/rtc/rtc-rv3029c2.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Gregory Hermant Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-rv3029c2.c b/drivers/rtc/rtc-rv3029c2.c index 9100a34..1a779a6 100644 --- a/drivers/rtc/rtc-rv3029c2.c +++ b/drivers/rtc/rtc-rv3029c2.c @@ -412,17 +412,11 @@ static int rv3029c2_probe(struct i2c_client *client, return 0; } -static int rv3029c2_remove(struct i2c_client *client) -{ - return 0; -} - static struct i2c_driver rv3029c2_driver = { .driver = { .name = "rtc-rv3029c2", }, .probe = rv3029c2_probe, - .remove = rv3029c2_remove, .id_table = rv3029c2_id, }; -- cgit v0.10.2 From c281735c52623e46cdef4b7cc74e81d117e944da Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:37 -0700 Subject: drivers/rtc/rtc-rx4581.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Torben Hohn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-rx4581.c b/drivers/rtc/rtc-rx4581.c index 84eb08d..6889222 100644 --- a/drivers/rtc/rtc-rx4581.c +++ b/drivers/rtc/rtc-rx4581.c @@ -282,11 +282,6 @@ static int rx4581_probe(struct spi_device *spi) return 0; } -static int rx4581_remove(struct spi_device *spi) -{ - return 0; -} - static const struct spi_device_id rx4581_id[] = { { "rx4581", 0 }, { } @@ -299,7 +294,6 @@ static struct spi_driver rx4581_driver = { .owner = THIS_MODULE, }, .probe = rx4581_probe, - .remove = rx4581_remove, .id_table = rx4581_id, }; -- cgit v0.10.2 From 75a6cef3c6d2fe7b67e6860baa0a2c9c61058adf Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:38 -0700 Subject: drivers/rtc/rtc-rs5c348.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Acked-by: Atsushi Nemoto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-rs5c348.c b/drivers/rtc/rtc-rs5c348.c index 2c37df3..f7a90a1 100644 --- a/drivers/rtc/rtc-rs5c348.c +++ b/drivers/rtc/rtc-rs5c348.c @@ -218,18 +218,12 @@ static int rs5c348_probe(struct spi_device *spi) return ret; } -static int rs5c348_remove(struct spi_device *spi) -{ - return 0; -} - static struct spi_driver rs5c348_driver = { .driver = { .name = "rtc-rs5c348", .owner = THIS_MODULE, }, .probe = rs5c348_probe, - .remove = rs5c348_remove, }; module_spi_driver(rs5c348_driver); -- cgit v0.10.2 From 753190928dd7e08f529f2a16a24fdb58080a481b Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:39 -0700 Subject: drivers/rtc/rtc-rx8581.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Martyn Welch Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-rx8581.c b/drivers/rtc/rtc-rx8581.c index 07f3037..00b0eb7 100644 --- a/drivers/rtc/rtc-rx8581.c +++ b/drivers/rtc/rtc-rx8581.c @@ -251,11 +251,6 @@ static int rx8581_probe(struct i2c_client *client, return 0; } -static int rx8581_remove(struct i2c_client *client) -{ - return 0; -} - static const struct i2c_device_id rx8581_id[] = { { "rx8581", 0 }, { } @@ -268,7 +263,6 @@ static struct i2c_driver rx8581_driver = { .owner = THIS_MODULE, }, .probe = rx8581_probe, - .remove = rx8581_remove, .id_table = rx8581_id, }; -- cgit v0.10.2 From 0cb4b84fd3806d1cdad3f37ee606fd9f8d506264 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:40 -0700 Subject: drivers/rtc/rtc-snvs.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c index b04f09a..316a342 100644 --- a/drivers/rtc/rtc-snvs.c +++ b/drivers/rtc/rtc-snvs.c @@ -294,11 +294,6 @@ static int snvs_rtc_probe(struct platform_device *pdev) return 0; } -static int snvs_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - #ifdef CONFIG_PM_SLEEP static int snvs_rtc_suspend(struct device *dev) { @@ -337,7 +332,6 @@ static struct platform_driver snvs_rtc_driver = { .of_match_table = of_match_ptr(snvs_dt_ids), }, .probe = snvs_rtc_probe, - .remove = snvs_rtc_remove, }; module_platform_driver(snvs_rtc_driver); -- cgit v0.10.2 From 15b005b7ef5fd56ca60bf734a480d87592bccd27 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:41 -0700 Subject: drivers/rtc/rtc-starfire.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-starfire.c b/drivers/rtc/rtc-starfire.c index 987b5ec..f7d8a6d 100644 --- a/drivers/rtc/rtc-starfire.c +++ b/drivers/rtc/rtc-starfire.c @@ -51,17 +51,11 @@ static int __init starfire_rtc_probe(struct platform_device *pdev) return 0; } -static int __exit starfire_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - static struct platform_driver starfire_rtc_driver = { .driver = { .name = "rtc-starfire", .owner = THIS_MODULE, }, - .remove = __exit_p(starfire_rtc_remove), }; module_platform_driver_probe(starfire_rtc_driver, starfire_rtc_probe); -- cgit v0.10.2 From d55b6643e54bdb0822c0db5c0b4dfc4d89c6b23d Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:42 -0700 Subject: drivers/rtc/rtc-sun4v.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-sun4v.c b/drivers/rtc/rtc-sun4v.c index ce42e5f..bc97ff9 100644 --- a/drivers/rtc/rtc-sun4v.c +++ b/drivers/rtc/rtc-sun4v.c @@ -92,17 +92,11 @@ static int __init sun4v_rtc_probe(struct platform_device *pdev) return 0; } -static int __exit sun4v_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - static struct platform_driver sun4v_rtc_driver = { .driver = { .name = "rtc-sun4v", .owner = THIS_MODULE, }, - .remove = __exit_p(sun4v_rtc_remove), }; module_platform_driver_probe(sun4v_rtc_driver, sun4v_rtc_probe); -- cgit v0.10.2 From 7deef7f2983209b3bb34cbda91e1080f3e43d74a Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:43 -0700 Subject: drivers/rtc/rtc-tps80031.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Cc: Laxman Dewangan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-tps80031.c b/drivers/rtc/rtc-tps80031.c index 72662ea..3e400dc 100644 --- a/drivers/rtc/rtc-tps80031.c +++ b/drivers/rtc/rtc-tps80031.c @@ -298,11 +298,6 @@ static int tps80031_rtc_probe(struct platform_device *pdev) return 0; } -static int tps80031_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - #ifdef CONFIG_PM_SLEEP static int tps80031_rtc_suspend(struct device *dev) { @@ -333,7 +328,6 @@ static struct platform_driver tps80031_rtc_driver = { .pm = &tps80031_pm_ops, }, .probe = tps80031_rtc_probe, - .remove = tps80031_rtc_remove, }; module_platform_driver(tps80031_rtc_driver); -- cgit v0.10.2 From 65ee88c9c68ad533463d1030d2abd859d7a4d9f9 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:44 -0700 Subject: drivers/rtc/rtc-wm831x.c: remove empty function After the switch to devm_* functions and the removal of rtc_device_unregister(), the 'remove' function does not do anything. Delete it. Signed-off-by: Sachin Kamat Acked-by: Mark Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-wm831x.c b/drivers/rtc/rtc-wm831x.c index 8d65b94..75aea4c 100644 --- a/drivers/rtc/rtc-wm831x.c +++ b/drivers/rtc/rtc-wm831x.c @@ -460,11 +460,6 @@ err: return ret; } -static int wm831x_rtc_remove(struct platform_device *pdev) -{ - return 0; -} - static const struct dev_pm_ops wm831x_rtc_pm_ops = { .suspend = wm831x_rtc_suspend, .resume = wm831x_rtc_resume, @@ -478,7 +473,6 @@ static const struct dev_pm_ops wm831x_rtc_pm_ops = { static struct platform_driver wm831x_rtc_driver = { .probe = wm831x_rtc_probe, - .remove = wm831x_rtc_remove, .driver = { .name = "wm831x-rtc", .pm = &wm831x_rtc_pm_ops, -- cgit v0.10.2 From 25d053cf1040e6430fff679854b3710edb0b7fee Mon Sep 17 00:00:00 2001 From: Alexandre Torgue Date: Wed, 3 Jul 2013 15:07:45 -0700 Subject: drivers/rtc/rtc-ab8500.c: add second resolution to rtc driver Android expects the RTC to have second resolution. On ab8540 cut2 RTC block has a new register which allows setting seconds for wakeup alarms. Existing registers (minutes hi, mid and low) have seen their offsets changed. Here is the new mapping: * AlarmSec (A) 0x22 * AlarmMinLow (M) from 0x8 to 0x23 * AlarmMinMid (M) from 0x9 to 0x24 * AlarmMinHigh (M) from 0xA to 0x25 Signed-off-by: Julien Delacou Signed-off-by: Alexandre Torgue Acked-by: Lee Jones Acked-by: Linus Walleij Cc: Samuel Ortiz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ab8500.c b/drivers/rtc/rtc-ab8500.c index c5b62d4..727e2f5 100644 --- a/drivers/rtc/rtc-ab8500.c +++ b/drivers/rtc/rtc-ab8500.c @@ -35,6 +35,10 @@ #define AB8500_RTC_FORCE_BKUP_REG 0x0D #define AB8500_RTC_CALIB_REG 0x0E #define AB8500_RTC_SWITCH_STAT_REG 0x0F +#define AB8540_RTC_ALRM_SEC 0x22 +#define AB8540_RTC_ALRM_MIN_LOW_REG 0x23 +#define AB8540_RTC_ALRM_MIN_MID_REG 0x24 +#define AB8540_RTC_ALRM_MIN_HI_REG 0x25 /* RtcReadRequest bits */ #define RTC_READ_REQUEST 0x01 @@ -58,6 +62,11 @@ static const u8 ab8500_rtc_alarm_regs[] = { AB8500_RTC_ALRM_MIN_LOW_REG }; +static const u8 ab8540_rtc_alarm_regs[] = { + AB8540_RTC_ALRM_MIN_HI_REG, AB8540_RTC_ALRM_MIN_MID_REG, + AB8540_RTC_ALRM_MIN_LOW_REG, AB8540_RTC_ALRM_SEC +}; + /* Calculate the seconds from 1970 to 01-01-2000 00:00:00 */ static unsigned long get_elapsed_seconds(int year) { @@ -267,6 +276,42 @@ static int ab8500_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) return ab8500_rtc_irq_enable(dev, alarm->enabled); } +static int ab8540_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) +{ + int retval, i; + unsigned char buf[ARRAY_SIZE(ab8540_rtc_alarm_regs)]; + unsigned long mins, secs = 0; + + if (alarm->time.tm_year < (AB8500_RTC_EPOCH - 1900)) { + dev_dbg(dev, "year should be equal to or greater than %d\n", + AB8500_RTC_EPOCH); + return -EINVAL; + } + + /* Get the number of seconds since 1970 */ + rtc_tm_to_time(&alarm->time, &secs); + + /* + * Convert it to the number of seconds since 01-01-2000 00:00:00 + */ + secs -= get_elapsed_seconds(AB8500_RTC_EPOCH); + mins = secs / 60; + + buf[3] = secs % 60; + buf[2] = mins & 0xFF; + buf[1] = (mins >> 8) & 0xFF; + buf[0] = (mins >> 16) & 0xFF; + + /* Set the alarm time */ + for (i = 0; i < ARRAY_SIZE(ab8540_rtc_alarm_regs); i++) { + retval = abx500_set_register_interruptible(dev, AB8500_RTC, + ab8540_rtc_alarm_regs[i], buf[i]); + if (retval < 0) + return retval; + } + + return ab8500_rtc_irq_enable(dev, alarm->enabled); +} static int ab8500_rtc_set_calibration(struct device *dev, int calibration) { @@ -389,8 +434,22 @@ static const struct rtc_class_ops ab8500_rtc_ops = { .alarm_irq_enable = ab8500_rtc_irq_enable, }; +static const struct rtc_class_ops ab8540_rtc_ops = { + .read_time = ab8500_rtc_read_time, + .set_time = ab8500_rtc_set_time, + .read_alarm = ab8500_rtc_read_alarm, + .set_alarm = ab8540_rtc_set_alarm, + .alarm_irq_enable = ab8500_rtc_irq_enable, +}; + +static struct platform_device_id ab85xx_rtc_ids[] = { + { "ab8500-rtc", (kernel_ulong_t)&ab8500_rtc_ops, }, + { "ab8540-rtc", (kernel_ulong_t)&ab8540_rtc_ops, }, +}; + static int ab8500_rtc_probe(struct platform_device *pdev) { + const struct platform_device_id *platid = platform_get_device_id(pdev); int err; struct rtc_device *rtc; u8 rtc_ctrl; @@ -423,7 +482,8 @@ static int ab8500_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, true); rtc = devm_rtc_device_register(&pdev->dev, "ab8500-rtc", - &ab8500_rtc_ops, THIS_MODULE); + (struct rtc_class_ops *)platid->driver_data, + THIS_MODULE); if (IS_ERR(rtc)) { dev_err(&pdev->dev, "Registration failed\n"); err = PTR_ERR(rtc); @@ -461,6 +521,7 @@ static struct platform_driver ab8500_rtc_driver = { }, .probe = ab8500_rtc_probe, .remove = ab8500_rtc_remove, + .id_table = ab85xx_rtc_ids, }; module_platform_driver(ab8500_rtc_driver); -- cgit v0.10.2 From dfc657b1330990213a3865aaef9d8af563ccad51 Mon Sep 17 00:00:00 2001 From: Sergey Yanovich Date: Wed, 3 Jul 2013 15:07:46 -0700 Subject: drivers/rtc/rtc-ds1302.c: handle write protection This chip has a control register and can prevent altering saved clock. Without this patch we could have: (arm)root@pac14:~# date Tue May 21 03:08:27 MSK 2013 (arm)root@pac14:~# /etc/init.d/hwclock.sh show Tue May 21 11:13:58 2013 -0.067322 seconds (arm)root@pac14:~# /etc/init.d/hwclock.sh stop [info] Saving the system clock. [info] Hardware Clock updated to Tue May 21 03:09:01 MSK 2013. (arm)root@pac14:~# /etc/init.d/hwclock.sh show Tue May 21 11:14:15 2013 -0.624272 seconds The patch enables write access to rtc before the driver tries to write time and re-disables when time data is written. Signed-off-by: Sergey Yanovich Acked-by: Marc Zyngier Cc: Alessandro Zummo Cc: Sachin Kamat Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c index 7533b72..07e8d79 100644 --- a/drivers/rtc/rtc-ds1302.c +++ b/drivers/rtc/rtc-ds1302.c @@ -23,8 +23,12 @@ #define RTC_CMD_READ 0x81 /* Read command */ #define RTC_CMD_WRITE 0x80 /* Write command */ +#define RTC_CMD_WRITE_ENABLE 0x00 /* Write enable */ +#define RTC_CMD_WRITE_DISABLE 0x80 /* Write disable */ + #define RTC_ADDR_RAM0 0x20 /* Address of RAM0 */ #define RTC_ADDR_TCR 0x08 /* Address of trickle charge register */ +#define RTC_ADDR_CTRL 0x07 /* Address of control register */ #define RTC_ADDR_YEAR 0x06 /* Address of year register */ #define RTC_ADDR_DAY 0x05 /* Address of day of week register */ #define RTC_ADDR_MON 0x04 /* Address of month register */ @@ -161,6 +165,7 @@ static int ds1302_rtc_read_time(struct device *dev, struct rtc_time *tm) static int ds1302_rtc_set_time(struct device *dev, struct rtc_time *tm) { + ds1302_writebyte(RTC_ADDR_CTRL, RTC_CMD_WRITE_ENABLE); /* Stop RTC */ ds1302_writebyte(RTC_ADDR_SEC, ds1302_readbyte(RTC_ADDR_SEC) | 0x80); @@ -175,6 +180,8 @@ static int ds1302_rtc_set_time(struct device *dev, struct rtc_time *tm) /* Start RTC */ ds1302_writebyte(RTC_ADDR_SEC, ds1302_readbyte(RTC_ADDR_SEC) & ~0x80); + ds1302_writebyte(RTC_ADDR_CTRL, RTC_CMD_WRITE_DISABLE); + return 0; } -- cgit v0.10.2 From ad87a766479d027f5966c781605bc9f7bf8bab67 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:07:47 -0700 Subject: drivers/rtc/rtc-mpc5121.c: use platform_{get,set}_drvdata() Use the wrapper functions for getting and setting the driver data using platform_device instead of using dev_{get,set}_drvdata() with &pdev->dev, so we can directly pass a struct platform_device. Signed-off-by: Jingoo Han Cc: Grant Likely Cc: Rob Herring Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-mpc5121.c b/drivers/rtc/rtc-mpc5121.c index 4c02497..9c8f609 100644 --- a/drivers/rtc/rtc-mpc5121.c +++ b/drivers/rtc/rtc-mpc5121.c @@ -324,7 +324,7 @@ static int mpc5121_rtc_probe(struct platform_device *op) device_init_wakeup(&op->dev, 1); - dev_set_drvdata(&op->dev, rtc); + platform_set_drvdata(op, rtc); rtc->irq = irq_of_parse_and_map(op->dev.of_node, 1); err = request_irq(rtc->irq, mpc5121_rtc_handler, 0, @@ -382,7 +382,7 @@ out_dispose: static int mpc5121_rtc_remove(struct platform_device *op) { - struct mpc5121_rtc_data *rtc = dev_get_drvdata(&op->dev); + struct mpc5121_rtc_data *rtc = platform_get_drvdata(op); struct mpc5121_rtc_regs __iomem *regs = rtc->regs; /* disable interrupt, so there are no nasty surprises */ -- cgit v0.10.2 From 11f54d05865c009a468e9d7adaa7414749daa6ca Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:48 -0700 Subject: drivers/rtc/rtc-da9052.c: use PTR_RET() Use of PTR_RET() simplifies the code. Signed-off-by: Sachin Kamat Cc: David Dajun Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-da9052.c b/drivers/rtc/rtc-da9052.c index 2f7fdd7..9c8c194 100644 --- a/drivers/rtc/rtc-da9052.c +++ b/drivers/rtc/rtc-da9052.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -249,10 +250,7 @@ static int da9052_rtc_probe(struct platform_device *pdev) rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &da9052_rtc_ops, THIS_MODULE); - if (IS_ERR(rtc->rtc)) - return PTR_ERR(rtc->rtc); - - return 0; + return PTR_RET(rtc->rtc); } static struct platform_driver da9052_rtc_driver = { -- cgit v0.10.2 From 17cc54b20e4a06c081dec6abc9317a69605db541 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:49 -0700 Subject: drivers/rtc/rtc-isl12022.c: use PTR_RET() Use of PTR_RET() simplifies the code. Signed-off-by: Sachin Kamat Cc: Roman Fietze Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-isl12022.c b/drivers/rtc/rtc-isl12022.c index 6f2de99..5dbdc44 100644 --- a/drivers/rtc/rtc-isl12022.c +++ b/drivers/rtc/rtc-isl12022.c @@ -16,6 +16,7 @@ #include #include #include +#include #define DRV_VERSION "0.1" @@ -267,10 +268,7 @@ static int isl12022_probe(struct i2c_client *client, isl12022->rtc = devm_rtc_device_register(&client->dev, isl12022_driver.driver.name, &isl12022_rtc_ops, THIS_MODULE); - if (IS_ERR(isl12022->rtc)) - return PTR_ERR(isl12022->rtc); - - return 0; + return PTR_RET(isl12022->rtc); } static const struct i2c_device_id isl12022_id[] = { -- cgit v0.10.2 From 8ff7b7d90ed3f2181ec8342758670891bba2f7bd Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:50 -0700 Subject: drivers/rtc/rtc-m48t35.c: use PTR_RET() Use of PTR_RET() simplifies the code. Signed-off-by: Sachin Kamat Cc: Thomas Bogendoerfer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-m48t35.c b/drivers/rtc/rtc-m48t35.c index 25678d4..23c3779 100644 --- a/drivers/rtc/rtc-m48t35.c +++ b/drivers/rtc/rtc-m48t35.c @@ -20,6 +20,7 @@ #include #include #include +#include #define DRV_VERSION "1.0" @@ -174,10 +175,7 @@ static int m48t35_probe(struct platform_device *pdev) priv->rtc = devm_rtc_device_register(&pdev->dev, "m48t35", &m48t35_ops, THIS_MODULE); - if (IS_ERR(priv->rtc)) - return PTR_ERR(priv->rtc); - - return 0; + return PTR_RET(priv->rtc); } static struct platform_driver m48t35_platform_driver = { -- cgit v0.10.2 From 23f809ee33f06877de4c4f28129c495d332cd500 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:51 -0700 Subject: drivers/rtc/rtc-pcf8563.c: use PTR_RET() Use of PTR_RET() simplifies the code. Signed-off-by: Sachin Kamat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index 9ea2d07..710c3a5 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -20,6 +20,7 @@ #include #include #include +#include #define DRV_VERSION "0.4.3" @@ -263,10 +264,7 @@ static int pcf8563_probe(struct i2c_client *client, pcf8563_driver.driver.name, &pcf8563_rtc_ops, THIS_MODULE); - if (IS_ERR(pcf8563->rtc)) - return PTR_ERR(pcf8563->rtc); - - return 0; + return PTR_RET(pcf8563->rtc); } static const struct i2c_device_id pcf8563_id[] = { -- cgit v0.10.2 From ee605e0b2e12040dc1fe88eb8ab03bde0b8f92b8 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 3 Jul 2013 15:07:52 -0700 Subject: drivers/rtc/rtc-pcf8583.c: use PTR_RET() Use of PTR_RET() simplifies the code. Signed-off-by: Sachin Kamat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-pcf8583.c b/drivers/rtc/rtc-pcf8583.c index ab740cd..843a745 100644 --- a/drivers/rtc/rtc-pcf8583.c +++ b/drivers/rtc/rtc-pcf8583.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -284,10 +285,7 @@ static int pcf8583_probe(struct i2c_client *client, pcf8583_driver.driver.name, &pcf8583_rtc_ops, THIS_MODULE); - if (IS_ERR(pcf8583->rtc)) - return PTR_ERR(pcf8583->rtc); - - return 0; + return PTR_RET(pcf8583->rtc); } static const struct i2c_device_id pcf8583_id[] = { -- cgit v0.10.2 From ae8458949a57346e17a07768fbdb626cbe993b89 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Wed, 3 Jul 2013 15:07:53 -0700 Subject: drivers/rtc/rtc-twl.c: ensure IRQ is wakeup enabled Currently, the RTC IRQ is never wakeup-enabled so is not capable of bringing the system out of suspend. On OMAP platforms, we have gotten by without this because the TWL RTC is on an I2C-connected chip which is capable of waking up the OMAP via the IO ring when the OMAP is in low-power states. However, if the OMAP suspends without hitting the low-power states (and the IO ring is not enabled), RTC wakeups will not work because the IRQ is not wakeup enabled. To fix, ensure the RTC IRQ is wakeup enabled whenever the RTC alarm is set. Signed-off-by: Kevin Hilman Cc: Alessandro Zummo Cc: Tony Lindgren Cc: Grygorii Strashko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c index 52843d0..3614874 100644 --- a/drivers/rtc/rtc-twl.c +++ b/drivers/rtc/rtc-twl.c @@ -213,12 +213,24 @@ static int mask_rtc_irq_bit(unsigned char bit) static int twl_rtc_alarm_irq_enable(struct device *dev, unsigned enabled) { + struct platform_device *pdev = to_platform_device(dev); + int irq = platform_get_irq(pdev, 0); + static bool twl_rtc_wake_enabled; int ret; - if (enabled) + if (enabled) { ret = set_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_ALARM_M); - else + if (device_can_wakeup(dev) && !twl_rtc_wake_enabled) { + enable_irq_wake(irq); + twl_rtc_wake_enabled = true; + } + } else { ret = mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_ALARM_M); + if (twl_rtc_wake_enabled) { + disable_irq_wake(irq); + twl_rtc_wake_enabled = false; + } + } return ret; } -- cgit v0.10.2 From 998a06051afe6cba392eab66fa0ef1d7e7376f6c Mon Sep 17 00:00:00 2001 From: Derek Basehore Date: Wed, 3 Jul 2013 15:07:54 -0700 Subject: drivers/rtc/rtc-cmos.c: work around bios clearing rtc control The bios may clear the rtc control register when resuming the system. Since the cmos interrupt handler may now be run before the rtc_cmos is resumed, this can cause the interrupt handler to ignore an alarm since the alarm bit is not set in the rtc control register. To work around this, check if the rtc_cmos is suspended and use the stored value for the rtc control register. Signed-off-by: Derek Basehore Reviewed-by: Sameer Nanda Cc: Alessandro Zummo Cc: Jingoo Han Cc: Steven Rostedt Cc: John Stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index a6727d9..be06d71 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -556,17 +556,24 @@ static irqreturn_t cmos_interrupt(int irq, void *p) rtc_control = CMOS_READ(RTC_CONTROL); if (is_hpet_enabled()) irqstat = (unsigned long)irq & 0xF0; - irqstat &= (rtc_control & RTC_IRQMASK) | RTC_IRQF; + + /* If we were suspended, RTC_CONTROL may not be accurate since the + * bios may have cleared it. + */ + if (!cmos_rtc.suspend_ctrl) + irqstat &= (rtc_control & RTC_IRQMASK) | RTC_IRQF; + else + irqstat &= (cmos_rtc.suspend_ctrl & RTC_IRQMASK) | RTC_IRQF; /* All Linux RTC alarms should be treated as if they were oneshot. * Similar code may be needed in system wakeup paths, in case the * alarm woke the system. */ if (irqstat & RTC_AIE) { + cmos_rtc.suspend_ctrl &= ~RTC_AIE; rtc_control &= ~RTC_AIE; CMOS_WRITE(rtc_control, RTC_CONTROL); hpet_mask_rtc_irq_bit(RTC_AIE); - CMOS_READ(RTC_INTR_FLAGS); } spin_unlock(&rtc_lock); @@ -839,21 +846,23 @@ static inline int cmos_poweroff(struct device *dev) static int cmos_resume(struct device *dev) { struct cmos_rtc *cmos = dev_get_drvdata(dev); - unsigned char tmp = cmos->suspend_ctrl; + unsigned char tmp; + + if (cmos->enabled_wake) { + if (cmos->wake_off) + cmos->wake_off(dev); + else + disable_irq_wake(cmos->irq); + cmos->enabled_wake = 0; + } + spin_lock_irq(&rtc_lock); + tmp = cmos->suspend_ctrl; + cmos->suspend_ctrl = 0; /* re-enable any irqs previously active */ if (tmp & RTC_IRQMASK) { unsigned char mask; - if (cmos->enabled_wake) { - if (cmos->wake_off) - cmos->wake_off(dev); - else - disable_irq_wake(cmos->irq); - cmos->enabled_wake = 0; - } - - spin_lock_irq(&rtc_lock); if (device_may_wakeup(dev)) hpet_rtc_timer_init(); @@ -873,8 +882,8 @@ static int cmos_resume(struct device *dev) tmp &= ~RTC_AIE; hpet_mask_rtc_irq_bit(RTC_AIE); } while (mask & RTC_AIE); - spin_unlock_irq(&rtc_lock); } + spin_unlock_irq(&rtc_lock); dev_dbg(dev, "resume, ctrl %02x\n", tmp); -- cgit v0.10.2 From d3869ff684cb96ccfaf3c3a9a302fe3b7e976b02 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 3 Jul 2013 15:07:55 -0700 Subject: drivers/rtc/rtc-twl.c: fix rtc_reg_map initialization Initialize the rtc_reg_map in platform_driver's probe function instead at module_init time. This way we can make sure that the twl-core has been already probed and initialized (twl_priv->twl_id is valid) since the platform device for the RTC driver will be created by the twl-core after it finished its init. Reported-by: Christoph Fritz Signed-off-by: Peter Ujfalusi Tested-by: Kevin Hilman Tested-by: Grygorii Strashko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c index 3614874..8a04044 100644 --- a/drivers/rtc/rtc-twl.c +++ b/drivers/rtc/rtc-twl.c @@ -481,6 +481,12 @@ static int twl_rtc_probe(struct platform_device *pdev) if (irq <= 0) goto out1; + /* Initialize the register map */ + if (twl_class_is_4030()) + rtc_reg_map = (u8 *)twl4030_rtc_reg_map; + else + rtc_reg_map = (u8 *)twl6030_rtc_reg_map; + ret = twl_rtc_read_u8(&rd_reg, REG_RTC_STATUS_REG); if (ret < 0) goto out1; @@ -622,11 +628,6 @@ static struct platform_driver twl4030rtc_driver = { static int __init twl_rtc_init(void) { - if (twl_class_is_4030()) - rtc_reg_map = (u8 *) twl4030_rtc_reg_map; - else - rtc_reg_map = (u8 *) twl6030_rtc_reg_map; - return platform_driver_register(&twl4030rtc_driver); } module_init(twl_rtc_init); -- cgit v0.10.2 From 5ee67484dede220d96c0b323c78d02c63fdfae44 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 3 Jul 2013 15:07:56 -0700 Subject: drivers/rtc/rtc-twl.c: cleanup with module_platform_driver() conversion Use module_platform_driver() to register the platform driver. Signed-off-by: Peter Ujfalusi Acked-by: Kevin Hilman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c index 8a04044..02faf3c 100644 --- a/drivers/rtc/rtc-twl.c +++ b/drivers/rtc/rtc-twl.c @@ -626,17 +626,7 @@ static struct platform_driver twl4030rtc_driver = { }, }; -static int __init twl_rtc_init(void) -{ - return platform_driver_register(&twl4030rtc_driver); -} -module_init(twl_rtc_init); - -static void __exit twl_rtc_exit(void) -{ - platform_driver_unregister(&twl4030rtc_driver); -} -module_exit(twl_rtc_exit); +module_platform_driver(twl4030rtc_driver); MODULE_AUTHOR("Texas Instruments, MontaVista Software"); MODULE_LICENSE("GPL"); -- cgit v0.10.2 From 0734e27f0befe9e88c2b5dad789b05b7bf86ce90 Mon Sep 17 00:00:00 2001 From: Chris Brand Date: Wed, 3 Jul 2013 15:07:57 -0700 Subject: drivers/rtc/interface.c: return -EBUSY, not -EACCES when device is busy If rtc->irq_task is non-NULL and task is NULL, they always rtc_irq_set_freq(), whenever err is set to -EBUSY it will then immediately be set to -EACCES, misleading the caller as to the underlying problem. Signed-off-by: Chris Brand Acked-by: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 14c1efd..72c5cdb 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -698,9 +698,9 @@ retry: spin_lock_irqsave(&rtc->irq_task_lock, flags); if (rtc->irq_task != NULL && task == NULL) err = -EBUSY; - if (rtc->irq_task != task) + else if (rtc->irq_task != task) err = -EACCES; - if (!err) { + else { if (rtc_update_hrtimer(rtc, enabled) < 0) { spin_unlock_irqrestore(&rtc->irq_task_lock, flags); cpu_relax(); @@ -734,9 +734,9 @@ retry: spin_lock_irqsave(&rtc->irq_task_lock, flags); if (rtc->irq_task != NULL && task == NULL) err = -EBUSY; - if (rtc->irq_task != task) + else if (rtc->irq_task != task) err = -EACCES; - if (!err) { + else { rtc->irq_freq = freq; if (rtc->pie_enabled && rtc_update_hrtimer(rtc, 1) < 0) { spin_unlock_irqrestore(&rtc->irq_task_lock, flags); -- cgit v0.10.2 From 4c5591c1eee54b7775ea63635ae310a29f0207bb Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 3 Jul 2013 15:07:58 -0700 Subject: drivers/rtc/rtc-pcf2123.c: replace strict_strtoul() with kstrtoul() The usage of strict_strtoul() is not preferred, because strict_strtoul() is obsolete. Thus, kstrtoul() should be used. Signed-off-by: Jingoo Han Reviewed-by: Andy Shevchenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-pcf2123.c b/drivers/rtc/rtc-pcf2123.c index b2a78a0..1725b50 100644 --- a/drivers/rtc/rtc-pcf2123.c +++ b/drivers/rtc/rtc-pcf2123.c @@ -94,8 +94,9 @@ static ssize_t pcf2123_show(struct device *dev, struct device_attribute *attr, r = container_of(attr, struct pcf2123_sysfs_reg, attr); - if (strict_strtoul(r->name, 16, ®)) - return -EINVAL; + ret = kstrtoul(r->name, 16, ®); + if (ret) + return ret; txbuf[0] = PCF2123_READ | reg; ret = spi_write_then_read(spi, txbuf, 1, rxbuf, 1); @@ -117,9 +118,13 @@ static ssize_t pcf2123_store(struct device *dev, struct device_attribute *attr, r = container_of(attr, struct pcf2123_sysfs_reg, attr); - if (strict_strtoul(r->name, 16, ®) - || strict_strtoul(buffer, 10, &val)) - return -EINVAL; + ret = kstrtoul(r->name, 16, ®); + if (ret) + return ret; + + ret = kstrtoul(buffer, 10, &val); + if (ret) + return ret; txbuf[0] = PCF2123_WRITE | reg; txbuf[1] = val; -- cgit v0.10.2 From 92e7f04a7fcc9e7f5955f3337e26ca4ac2ae387b Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 3 Jul 2013 15:07:59 -0700 Subject: drivers/rtc/class: convert from Legacy pm ops to dev_pm_ops Convert drivers/rtc/class to use dev_pm_ops for power management and remove Legacy PM ops hooks. With this change, rtc class registers suspend/resume callbacks via class->pm (dev_pm_ops) instead of Legacy class->suspend/resume. When __device_suspend() runs call-backs, it will find class->pm ops for the rtc class. Signed-off-by: Shuah Khan Signed-off-by: Jingoo Han Cc: Shuah Khan Cc: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 6638540..0242681 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -38,7 +38,7 @@ static void rtc_device_release(struct device *dev) int rtc_hctosys_ret = -ENODEV; #endif -#if defined(CONFIG_PM) && defined(CONFIG_RTC_HCTOSYS_DEVICE) +#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE) /* * On suspend(), measure the delta between one RTC and the * system's wall clock; restore it on resume(). @@ -47,7 +47,7 @@ int rtc_hctosys_ret = -ENODEV; static struct timespec old_rtc, old_system, old_delta; -static int rtc_suspend(struct device *dev, pm_message_t mesg) +static int rtc_suspend(struct device *dev) { struct rtc_device *rtc = to_rtc_device(dev); struct rtc_time tm; @@ -135,9 +135,10 @@ static int rtc_resume(struct device *dev) return 0; } +static SIMPLE_DEV_PM_OPS(rtc_class_dev_pm_ops, rtc_suspend, rtc_resume); +#define RTC_CLASS_DEV_PM_OPS (&rtc_class_dev_pm_ops) #else -#define rtc_suspend NULL -#define rtc_resume NULL +#define RTC_CLASS_DEV_PM_OPS NULL #endif @@ -336,8 +337,7 @@ static int __init rtc_init(void) pr_err("couldn't create class\n"); return PTR_ERR(rtc_class); } - rtc_class->suspend = rtc_suspend; - rtc_class->resume = rtc_resume; + rtc_class->pm = RTC_CLASS_DEV_PM_OPS; rtc_dev_init(); rtc_sysfs_init(rtc_class); return 0; -- cgit v0.10.2 From 2c5a5b30917b725ea95fa189e52fc8412f6a2814 Mon Sep 17 00:00:00 2001 From: Wei Ni Date: Wed, 3 Jul 2013 15:08:00 -0700 Subject: drivers/rtc/rtc-palmas.c: init wakeup before device register Enable dev as wakeup device before calling rtc_device_register(), so that it can create the "wakealarm" sysfs. Signed-off-by: Wei Ni Acked-by: Laxman Dewangan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-palmas.c b/drivers/rtc/rtc-palmas.c index 50204d4..a1fecc8 100644 --- a/drivers/rtc/rtc-palmas.c +++ b/drivers/rtc/rtc-palmas.c @@ -265,6 +265,7 @@ static int palmas_rtc_probe(struct platform_device *pdev) palmas_rtc->irq = platform_get_irq(pdev, 0); + device_init_wakeup(&pdev->dev, 1); palmas_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &palmas_rtc_ops, THIS_MODULE); if (IS_ERR(palmas_rtc->rtc)) { @@ -283,7 +284,6 @@ static int palmas_rtc_probe(struct platform_device *pdev) return ret; } - device_set_wakeup_capable(&pdev->dev, 1); return 0; } -- cgit v0.10.2 From 18cb6368f0b0fc6a28bd49ee547b4f655db97fc3 Mon Sep 17 00:00:00 2001 From: Renaud Cerrato Date: Wed, 3 Jul 2013 15:08:01 -0700 Subject: rtc: add NXP PCF2127 support (i2c) Added support for NXP PCF2127 RTC (i2c). [akpm@linux-foundation.org: fix typo, fix warnings] Signed-off-by: Renaud Cerrato Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index b983813..8009e5d 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -313,6 +313,15 @@ config RTC_DRV_PALMAS This driver can also be built as a module. If so, the module will be called rtc-palma. +config RTC_DRV_PCF2127 + tristate "NXP PCF2127" + help + If you say yes here you get support for the NXP PCF2127/29 RTC + chips. + + This driver can also be built as a module. If so, the module + will be called rtc-pcf2127. + config RTC_DRV_PCF8523 tristate "NXP PCF8523" help diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index c33f86f..994a2fa 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -83,6 +83,7 @@ obj-$(CONFIG_RTC_DRV_NUC900) += rtc-nuc900.o obj-$(CONFIG_RTC_DRV_OMAP) += rtc-omap.o obj-$(CONFIG_RTC_DRV_PALMAS) += rtc-palmas.o obj-$(CONFIG_RTC_DRV_PCAP) += rtc-pcap.o +obj-$(CONFIG_RTC_DRV_PCF2127) += rtc-pcf2127.o obj-$(CONFIG_RTC_DRV_PCF8523) += rtc-pcf8523.o obj-$(CONFIG_RTC_DRV_PCF8563) += rtc-pcf8563.o obj-$(CONFIG_RTC_DRV_PCF8583) += rtc-pcf8583.o diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c new file mode 100644 index 0000000..205b9f7 --- /dev/null +++ b/drivers/rtc/rtc-pcf2127.c @@ -0,0 +1,241 @@ +/* + * An I2C driver for the NXP PCF2127 RTC + * Copyright 2013 Til-Technologies + * + * Author: Renaud Cerrato + * + * based on the other drivers in this same directory. + * + * http://www.nxp.com/documents/data_sheet/PCF2127AT.pdf + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#define DRV_VERSION "0.0.1" + +#define PCF2127_REG_CTRL1 (0x00) /* Control Register 1 */ +#define PCF2127_REG_CTRL2 (0x01) /* Control Register 2 */ +#define PCF2127_REG_CTRL3 (0x02) /* Control Register 3 */ +#define PCF2127_REG_SC (0x03) /* datetime */ +#define PCF2127_REG_MN (0x04) +#define PCF2127_REG_HR (0x05) +#define PCF2127_REG_DM (0x06) +#define PCF2127_REG_DW (0x07) +#define PCF2127_REG_MO (0x08) +#define PCF2127_REG_YR (0x09) + +static struct i2c_driver pcf2127_driver; + +struct pcf2127 { + struct rtc_device *rtc; + int voltage_low; /* indicates if a low_voltage was detected */ +}; + +/* + * In the routines that deal directly with the pcf2127 hardware, we use + * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch. + */ +static int pcf2127_get_datetime(struct i2c_client *client, struct rtc_time *tm) +{ + struct pcf2127 *pcf2127 = i2c_get_clientdata(client); + unsigned char buf[10] = { PCF2127_REG_CTRL1 }; + + /* read registers */ + if (i2c_master_send(client, buf, 1) != 1 || + i2c_master_recv(client, buf, sizeof(buf)) != sizeof(buf)) { + dev_err(&client->dev, "%s: read error\n", __func__); + return -EIO; + } + + if (buf[PCF2127_REG_CTRL3] & 0x04) { + pcf2127->voltage_low = 1; + dev_info(&client->dev, + "low voltage detected, date/time is not reliable.\n"); + } + + dev_dbg(&client->dev, + "%s: raw data is cr1=%02x, cr2=%02x, cr3=%02x, " + "sec=%02x, min=%02x, hr=%02x, " + "mday=%02x, wday=%02x, mon=%02x, year=%02x\n", + __func__, + buf[0], buf[1], buf[2], + buf[3], buf[4], buf[5], + buf[6], buf[7], buf[8], buf[9]); + + + tm->tm_sec = bcd2bin(buf[PCF2127_REG_SC] & 0x7F); + tm->tm_min = bcd2bin(buf[PCF2127_REG_MN] & 0x7F); + tm->tm_hour = bcd2bin(buf[PCF2127_REG_HR] & 0x3F); /* rtc hr 0-23 */ + tm->tm_mday = bcd2bin(buf[PCF2127_REG_DM] & 0x3F); + tm->tm_wday = buf[PCF2127_REG_DW] & 0x07; + tm->tm_mon = bcd2bin(buf[PCF2127_REG_MO] & 0x1F) - 1; /* rtc mn 1-12 */ + tm->tm_year = bcd2bin(buf[PCF2127_REG_YR]); + if (tm->tm_year < 70) + tm->tm_year += 100; /* assume we are in 1970...2069 */ + + dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, " + "mday=%d, mon=%d, year=%d, wday=%d\n", + __func__, + tm->tm_sec, tm->tm_min, tm->tm_hour, + tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday); + + /* the clock can give out invalid datetime, but we cannot return + * -EINVAL otherwise hwclock will refuse to set the time on bootup. + */ + if (rtc_valid_tm(tm) < 0) + dev_err(&client->dev, "retrieved date/time is not valid.\n"); + + return 0; +} + +static int pcf2127_set_datetime(struct i2c_client *client, struct rtc_time *tm) +{ + unsigned char buf[8]; + int i = 0, err; + + dev_dbg(&client->dev, "%s: secs=%d, mins=%d, hours=%d, " + "mday=%d, mon=%d, year=%d, wday=%d\n", + __func__, + tm->tm_sec, tm->tm_min, tm->tm_hour, + tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday); + + /* start register address */ + buf[i++] = PCF2127_REG_SC; + + /* hours, minutes and seconds */ + buf[i++] = bin2bcd(tm->tm_sec); + buf[i++] = bin2bcd(tm->tm_min); + buf[i++] = bin2bcd(tm->tm_hour); + buf[i++] = bin2bcd(tm->tm_mday); + buf[i++] = tm->tm_wday & 0x07; + + /* month, 1 - 12 */ + buf[i++] = bin2bcd(tm->tm_mon + 1); + + /* year */ + buf[i++] = bin2bcd(tm->tm_year % 100); + + /* write register's data */ + err = i2c_master_send(client, buf, i); + if (err != i) { + dev_err(&client->dev, + "%s: err=%d", __func__, err); + return -EIO; + } + + return 0; +} + +#ifdef CONFIG_RTC_INTF_DEV +static int pcf2127_rtc_ioctl(struct device *dev, + unsigned int cmd, unsigned long arg) +{ + struct pcf2127 *pcf2127 = i2c_get_clientdata(to_i2c_client(dev)); + + switch (cmd) { + case RTC_VL_READ: + if (pcf2127->voltage_low) + dev_info(dev, "low voltage detected, date/time is not reliable.\n"); + + if (copy_to_user((void __user *)arg, &pcf2127->voltage_low, + sizeof(int))) + return -EFAULT; + return 0; + default: + return -ENOIOCTLCMD; + } +} +#else +#define pcf2127_rtc_ioctl NULL +#endif + +static int pcf2127_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + return pcf2127_get_datetime(to_i2c_client(dev), tm); +} + +static int pcf2127_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + return pcf2127_set_datetime(to_i2c_client(dev), tm); +} + +static const struct rtc_class_ops pcf2127_rtc_ops = { + .ioctl = pcf2127_rtc_ioctl, + .read_time = pcf2127_rtc_read_time, + .set_time = pcf2127_rtc_set_time, +}; + +static int pcf2127_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct pcf2127 *pcf2127; + + dev_dbg(&client->dev, "%s\n", __func__); + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) + return -ENODEV; + + pcf2127 = devm_kzalloc(&client->dev, sizeof(struct pcf2127), + GFP_KERNEL); + if (!pcf2127) + return -ENOMEM; + + dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n"); + + i2c_set_clientdata(client, pcf2127); + + pcf2127->rtc = devm_rtc_device_register(&client->dev, + pcf2127_driver.driver.name, + &pcf2127_rtc_ops, THIS_MODULE); + + if (IS_ERR(pcf2127->rtc)) + return PTR_ERR(pcf2127->rtc); + + return 0; +} + +static int pcf2127_remove(struct i2c_client *client) +{ + return 0; +} + +static const struct i2c_device_id pcf2127_id[] = { + { "pcf2127", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, pcf2127_id); + +#ifdef CONFIG_OF +static const struct of_device_id pcf2127_of_match[] = { + { .compatible = "nxp,pcf2127" }, + {} +}; +MODULE_DEVICE_TABLE(of, pcf2127_of_match); +#endif + +static struct i2c_driver pcf2127_driver = { + .driver = { + .name = "rtc-pcf2127", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(pcf2127_of_match), + }, + .probe = pcf2127_probe, + .remove = pcf2127_remove, + .id_table = pcf2127_id, +}; + +module_i2c_driver(pcf2127_driver); + +MODULE_AUTHOR("Renaud Cerrato "); +MODULE_DESCRIPTION("NXP PCF2127 RTC driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_VERSION); -- cgit v0.10.2 From 1d2e2b65d098c0a321e16b0997eb760439d99500 Mon Sep 17 00:00:00 2001 From: Hebbar Gururaja Date: Wed, 3 Jul 2013 15:08:02 -0700 Subject: rtc: omap: restore back (hard-code) wakeup support rtc-omap driver modules is used both by OMAP1/2, Davinci SoC platforms. However, rtc wake support on OMAP1 is broken. Hence the device_init_wakeup() was removed from rtc-omap driver and moved to platform board files that supported it (DA850/OMAP-L138). [1] However, recently [2] it was suggested that driver should always do a device_init_wakeup(dev, true). Platforms that don't want/need wakeup support can disable it from userspace via: echo disabled > /sys/devices/.../power/wakeup Also, with the new DT boot-up, board file doesn't exist and hence there is no way to have device wakeup support rtc. The fix for above issues, is to hard code device_init_wakeup() inside driver and let platforms that don't need this, handle it through the sysfs power entry. [1] https://patchwork.kernel.org/patch/136731/ [2] http://www.mail-archive.com/davinci-linux-open-source@linux. davincidsp.com/msg26077.html Signed-off-by: Hebbar Gururaja Cc: Alessandro Zummo Acked-by: Kevin Hilman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index 6f6ac03..c6ffbae 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -421,6 +421,8 @@ static int __init omap_rtc_probe(struct platform_device *pdev) * is write-only, and always reads as zero...) */ + device_init_wakeup(&pdev->dev, true); + if (new_ctrl & (u8) OMAP_RTC_CTRL_SPLIT) pr_info("%s: split power mode\n", pdev->name); -- cgit v0.10.2 From 061d2a3e39c4686039519509d69cd31c7841f666 Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Wed, 3 Jul 2013 15:08:03 -0700 Subject: drivers/rtc/rtc-ds1216.c: use module_platform_driver_probe() Use module_platform_driver_probe() macro which makes the code smaller and simpler. Signed-off-by: Fabio Porcedda Cc: Alessandro Zummo Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-ds1216.c b/drivers/rtc/rtc-ds1216.c index 1831c84..9c04fd2 100644 --- a/drivers/rtc/rtc-ds1216.c +++ b/drivers/rtc/rtc-ds1216.c @@ -174,21 +174,10 @@ static struct platform_driver ds1216_rtc_platform_driver = { }, }; -static int __init ds1216_rtc_init(void) -{ - return platform_driver_probe(&ds1216_rtc_platform_driver, ds1216_rtc_probe); -} - -static void __exit ds1216_rtc_exit(void) -{ - platform_driver_unregister(&ds1216_rtc_platform_driver); -} +module_platform_driver_probe(ds1216_rtc_platform_driver, ds1216_rtc_probe); MODULE_AUTHOR("Thomas Bogendoerfer "); MODULE_DESCRIPTION("DS1216 RTC driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); MODULE_ALIAS("platform:rtc-ds1216"); - -module_init(ds1216_rtc_init); -module_exit(ds1216_rtc_exit); -- cgit v0.10.2 From e88b815e011bcacb7066f1156ce390f6b0adc9df Mon Sep 17 00:00:00 2001 From: Xianglong Du Date: Wed, 3 Jul 2013 15:08:04 -0700 Subject: drivers/rtc/rtc-sirfsoc.c: add rtc drivers for CSR SiRFprimaII and SiRFatlasVI On CSR SiRFprimaII/atlasVI, there is a programmable 16-bit divider (RTC_DIV) that divides the input 32.768KHz clock to the frequency that users need (E.g. 1 Hz). The divided real-time clock will be used to drive a 32-bit counter (RTC_COUNTER) that provides users with the actual time. In each cycle of the divided real-time clock, there is a Hertz interrupt generated to the RISC. Users can also configure an alarm (RTC_ALARM). When RTC_COUNTER matches the alarm, there will be an alarm interrupt generated to the RISC. The system RTC can generate an alarm wake-up signal to notify the power controller to wake up from power saving mode. Signed-off-by: Xianglong Du Signed-off-by: Barry Song Cc: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/arm/boot/dts/atlas6.dtsi b/arch/arm/boot/dts/atlas6.dtsi index 7d1a279..9866cd7 100644 --- a/arch/arm/boot/dts/atlas6.dtsi +++ b/arch/arm/boot/dts/atlas6.dtsi @@ -613,7 +613,7 @@ }; rtc-iobg { - compatible = "sirf,prima2-rtciobg", "sirf-prima2-rtciobg-bus"; + compatible = "sirf,prima2-rtciobg", "sirf-prima2-rtciobg-bus", "simple-bus"; #address-cells = <1>; #size-cells = <1>; reg = <0x80030000 0x10000>; diff --git a/arch/arm/boot/dts/prima2.dtsi b/arch/arm/boot/dts/prima2.dtsi index 02edd89..05e9489 100644 --- a/arch/arm/boot/dts/prima2.dtsi +++ b/arch/arm/boot/dts/prima2.dtsi @@ -610,7 +610,7 @@ }; rtc-iobg { - compatible = "sirf,prima2-rtciobg", "sirf-prima2-rtciobg-bus"; + compatible = "sirf,prima2-rtciobg", "sirf-prima2-rtciobg-bus", "simple-bus"; #address-cells = <1>; #size-cells = <1>; reg = <0x80030000 0x10000>; diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 8009e5d..9e3498b 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1242,6 +1242,13 @@ config RTC_DRV_SNVS This driver can also be built as a module, if so, the module will be called "rtc-snvs". +config RTC_DRV_SIRFSOC + tristate "SiRFSOC RTC" + depends on ARCH_SIRF + help + Say "yes" here to support the real time clock on SiRF SOC chips. + This driver can also be built as a module called rtc-sirfsoc. + comment "HID Sensor RTC drivers" config RTC_DRV_HID_SENSOR_TIME diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 994a2fa..d3b4488 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -129,3 +129,4 @@ obj-$(CONFIG_RTC_DRV_VT8500) += rtc-vt8500.o obj-$(CONFIG_RTC_DRV_WM831X) += rtc-wm831x.o obj-$(CONFIG_RTC_DRV_WM8350) += rtc-wm8350.o obj-$(CONFIG_RTC_DRV_X1205) += rtc-x1205.o +obj-$(CONFIG_RTC_DRV_SIRFSOC) += rtc-sirfsoc.o diff --git a/drivers/rtc/rtc-sirfsoc.c b/drivers/rtc/rtc-sirfsoc.c new file mode 100644 index 0000000..aa7ed4b --- /dev/null +++ b/drivers/rtc/rtc-sirfsoc.c @@ -0,0 +1,475 @@ +/* + * SiRFSoC Real Time Clock interface for Linux + * + * Copyright (c) 2013 Cambridge Silicon Radio Limited, a CSR plc group company. + * + * Licensed under GPLv2 or later. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + + +#define RTC_CN 0x00 +#define RTC_ALARM0 0x04 +#define RTC_ALARM1 0x18 +#define RTC_STATUS 0x08 +#define RTC_SW_VALUE 0x40 +#define SIRFSOC_RTC_AL1E (1<<6) +#define SIRFSOC_RTC_AL1 (1<<4) +#define SIRFSOC_RTC_HZE (1<<3) +#define SIRFSOC_RTC_AL0E (1<<2) +#define SIRFSOC_RTC_HZ (1<<1) +#define SIRFSOC_RTC_AL0 (1<<0) +#define RTC_DIV 0x0c +#define RTC_DEEP_CTRL 0x14 +#define RTC_CLOCK_SWITCH 0x1c +#define SIRFSOC_RTC_CLK 0x03 /* others are reserved */ + +/* Refer to RTC DIV switch */ +#define RTC_HZ 16 + +/* This macro is also defined in arch/arm/plat-sirfsoc/cpu.c */ +#define RTC_SHIFT 4 + +#define INTR_SYSRTC_CN 0x48 + +struct sirfsoc_rtc_drv { + struct rtc_device *rtc; + u32 rtc_base; + u32 irq; + /* Overflow for every 8 years extra time */ + u32 overflow_rtc; +#ifdef CONFIG_PM + u32 saved_counter; + u32 saved_overflow_rtc; +#endif +}; + +static int sirfsoc_rtc_read_alarm(struct device *dev, + struct rtc_wkalrm *alrm) +{ + unsigned long rtc_alarm, rtc_count; + struct sirfsoc_rtc_drv *rtcdrv; + + rtcdrv = (struct sirfsoc_rtc_drv *)dev_get_drvdata(dev); + + local_irq_disable(); + + rtc_count = sirfsoc_rtc_iobrg_readl(rtcdrv->rtc_base + RTC_CN); + + rtc_alarm = sirfsoc_rtc_iobrg_readl(rtcdrv->rtc_base + RTC_ALARM0); + memset(alrm, 0, sizeof(struct rtc_wkalrm)); + + /* + * assume alarm interval not beyond one round counter overflow_rtc: + * 0->0xffffffff + */ + /* if alarm is in next overflow cycle */ + if (rtc_count > rtc_alarm) + rtc_time_to_tm((rtcdrv->overflow_rtc + 1) + << (BITS_PER_LONG - RTC_SHIFT) + | rtc_alarm >> RTC_SHIFT, &(alrm->time)); + else + rtc_time_to_tm(rtcdrv->overflow_rtc + << (BITS_PER_LONG - RTC_SHIFT) + | rtc_alarm >> RTC_SHIFT, &(alrm->time)); + if (sirfsoc_rtc_iobrg_readl( + rtcdrv->rtc_base + RTC_STATUS) & SIRFSOC_RTC_AL0E) + alrm->enabled = 1; + local_irq_enable(); + + return 0; +} + +static int sirfsoc_rtc_set_alarm(struct device *dev, + struct rtc_wkalrm *alrm) +{ + unsigned long rtc_status_reg, rtc_alarm; + struct sirfsoc_rtc_drv *rtcdrv; + rtcdrv = (struct sirfsoc_rtc_drv *)dev_get_drvdata(dev); + + if (alrm->enabled) { + rtc_tm_to_time(&(alrm->time), &rtc_alarm); + + local_irq_disable(); + + rtc_status_reg = sirfsoc_rtc_iobrg_readl( + rtcdrv->rtc_base + RTC_STATUS); + if (rtc_status_reg & SIRFSOC_RTC_AL0E) { + /* + * An ongoing alarm in progress - ingore it and not + * to return EBUSY + */ + dev_info(dev, "An old alarm was set, will be replaced by a new one\n"); + } + + sirfsoc_rtc_iobrg_writel( + rtc_alarm << RTC_SHIFT, rtcdrv->rtc_base + RTC_ALARM0); + rtc_status_reg &= ~0x07; /* mask out the lower status bits */ + /* + * This bit RTC_AL sets it as a wake-up source for Sleep Mode + * Writing 1 into this bit will clear it + */ + rtc_status_reg |= SIRFSOC_RTC_AL0; + /* enable the RTC alarm interrupt */ + rtc_status_reg |= SIRFSOC_RTC_AL0E; + sirfsoc_rtc_iobrg_writel( + rtc_status_reg, rtcdrv->rtc_base + RTC_STATUS); + local_irq_enable(); + } else { + /* + * if this function was called with enabled=0 + * then it could mean that the application is + * trying to cancel an ongoing alarm + */ + local_irq_disable(); + + rtc_status_reg = sirfsoc_rtc_iobrg_readl( + rtcdrv->rtc_base + RTC_STATUS); + if (rtc_status_reg & SIRFSOC_RTC_AL0E) { + /* clear the RTC status register's alarm bit */ + rtc_status_reg &= ~0x07; + /* write 1 into SIRFSOC_RTC_AL0 to force a clear */ + rtc_status_reg |= (SIRFSOC_RTC_AL0); + /* Clear the Alarm enable bit */ + rtc_status_reg &= ~(SIRFSOC_RTC_AL0E); + + sirfsoc_rtc_iobrg_writel(rtc_status_reg, + rtcdrv->rtc_base + RTC_STATUS); + } + + local_irq_enable(); + } + + return 0; +} + +static int sirfsoc_rtc_read_time(struct device *dev, + struct rtc_time *tm) +{ + unsigned long tmp_rtc = 0; + struct sirfsoc_rtc_drv *rtcdrv; + rtcdrv = (struct sirfsoc_rtc_drv *)dev_get_drvdata(dev); + /* + * This patch is taken from WinCE - Need to validate this for + * correctness. To work around sirfsoc RTC counter double sync logic + * fail, read several times to make sure get stable value. + */ + do { + tmp_rtc = sirfsoc_rtc_iobrg_readl(rtcdrv->rtc_base + RTC_CN); + cpu_relax(); + } while (tmp_rtc != sirfsoc_rtc_iobrg_readl(rtcdrv->rtc_base + RTC_CN)); + + rtc_time_to_tm(rtcdrv->overflow_rtc << (BITS_PER_LONG - RTC_SHIFT) | + tmp_rtc >> RTC_SHIFT, tm); + return 0; +} + +static int sirfsoc_rtc_set_time(struct device *dev, + struct rtc_time *tm) +{ + unsigned long rtc_time; + struct sirfsoc_rtc_drv *rtcdrv; + rtcdrv = (struct sirfsoc_rtc_drv *)dev_get_drvdata(dev); + + rtc_tm_to_time(tm, &rtc_time); + + rtcdrv->overflow_rtc = rtc_time >> (BITS_PER_LONG - RTC_SHIFT); + + sirfsoc_rtc_iobrg_writel(rtcdrv->overflow_rtc, + rtcdrv->rtc_base + RTC_SW_VALUE); + sirfsoc_rtc_iobrg_writel( + rtc_time << RTC_SHIFT, rtcdrv->rtc_base + RTC_CN); + + return 0; +} + +static int sirfsoc_rtc_ioctl(struct device *dev, unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case RTC_PIE_ON: + case RTC_PIE_OFF: + case RTC_UIE_ON: + case RTC_UIE_OFF: + case RTC_AIE_ON: + case RTC_AIE_OFF: + return 0; + + default: + return -ENOIOCTLCMD; + } +} + +static const struct rtc_class_ops sirfsoc_rtc_ops = { + .read_time = sirfsoc_rtc_read_time, + .set_time = sirfsoc_rtc_set_time, + .read_alarm = sirfsoc_rtc_read_alarm, + .set_alarm = sirfsoc_rtc_set_alarm, + .ioctl = sirfsoc_rtc_ioctl +}; + +static irqreturn_t sirfsoc_rtc_irq_handler(int irq, void *pdata) +{ + struct sirfsoc_rtc_drv *rtcdrv = pdata; + unsigned long rtc_status_reg = 0x0; + unsigned long events = 0x0; + + rtc_status_reg = sirfsoc_rtc_iobrg_readl(rtcdrv->rtc_base + RTC_STATUS); + /* this bit will be set ONLY if an alarm was active + * and it expired NOW + * So this is being used as an ASSERT + */ + if (rtc_status_reg & SIRFSOC_RTC_AL0) { + /* + * clear the RTC status register's alarm bit + * mask out the lower status bits + */ + rtc_status_reg &= ~0x07; + /* write 1 into SIRFSOC_RTC_AL0 to ACK the alarm interrupt */ + rtc_status_reg |= (SIRFSOC_RTC_AL0); + /* Clear the Alarm enable bit */ + rtc_status_reg &= ~(SIRFSOC_RTC_AL0E); + } + sirfsoc_rtc_iobrg_writel(rtc_status_reg, rtcdrv->rtc_base + RTC_STATUS); + /* this should wake up any apps polling/waiting on the read + * after setting the alarm + */ + events |= RTC_IRQF | RTC_AF; + rtc_update_irq(rtcdrv->rtc, 1, events); + + return IRQ_HANDLED; +} + +static const struct of_device_id sirfsoc_rtc_of_match[] = { + { .compatible = "sirf,prima2-sysrtc"}, + {}, +}; +MODULE_DEVICE_TABLE(of, sirfsoc_rtc_of_match); + +static int sirfsoc_rtc_probe(struct platform_device *pdev) +{ + int err; + unsigned long rtc_div; + struct sirfsoc_rtc_drv *rtcdrv; + struct device_node *np = pdev->dev.of_node; + + rtcdrv = devm_kzalloc(&pdev->dev, + sizeof(struct sirfsoc_rtc_drv), GFP_KERNEL); + if (rtcdrv == NULL) { + dev_err(&pdev->dev, + "%s: can't alloc mem for drv struct\n", + pdev->name); + return -ENOMEM; + } + + err = of_property_read_u32(np, "reg", &rtcdrv->rtc_base); + if (err) { + dev_err(&pdev->dev, "unable to find base address of rtc node in dtb\n"); + goto error; + } + + platform_set_drvdata(pdev, rtcdrv); + + /* Register rtc alarm as a wakeup source */ + device_init_wakeup(&pdev->dev, 1); + + /* + * Set SYS_RTC counter in RTC_HZ HZ Units + * We are using 32K RTC crystal (32768 / RTC_HZ / 2) -1 + * If 16HZ, therefore RTC_DIV = 1023; + */ + rtc_div = ((32768 / RTC_HZ) / 2) - 1; + sirfsoc_rtc_iobrg_writel(rtc_div, rtcdrv->rtc_base + RTC_DIV); + + rtcdrv->rtc = rtc_device_register(pdev->name, &(pdev->dev), + &sirfsoc_rtc_ops, THIS_MODULE); + if (IS_ERR(rtcdrv->rtc)) { + err = PTR_ERR(rtcdrv->rtc); + dev_err(&pdev->dev, "can't register RTC device\n"); + return err; + } + + /* 0x3 -> RTC_CLK */ + sirfsoc_rtc_iobrg_writel(SIRFSOC_RTC_CLK, + rtcdrv->rtc_base + RTC_CLOCK_SWITCH); + + /* reset SYS RTC ALARM0 */ + sirfsoc_rtc_iobrg_writel(0x0, rtcdrv->rtc_base + RTC_ALARM0); + + /* reset SYS RTC ALARM1 */ + sirfsoc_rtc_iobrg_writel(0x0, rtcdrv->rtc_base + RTC_ALARM1); + + /* Restore RTC Overflow From Register After Command Reboot */ + rtcdrv->overflow_rtc = + sirfsoc_rtc_iobrg_readl(rtcdrv->rtc_base + RTC_SW_VALUE); + + rtcdrv->irq = platform_get_irq(pdev, 0); + err = devm_request_irq( + &pdev->dev, + rtcdrv->irq, + sirfsoc_rtc_irq_handler, + IRQF_SHARED, + pdev->name, + rtcdrv); + if (err) { + dev_err(&pdev->dev, "Unable to register for the SiRF SOC RTC IRQ\n"); + goto error; + } + + return 0; + +error: + if (rtcdrv->rtc) + rtc_device_unregister(rtcdrv->rtc); + + return err; +} + +static int sirfsoc_rtc_remove(struct platform_device *pdev) +{ + struct sirfsoc_rtc_drv *rtcdrv = platform_get_drvdata(pdev); + + device_init_wakeup(&pdev->dev, 0); + rtc_device_unregister(rtcdrv->rtc); + + return 0; +} + +#ifdef CONFIG_PM + +static int sirfsoc_rtc_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct sirfsoc_rtc_drv *rtcdrv = platform_get_drvdata(pdev); + rtcdrv->overflow_rtc = + sirfsoc_rtc_iobrg_readl(rtcdrv->rtc_base + RTC_SW_VALUE); + + rtcdrv->saved_counter = + sirfsoc_rtc_iobrg_readl(rtcdrv->rtc_base + RTC_CN); + rtcdrv->saved_overflow_rtc = rtcdrv->overflow_rtc; + if (device_may_wakeup(&pdev->dev)) + enable_irq_wake(rtcdrv->irq); + + return 0; +} + +static int sirfsoc_rtc_freeze(struct device *dev) +{ + sirfsoc_rtc_suspend(dev); + + return 0; +} + +static int sirfsoc_rtc_thaw(struct device *dev) +{ + u32 tmp; + struct sirfsoc_rtc_drv *rtcdrv; + rtcdrv = (struct sirfsoc_rtc_drv *)dev_get_drvdata(dev); + + /* + * if resume from snapshot and the rtc power is losed, + * restroe the rtc settings + */ + if (SIRFSOC_RTC_CLK != sirfsoc_rtc_iobrg_readl( + rtcdrv->rtc_base + RTC_CLOCK_SWITCH)) { + u32 rtc_div; + /* 0x3 -> RTC_CLK */ + sirfsoc_rtc_iobrg_writel(SIRFSOC_RTC_CLK, + rtcdrv->rtc_base + RTC_CLOCK_SWITCH); + /* + * Set SYS_RTC counter in RTC_HZ HZ Units + * We are using 32K RTC crystal (32768 / RTC_HZ / 2) -1 + * If 16HZ, therefore RTC_DIV = 1023; + */ + rtc_div = ((32768 / RTC_HZ) / 2) - 1; + + sirfsoc_rtc_iobrg_writel(rtc_div, rtcdrv->rtc_base + RTC_DIV); + + /* reset SYS RTC ALARM0 */ + sirfsoc_rtc_iobrg_writel(0x0, rtcdrv->rtc_base + RTC_ALARM0); + + /* reset SYS RTC ALARM1 */ + sirfsoc_rtc_iobrg_writel(0x0, rtcdrv->rtc_base + RTC_ALARM1); + } + rtcdrv->overflow_rtc = rtcdrv->saved_overflow_rtc; + + /* + * if current counter is small than previous, + * it means overflow in sleep + */ + tmp = sirfsoc_rtc_iobrg_readl(rtcdrv->rtc_base + RTC_CN); + if (tmp <= rtcdrv->saved_counter) + rtcdrv->overflow_rtc++; + /* + *PWRC Value Be Changed When Suspend, Restore Overflow + * In Memory To Register + */ + sirfsoc_rtc_iobrg_writel(rtcdrv->overflow_rtc, + rtcdrv->rtc_base + RTC_SW_VALUE); + + return 0; +} + +static int sirfsoc_rtc_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct sirfsoc_rtc_drv *rtcdrv = platform_get_drvdata(pdev); + sirfsoc_rtc_thaw(dev); + if (device_may_wakeup(&pdev->dev)) + disable_irq_wake(rtcdrv->irq); + + return 0; +} + +static int sirfsoc_rtc_restore(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct sirfsoc_rtc_drv *rtcdrv = platform_get_drvdata(pdev); + + if (device_may_wakeup(&pdev->dev)) + disable_irq_wake(rtcdrv->irq); + return 0; +} + +#else +#define sirfsoc_rtc_suspend NULL +#define sirfsoc_rtc_resume NULL +#define sirfsoc_rtc_freeze NULL +#define sirfsoc_rtc_thaw NULL +#define sirfsoc_rtc_restore NULL +#endif + +static const struct dev_pm_ops sirfsoc_rtc_pm_ops = { + .suspend = sirfsoc_rtc_suspend, + .resume = sirfsoc_rtc_resume, + .freeze = sirfsoc_rtc_freeze, + .thaw = sirfsoc_rtc_thaw, + .restore = sirfsoc_rtc_restore, +}; + +static struct platform_driver sirfsoc_rtc_driver = { + .driver = { + .name = "sirfsoc-rtc", + .owner = THIS_MODULE, +#ifdef CONFIG_PM + .pm = &sirfsoc_rtc_pm_ops, +#endif + .of_match_table = of_match_ptr(sirfsoc_rtc_of_match), + }, + .probe = sirfsoc_rtc_probe, + .remove = sirfsoc_rtc_remove, +}; +module_platform_driver(sirfsoc_rtc_driver); + +MODULE_DESCRIPTION("SiRF SoC rtc driver"); +MODULE_AUTHOR("Xianglong Du "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:sirfsoc-rtc"); -- cgit v0.10.2 From c7ef972c440fc9f1eda28b450cd30ad15c4d60cf Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Wed, 3 Jul 2013 15:08:05 -0700 Subject: nilfs2: implement calculation of free inodes count Currently, NILFS2 returns 0 as free inodes count (f_ffree) and current used inodes count as total file nodes in file system (f_files): df -i Filesystem Inodes IUsed IFree IUse% Mounted on /dev/loop0 2 2 0 100% /mnt/nilfs2 This patch implements real calculation of free inodes count. First of all, it is calculated total file nodes in file system as (desc_blocks_count * groups_per_desc_block * entries_per_group). Then, it is calculated free inodes count as difference the total file nodes and used inodes count. As a result, we have such output for NILFS2: df -i Filesystem Inodes IUsed IFree IUse% Mounted on /dev/loop0 4194304 2114701 2079603 51% /mnt/nilfs2 Reported-by: Clemens Eisserer Signed-off-by: Vyacheslav Dubeyko Signed-off-by: Ryusuke Konishi Tested-by: Vyacheslav Dubeyko Cc: Joern Engel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index eed4d7b..741fd02 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -398,6 +398,69 @@ nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode, } /** + * nilfs_palloc_count_desc_blocks - count descriptor blocks number + * @inode: inode of metadata file using this allocator + * @desc_blocks: descriptor blocks number [out] + */ +static int nilfs_palloc_count_desc_blocks(struct inode *inode, + unsigned long *desc_blocks) +{ + unsigned long blknum; + int ret; + + ret = nilfs_bmap_last_key(NILFS_I(inode)->i_bmap, &blknum); + if (likely(!ret)) + *desc_blocks = DIV_ROUND_UP( + blknum, NILFS_MDT(inode)->mi_blocks_per_desc_block); + return ret; +} + +/** + * nilfs_palloc_mdt_file_can_grow - check potential opportunity for + * MDT file growing + * @inode: inode of metadata file using this allocator + * @desc_blocks: known current descriptor blocks count + */ +static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode, + unsigned long desc_blocks) +{ + return (nilfs_palloc_groups_per_desc_block(inode) * desc_blocks) < + nilfs_palloc_groups_count(inode); +} + +/** + * nilfs_palloc_count_max_entries - count max number of entries that can be + * described by descriptor blocks count + * @inode: inode of metadata file using this allocator + * @nused: current number of used entries + * @nmaxp: max number of entries [out] + */ +int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp) +{ + unsigned long desc_blocks = 0; + u64 entries_per_desc_block, nmax; + int err; + + err = nilfs_palloc_count_desc_blocks(inode, &desc_blocks); + if (unlikely(err)) + return err; + + entries_per_desc_block = (u64)nilfs_palloc_entries_per_group(inode) * + nilfs_palloc_groups_per_desc_block(inode); + nmax = entries_per_desc_block * desc_blocks; + + if (nused == nmax && + nilfs_palloc_mdt_file_can_grow(inode, desc_blocks)) + nmax += entries_per_desc_block; + + if (nused > nmax) + return -ERANGE; + + *nmaxp = nmax; + return 0; +} + +/** * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object * @inode: inode of metadata file using this allocator * @req: nilfs_palloc_req structure exchanged for the allocation diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index fb72381..4bd6451 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -48,6 +48,8 @@ int nilfs_palloc_get_entry_block(struct inode *, __u64, int, void *nilfs_palloc_block_get_entry(const struct inode *, __u64, const struct buffer_head *, void *); +int nilfs_palloc_count_max_entries(struct inode *, u64, u64 *); + /** * nilfs_palloc_req - persistent allocator request and reply * @pr_entry_nr: entry number (vblocknr or inode number) diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index d8e65bd..d788a59 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -160,6 +160,28 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, } /** + * nilfs_ifile_count_free_inodes - calculate free inodes count + * @ifile: ifile inode + * @nmaxinodes: current maximum of available inodes count [out] + * @nfreeinodes: free inodes count [out] + */ +int nilfs_ifile_count_free_inodes(struct inode *ifile, + u64 *nmaxinodes, u64 *nfreeinodes) +{ + u64 nused; + int err; + + *nmaxinodes = 0; + *nfreeinodes = 0; + + nused = atomic_read(&NILFS_I(ifile)->i_root->inodes_count); + err = nilfs_palloc_count_max_entries(ifile, nused, nmaxinodes); + if (likely(!err)) + *nfreeinodes = *nmaxinodes - nused; + return err; +} + +/** * nilfs_ifile_read - read or get ifile inode * @sb: super block instance * @root: root object diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h index 59b6f2b..679674d 100644 --- a/fs/nilfs2/ifile.h +++ b/fs/nilfs2/ifile.h @@ -49,6 +49,8 @@ int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **); int nilfs_ifile_delete_inode(struct inode *, ino_t); int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); +int nilfs_ifile_count_free_inodes(struct inode *, u64 *, u64 *); + int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, size_t inode_size, struct nilfs_inode *raw_inode, struct inode **inodep); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index c7d1f9f..7d257e7 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -609,6 +609,7 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) unsigned long overhead; unsigned long nrsvblocks; sector_t nfreeblocks; + u64 nmaxinodes, nfreeinodes; int err; /* @@ -633,14 +634,34 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) if (unlikely(err)) return err; + err = nilfs_ifile_count_free_inodes(root->ifile, + &nmaxinodes, &nfreeinodes); + if (unlikely(err)) { + printk(KERN_WARNING + "NILFS warning: fail to count free inodes: err %d.\n", + err); + if (err == -ERANGE) { + /* + * If nilfs_palloc_count_max_entries() returns + * -ERANGE error code then we simply treat + * curent inodes count as maximum possible and + * zero as free inodes value. + */ + nmaxinodes = atomic_read(&root->inodes_count); + nfreeinodes = 0; + err = 0; + } else + return err; + } + buf->f_type = NILFS_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = blocks - overhead; buf->f_bfree = nfreeblocks; buf->f_bavail = (buf->f_bfree >= nrsvblocks) ? (buf->f_bfree - nrsvblocks) : 0; - buf->f_files = atomic_read(&root->inodes_count); - buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ + buf->f_files = nmaxinodes; + buf->f_ffree = nfreeinodes; buf->f_namelen = NILFS_NAME_LEN; buf->f_fsid.val[0] = (u32)id; buf->f_fsid.val[1] = (u32)(id >> 32); -- cgit v0.10.2 From e5f7f84843154db8b6ef5b2ac5e286f72212f54e Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Wed, 3 Jul 2013 15:08:06 -0700 Subject: ] nilfs2: use atomic64_t type for inodes_count and blocks_count fields in nilfs_root struct The cp_inodes_count and cp_blocks_count are represented as __le64 type in on-disk structure (struct nilfs_checkpoint). But analogous fields in in-core structure (struct nilfs_root) are represented by atomic_t type. This patch replaces atomic_t on atomic64_t type in representation of inodes_count and blocks_count fields in struct nilfs_root. Signed-off-by: Vyacheslav Dubeyko Acked-by: Ryusuke Konishi Acked-by: Joern Engel Cc: Clemens Eisserer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index d788a59..6548c78 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -174,7 +174,7 @@ int nilfs_ifile_count_free_inodes(struct inode *ifile, *nmaxinodes = 0; *nfreeinodes = 0; - nused = atomic_read(&NILFS_I(ifile)->i_root->inodes_count); + nused = atomic64_read(&NILFS_I(ifile)->i_root->inodes_count); err = nilfs_palloc_count_max_entries(ifile, nused, nmaxinodes); if (likely(!err)) *nfreeinodes = *nmaxinodes - nused; diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index bccfec8..b1a5277 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -54,7 +54,7 @@ void nilfs_inode_add_blocks(struct inode *inode, int n) inode_add_bytes(inode, (1 << inode->i_blkbits) * n); if (root) - atomic_add(n, &root->blocks_count); + atomic64_add(n, &root->blocks_count); } void nilfs_inode_sub_blocks(struct inode *inode, int n) @@ -63,7 +63,7 @@ void nilfs_inode_sub_blocks(struct inode *inode, int n) inode_sub_bytes(inode, (1 << inode->i_blkbits) * n); if (root) - atomic_sub(n, &root->blocks_count); + atomic64_sub(n, &root->blocks_count); } /** @@ -369,7 +369,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) goto failed_ifile_create_inode; /* reference count of i_bh inherits from nilfs_mdt_read_block() */ - atomic_inc(&root->inodes_count); + atomic64_inc(&root->inodes_count); inode_init_owner(inode, dir, mode); inode->i_ino = ino; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; @@ -801,7 +801,7 @@ void nilfs_evict_inode(struct inode *inode) ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); if (!ret) - atomic_dec(&ii->i_root->inodes_count); + atomic64_dec(&ii->i_root->inodes_count); nilfs_clear_inode(inode); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index a5752a58..bd88a74 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -835,9 +835,9 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) raw_cp->cp_snapshot_list.ssl_next = 0; raw_cp->cp_snapshot_list.ssl_prev = 0; raw_cp->cp_inodes_count = - cpu_to_le64(atomic_read(&sci->sc_root->inodes_count)); + cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count)); raw_cp->cp_blocks_count = - cpu_to_le64(atomic_read(&sci->sc_root->blocks_count)); + cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count)); raw_cp->cp_nblk_inc = cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 7d257e7..1427de5 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -554,8 +554,10 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, if (err) goto failed_bh; - atomic_set(&root->inodes_count, le64_to_cpu(raw_cp->cp_inodes_count)); - atomic_set(&root->blocks_count, le64_to_cpu(raw_cp->cp_blocks_count)); + atomic64_set(&root->inodes_count, + le64_to_cpu(raw_cp->cp_inodes_count)); + atomic64_set(&root->blocks_count, + le64_to_cpu(raw_cp->cp_blocks_count)); nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); @@ -647,7 +649,7 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) * curent inodes count as maximum possible and * zero as free inodes value. */ - nmaxinodes = atomic_read(&root->inodes_count); + nmaxinodes = atomic64_read(&root->inodes_count); nfreeinodes = 0; err = 0; } else diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 41e6a04..94c451c 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -764,8 +764,8 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno) new->ifile = NULL; new->nilfs = nilfs; atomic_set(&new->count, 1); - atomic_set(&new->inodes_count, 0); - atomic_set(&new->blocks_count, 0); + atomic64_set(&new->inodes_count, 0); + atomic64_set(&new->blocks_count, 0); rb_link_node(&new->rb_node, parent, p); rb_insert_color(&new->rb_node, &nilfs->ns_cptree); diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index be1267a..de8cc53 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -241,8 +241,8 @@ struct nilfs_root { struct the_nilfs *nilfs; struct inode *ifile; - atomic_t inodes_count; - atomic_t blocks_count; + atomic64_t inodes_count; + atomic64_t blocks_count; }; /* Special checkpoint number */ -- cgit v0.10.2 From e68e96d2a7b2391cc1f70e207ec9cbe9d092adc9 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Wed, 3 Jul 2013 15:08:08 -0700 Subject: fs/fat: use fat_msg() to replace printk() in __fat_fs_error() Signed-off-by: Gu Zheng Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 359d307..628e22a 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -30,7 +30,7 @@ void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...) va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; - printk(KERN_ERR "FAT-fs (%s): error, %pV\n", sb->s_id, &vaf); + fat_msg(sb, KERN_ERR, "error, %pV", &vaf); va_end(args); } @@ -38,8 +38,7 @@ void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...) panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id); else if (opts->errors == FAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) { sb->s_flags |= MS_RDONLY; - printk(KERN_ERR "FAT-fs (%s): Filesystem has been " - "set read-only\n", sb->s_id); + fat_msg(sb, KERN_ERR, "Filesystem has been set read-only"); } } EXPORT_SYMBOL_GPL(__fat_fs_error); -- cgit v0.10.2 From b57a0505e750b3d7b2d39e9823b276d8ca1a08fe Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Jul 2013 15:08:08 -0700 Subject: Documentation/CodingStyle: allow multiple return statements per function A surprising number of newbies interpret this section to mean that only one return statement is allowed per function. Part of the problem is that the "one return statement per function" rule is an actual style guideline that people are used to from other projects. Signed-off-by: Dan Carpenter Cc: Eduardo Valentin Cc: Rob Landley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle index e00b8f0..7fe0546 100644 --- a/Documentation/CodingStyle +++ b/Documentation/CodingStyle @@ -389,7 +389,8 @@ Albeit deprecated by some people, the equivalent of the goto statement is used frequently by compilers in form of the unconditional jump instruction. The goto statement comes in handy when a function exits from multiple -locations and some common work such as cleanup has to be done. +locations and some common work such as cleanup has to be done. If there is no +cleanup needed then just return directly. The rationale is: -- cgit v0.10.2 From 4d8eaaae7629074b6d3455cb71632f5969f34de7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 3 Jul 2013 15:08:10 -0700 Subject: docbook: add futexes to kernel-locking docbook Add Fast User Mutexes (futexes) to kernel-locking docbook. Signed-off-by: Randy Dunlap Acked-by: Rob Landley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 67e7ab4..09e884e 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -1955,12 +1955,17 @@ machines due to caching. - + Mutex API reference !Iinclude/linux/mutex.h !Ekernel/mutex.c + + Futex API reference +!Ikernel/futex.c + + Further reading -- cgit v0.10.2 From 37f07655522042399db7a4b54aee830bdb52ce2c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:11 -0700 Subject: x86: kill TIF_DEBUG Because it is not used. Signed-off-by: Oleg Nesterov Cc: Benjamin Herrenschmidt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jan Kratochvil Cc: Michael Neuling Cc: Paul Mackerras Cc: Paul Mundt Cc: Will Deacon Cc: Prasad Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index a1df6e8..2781119 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -89,7 +89,6 @@ struct thread_info { #define TIF_FORK 18 /* ret_from_fork */ #define TIF_NOHZ 19 /* in adaptive nohz mode */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ -#define TIF_DEBUG 21 /* uses debug registers */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ #define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ @@ -113,7 +112,6 @@ struct thread_info { #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) #define _TIF_NOHZ (1 << TIF_NOHZ) -#define _TIF_DEBUG (1 << TIF_DEBUG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) @@ -154,7 +152,7 @@ struct thread_info { (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) -#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) +#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) #define PREEMPT_ACTIVE 0x10000000 -- cgit v0.10.2 From 29000caecbe87b6b66f144f72111f0d02fbbf0c1 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 3 Jul 2013 15:08:12 -0700 Subject: ptrace: add ability to get/set signal-blocked mask crtools uses a parasite code for dumping processes. The parasite code is injected into a process with help PTRACE_SEIZE. Currently crtools blocks signals from a parasite code. If a process has pending signals, crtools wait while a process handles these signals. This method is not suitable for stopped tasks. A stopped task can have a few pending signals, when we will try to execute a parasite code, we will need to drop SIGSTOP, but all other signals must remain pending, because a state of processes must not be changed during checkpointing. This patch adds two ptrace commands to set/get signal-blocked mask. I think gdb can use this commands too. [akpm@linux-foundation.org: be consistent with brace layout] Signed-off-by: Andrey Vagin Reviewed-by: Oleg Nesterov Cc: Roland McGrath Cc: Michael Kerrisk Cc: Pavel Emelyanov Cc: Cyrill Gorcunov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index 52ebcc8..cf1019e 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -61,6 +61,9 @@ struct ptrace_peeksiginfo_args { __s32 nr; /* how may siginfos to take */ }; +#define PTRACE_GETSIGMASK 0x420a +#define PTRACE_SETSIGMASK 0x420b + /* Read signals from a shared (process wide) queue */ #define PTRACE_PEEKSIGINFO_SHARED (1 << 0) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 335a7ae..ba5e6ce 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -844,6 +844,47 @@ int ptrace_request(struct task_struct *child, long request, ret = ptrace_setsiginfo(child, &siginfo); break; + case PTRACE_GETSIGMASK: + if (addr != sizeof(sigset_t)) { + ret = -EINVAL; + break; + } + + if (copy_to_user(datavp, &child->blocked, sizeof(sigset_t))) + ret = -EFAULT; + else + ret = 0; + + break; + + case PTRACE_SETSIGMASK: { + sigset_t new_set; + + if (addr != sizeof(sigset_t)) { + ret = -EINVAL; + break; + } + + if (copy_from_user(&new_set, datavp, sizeof(sigset_t))) { + ret = -EFAULT; + break; + } + + sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); + + /* + * Every thread does recalc_sigpending() after resume, so + * retarget_shared_pending() and recalc_sigpending() are not + * called here. + */ + spin_lock_irq(&child->sighand->siglock); + child->blocked = new_set; + spin_unlock_irq(&child->sighand->siglock); + + ret = 0; + break; + } + case PTRACE_INTERRUPT: /* * Stop tracee without any side-effect on signal or job @@ -948,8 +989,7 @@ int ptrace_request(struct task_struct *child, long request, #ifdef CONFIG_HAVE_ARCH_TRACEHOOK case PTRACE_GETREGSET: - case PTRACE_SETREGSET: - { + case PTRACE_SETREGSET: { struct iovec kiov; struct iovec __user *uiov = datavp; -- cgit v0.10.2 From 77d55918029ef4626bfd6ba716728dcc7d919de3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:14 -0700 Subject: signals: eventpoll: do not use sigprocmask() sigprocmask() should die. None of the current callers actually need this strange interface. Change fs/eventpoll.c to use set_current_blocked(). This also means we should not worry about SIGKILL/SIGSTOP. Signed-off-by: Oleg Nesterov Cc: Al Viro Cc: Denys Vlasenko Cc: Eric Wong Cc: Jason Baron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/eventpoll.c b/fs/eventpoll.c index deecc72..0f0f736 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1975,8 +1975,8 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, return -EINVAL; if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) return -EFAULT; - sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); + sigsaved = current->blocked; + set_current_blocked(&ksigmask); } error = sys_epoll_wait(epfd, events, maxevents, timeout); @@ -1993,7 +1993,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, sizeof(sigsaved)); set_restore_sigmask(); } else - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + set_current_blocked(&sigsaved); } return error; @@ -2020,8 +2020,8 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, if (copy_from_user(&csigmask, sigmask, sizeof(csigmask))) return -EFAULT; sigset_from_compat(&ksigmask, &csigmask); - sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); + sigsaved = current->blocked; + set_current_blocked(&ksigmask); } err = sys_epoll_wait(epfd, events, maxevents, timeout); @@ -2038,7 +2038,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, sizeof(sigsaved)); set_restore_sigmask(); } else - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + set_current_blocked(&sigsaved); } return err; -- cgit v0.10.2 From 7f57cfa4e2aa29fabe69e41529fd26578adc9b58 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:15 -0700 Subject: usermodehelper: kill the sub_info->path[0] check call_usermodehelper_exec() does nothing but returns success if path[0] == 0. The only user which needs this strange feature is request_module(), it can check modprobe_path[0] itself like other users do if they want to detect the "disabled by admin" case. Kill it. Not only it looks strange, it can confuse other callers. And this allows us to revert 264b83c0 ("usermodehelper: check subprocess_info->path != NULL"), do_execve(NULL) is safe. Signed-off-by: Oleg Nesterov Acked-by: Rusty Russell Cc: Lucas De Marchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/kmod.c b/kernel/kmod.c index 8241906..fb32636 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -147,6 +147,9 @@ int __request_module(bool wait, const char *fmt, ...) */ WARN_ON_ONCE(wait && current_is_async()); + if (!modprobe_path[0]) + return 0; + va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); va_end(args); @@ -569,14 +572,6 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) int retval = 0; helper_lock(); - if (!sub_info->path) { - retval = -EINVAL; - goto out; - } - - if (sub_info->path[0] == '\0') - goto out; - if (!khelper_wq || usermodehelper_disabled) { retval = -EBUSY; goto out; -- cgit v0.10.2 From e7fd1549aeb83e34ee0955cdf5dee5d4088508f3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:16 -0700 Subject: coredump: format_corename() can leak cn->corename do_coredump() assumes that format_corename() can only fail if expand_corename() fails and frees cn->corename. This is not true, for example cn_print_exe_file() can fail and in this case nobody frees cn->corename. Change do_coredump() to always do kfree(cn->corename) after it calls format_corename() (NULL is fine), change expand_corename() to do nothing if kmalloc() fails. Signed-off-by: Oleg Nesterov Cc: Andi Kleen Cc: Colin Walters Cc: Denys Vlasenko Cc: Jiri Slaby Cc: Lennart Poettering Cc: Lucas De Marchi Acked-by: Neil Horman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/coredump.c b/fs/coredump.c index dafafba..11bc368 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -58,16 +58,14 @@ static atomic_t call_count = ATOMIC_INIT(1); static int expand_corename(struct core_name *cn) { - char *old_corename = cn->corename; + int size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); + char *corename = krealloc(cn->corename, size, GFP_KERNEL); - cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); - cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL); - - if (!cn->corename) { - kfree(old_corename); + if (!corename) return -ENOMEM; - } + cn->size = size; + cn->corename = corename; return 0; } @@ -157,10 +155,9 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) int pid_in_pattern = 0; int err = 0; + cn->used = 0; cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count); cn->corename = kmalloc(cn->size, GFP_KERNEL); - cn->used = 0; - if (!cn->corename) return -ENOMEM; @@ -549,7 +546,7 @@ void do_coredump(siginfo_t *siginfo) if (ispipe < 0) { printk(KERN_WARNING "format_corename failed\n"); printk(KERN_WARNING "Aborting core\n"); - goto fail_corename; + goto fail_unlock; } if (cprm.limit == 1) { @@ -669,7 +666,6 @@ fail_dropcount: atomic_dec(&core_dump_count); fail_unlock: kfree(cn.corename); -fail_corename: coredump_finish(mm, core_dumped); revert_creds(old_cred); fail_creds: -- cgit v0.10.2 From bc03c691aa86948af4e272ebdcdd4203018210f3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:17 -0700 Subject: coredump: introduce cn_vprintf() Turn cn_printf(...) into cn_vprintf(va_list args), reintroduce cn_printf() as a trivial wrapper. This simplifies the next change and cn_vprintf() will have more callers. Signed-off-by: Oleg Nesterov Cc: Andi Kleen Cc: Colin Walters Cc: Denys Vlasenko Cc: Jiri Slaby Cc: Lennart Poettering Cc: Lucas De Marchi Acked-by: Neil Horman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/coredump.c b/fs/coredump.c index 11bc368..c10a43a 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -69,17 +69,13 @@ static int expand_corename(struct core_name *cn) return 0; } -static int cn_printf(struct core_name *cn, const char *fmt, ...) +static int cn_vprintf(struct core_name *cn, const char *fmt, va_list arg) { char *cur; int need; int ret; - va_list arg; - va_start(arg, fmt); need = vsnprintf(NULL, 0, fmt, arg); - va_end(arg); - if (likely(need < cn->size - cn->used - 1)) goto out_printf; @@ -89,9 +85,7 @@ static int cn_printf(struct core_name *cn, const char *fmt, ...) out_printf: cur = cn->corename + cn->used; - va_start(arg, fmt); vsnprintf(cur, need + 1, fmt, arg); - va_end(arg); cn->used += need; return 0; @@ -99,6 +93,18 @@ expand_fail: return ret; } +static int cn_printf(struct core_name *cn, const char *fmt, ...) +{ + va_list arg; + int ret; + + va_start(arg, fmt); + ret = cn_vprintf(cn, fmt, arg); + va_end(arg); + + return ret; +} + static void cn_escape(char *str) { for (; *str; str++) -- cgit v0.10.2 From 5fe9d8ca21cc1517258fe448639392d5d542eec6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:19 -0700 Subject: coredump: cn_vprintf() has no reason to call vsnprintf() twice cn_vprintf() looks really overcomplicated and sub-optimal. We do not need vsnprintf(NULL) to calculate the size we need, we can simply try to print into the current buffer and expand/retry only if necessary. Signed-off-by: Oleg Nesterov Cc: Andi Kleen Cc: Colin Walters Cc: Denys Vlasenko Cc: Jiri Slaby Cc: Lennart Poettering Cc: Lucas De Marchi Acked-by: Neil Horman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/coredump.c b/fs/coredump.c index c10a43a..2b1d1f5 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -71,26 +71,20 @@ static int expand_corename(struct core_name *cn) static int cn_vprintf(struct core_name *cn, const char *fmt, va_list arg) { - char *cur; - int need; - int ret; - - need = vsnprintf(NULL, 0, fmt, arg); - if (likely(need < cn->size - cn->used - 1)) - goto out_printf; - - ret = expand_corename(cn); - if (ret) - goto expand_fail; + int free, need; + +again: + free = cn->size - cn->used; + need = vsnprintf(cn->corename + cn->used, free, fmt, arg); + if (need < free) { + cn->used += need; + return 0; + } -out_printf: - cur = cn->corename + cn->used; - vsnprintf(cur, need + 1, fmt, arg); - cn->used += need; - return 0; + if (!expand_corename(cn)) + goto again; -expand_fail: - return ret; + return -ENOMEM; } static int cn_printf(struct core_name *cn, const char *fmt, ...) -- cgit v0.10.2 From 923bed030ff6e20b5176e10da151fade83097891 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:20 -0700 Subject: coredump: kill cn_escape(), introduce cn_esc_printf() The usage of cn_escape() looks really annoying, imho this sequence needs a wrapper. And it is buggy. If cn_printf() does expand_corename() cn_escape() writes to the freed memory. Introduce cn_esc_printf() which hopefully does this all right. It records the index before cn_vprintf(), not "char *" which is no longer valid (in general) after krealloc(). Signed-off-by: Oleg Nesterov Cc: Andi Kleen Cc: Colin Walters Cc: Denys Vlasenko Cc: Jiri Slaby Cc: Lennart Poettering Cc: Lucas De Marchi Acked-by: Neil Horman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/coredump.c b/fs/coredump.c index 2b1d1f5..90d7cee 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -99,11 +99,21 @@ static int cn_printf(struct core_name *cn, const char *fmt, ...) return ret; } -static void cn_escape(char *str) +static int cn_esc_printf(struct core_name *cn, const char *fmt, ...) { - for (; *str; str++) - if (*str == '/') - *str = '!'; + int cur = cn->used; + va_list arg; + int ret; + + va_start(arg, fmt); + ret = cn_vprintf(cn, fmt, arg); + va_end(arg); + + for (; cur < cn->used; ++cur) { + if (cn->corename[cur] == '/') + cn->corename[cur] = '!'; + } + return ret; } static int cn_print_exe_file(struct core_name *cn) @@ -113,12 +123,8 @@ static int cn_print_exe_file(struct core_name *cn) int ret; exe_file = get_mm_exe_file(current->mm); - if (!exe_file) { - char *commstart = cn->corename + cn->used; - ret = cn_printf(cn, "%s (path unknown)", current->comm); - cn_escape(commstart); - return ret; - } + if (!exe_file) + return cn_esc_printf(cn, "%s (path unknown)", current->comm); pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); if (!pathbuf) { @@ -132,9 +138,7 @@ static int cn_print_exe_file(struct core_name *cn) goto free_buf; } - cn_escape(path); - - ret = cn_printf(cn, "%s", path); + ret = cn_esc_printf(cn, "%s", path); free_buf: kfree(pathbuf); @@ -207,22 +211,16 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) break; } /* hostname */ - case 'h': { - char *namestart = cn->corename + cn->used; + case 'h': down_read(&uts_sem); - err = cn_printf(cn, "%s", + err = cn_esc_printf(cn, "%s", utsname()->nodename); up_read(&uts_sem); - cn_escape(namestart); break; - } /* executable */ - case 'e': { - char *commstart = cn->corename + cn->used; - err = cn_printf(cn, "%s", current->comm); - cn_escape(commstart); + case 'e': + err = cn_esc_printf(cn, "%s", current->comm); break; - } case 'E': err = cn_print_exe_file(cn); break; -- cgit v0.10.2 From 3ceadcf6d489650ade673b7197c11c521aecb038 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:22 -0700 Subject: coredump: kill call_count, add core_name_size Imho, "atomic_t call_count" is ugly and should die. It buys nothing and in fact it can grow more than necessary, expand doesn't check if it was already incremented by another task. Kill it, and introduce "static int core_name_size" updated by expand_corename(). This is obviously racy too but harmless, and core_name_size never grows for no reason. We do not bother to to calculate the "right" new size, we simply do kmalloc(size_we_need) and use ksize() to rely on kmalloc_index's decision. Finally change format_corename() to use expand_corename(), krealloc(NULL) is fine. Signed-off-by: Oleg Nesterov Cc: Andi Kleen Cc: Colin Walters Cc: Denys Vlasenko Cc: Jiri Slaby Cc: Lennart Poettering Cc: Lucas De Marchi Acked-by: Neil Horman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/coredump.c b/fs/coredump.c index 90d7cee..56a9ab9 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -45,26 +45,28 @@ #include int core_uses_pid; -char core_pattern[CORENAME_MAX_SIZE] = "core"; unsigned int core_pipe_limit; +char core_pattern[CORENAME_MAX_SIZE] = "core"; +static int core_name_size = CORENAME_MAX_SIZE; struct core_name { char *corename; int used, size; }; -static atomic_t call_count = ATOMIC_INIT(1); /* The maximal length of core_pattern is also specified in sysctl.c */ -static int expand_corename(struct core_name *cn) +static int expand_corename(struct core_name *cn, int size) { - int size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); char *corename = krealloc(cn->corename, size, GFP_KERNEL); if (!corename) return -ENOMEM; - cn->size = size; + if (size > core_name_size) /* racy but harmless */ + core_name_size = size; + + cn->size = ksize(corename); cn->corename = corename; return 0; } @@ -81,7 +83,7 @@ again: return 0; } - if (!expand_corename(cn)) + if (!expand_corename(cn, cn->size + need - free + 1)) goto again; return -ENOMEM; @@ -160,9 +162,8 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) int err = 0; cn->used = 0; - cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count); - cn->corename = kmalloc(cn->size, GFP_KERNEL); - if (!cn->corename) + cn->corename = NULL; + if (expand_corename(cn, core_name_size)) return -ENOMEM; /* Repeat as long as we have more pattern to process and more output -- cgit v0.10.2 From 888ffc5923e4343a78575918ab781e85fa22d244 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:23 -0700 Subject: coredump: '% at the end' shouldn't bypass core_uses_pid logic "goto end" should not bypass the "Backward compatibility with core_uses_pid" code, move this label up. While at it, - It is ugly to copy '|' into cn->corename and then inc the pointer for argv_split(). Change format_corename() to increment pat_ptr instead. - Remove the dead "if (*pat_ptr == 0)" in format_corename(), we already checked it is not zero. Signed-off-by: Oleg Nesterov Cc: Andi Kleen Cc: Colin Walters Cc: Denys Vlasenko Cc: Jiri Slaby Cc: Lennart Poettering Cc: Lucas De Marchi Acked-by: Neil Horman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/coredump.c b/fs/coredump.c index 56a9ab9..72f816d 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -165,13 +165,15 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) cn->corename = NULL; if (expand_corename(cn, core_name_size)) return -ENOMEM; + cn->corename[0] = '\0'; + + if (ispipe) + ++pat_ptr; /* Repeat as long as we have more pattern to process and more output space */ while (*pat_ptr) { if (*pat_ptr != '%') { - if (*pat_ptr == 0) - goto out; err = cn_printf(cn, "%c", *pat_ptr++); } else { switch (*++pat_ptr) { @@ -240,6 +242,7 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) return err; } +out: /* Backward compatibility with core_uses_pid: * * If core_pattern does not include a %p (as is the default) @@ -250,7 +253,6 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) if (err) return err; } -out: return ispipe; } @@ -580,7 +582,7 @@ void do_coredump(siginfo_t *siginfo) goto fail_dropcount; } - helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL); + helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL); if (!helper_argv) { printk(KERN_WARNING "%s failed to allocate memory\n", __func__); @@ -597,7 +599,7 @@ void do_coredump(siginfo_t *siginfo) argv_free(helper_argv); if (retval) { - printk(KERN_INFO "Core dump to %s pipe failed\n", + printk(KERN_INFO "Core dump to |%s pipe failed\n", cn.corename); goto close_fail; } -- cgit v0.10.2 From 3f4185483832ccf3d2977923db576fa689c2abce Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:25 -0700 Subject: fs/exec.c:de_thread(): use change_pid() rather than detach_pid/attach_pid de_thread() can use change_pid() instead of detach + attach. This looks better and this ensures that, say, next_thread() can never see a task with ->pid == NULL. Signed-off-by: Oleg Nesterov Acked-by: "Eric W. Biederman" Cc: Michal Hocko Cc: Pavel Emelyanov Cc: Sergey Dyasly Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/exec.c b/fs/exec.c index 03b907c..7619ddd 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -947,9 +947,8 @@ static int de_thread(struct task_struct *tsk) * Note: The old leader also uses this pid until release_task * is called. Odd but simple and correct. */ - detach_pid(tsk, PIDTYPE_PID); tsk->pid = leader->pid; - attach_pid(tsk, PIDTYPE_PID, task_pid(leader)); + change_pid(tsk, PIDTYPE_PID, task_pid(leader)); transfer_pid(leader, tsk, PIDTYPE_PGID); transfer_pid(leader, tsk, PIDTYPE_SID); -- cgit v0.10.2 From 81dabb464139324c005159f5afba377104d8828d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:26 -0700 Subject: exit.c: unexport __set_special_pids() Move __set_special_pids() from exit.c to sys.c close to its single caller and make it static. And rename it to set_special_pids(), another helper with this name has gone away. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/sched.h b/include/linux/sched.h index ec80684..cdd5407 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1950,8 +1950,6 @@ extern struct task_struct *find_task_by_vpid(pid_t nr); extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); -extern void __set_special_pids(struct pid *pid); - /* per-UID process charging. */ extern struct user_struct * alloc_uid(kuid_t); static inline struct user_struct *get_uid(struct user_struct *u) diff --git a/kernel/exit.c b/kernel/exit.c index 7bb73f9..3a77cd9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -312,17 +312,6 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent) } } -void __set_special_pids(struct pid *pid) -{ - struct task_struct *curr = current->group_leader; - - if (task_session(curr) != pid) - change_pid(curr, PIDTYPE_SID, pid); - - if (task_pgrp(curr) != pid) - change_pid(curr, PIDTYPE_PGID, pid); -} - /* * Let kernel threads use this to say that they allow a certain signal. * Must not be used if kthread was cloned with CLONE_SIGHAND. diff --git a/kernel/sys.c b/kernel/sys.c index 7bf50dc..071de90 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1309,6 +1309,17 @@ out: return retval; } +static void set_special_pids(struct pid *pid) +{ + struct task_struct *curr = current->group_leader; + + if (task_session(curr) != pid) + change_pid(curr, PIDTYPE_SID, pid); + + if (task_pgrp(curr) != pid) + change_pid(curr, PIDTYPE_PGID, pid); +} + SYSCALL_DEFINE0(setsid) { struct task_struct *group_leader = current->group_leader; @@ -1328,7 +1339,7 @@ SYSCALL_DEFINE0(setsid) goto out; group_leader->signal->leader = 1; - __set_special_pids(sid); + set_special_pids(sid); proc_clear_tty(group_leader); -- cgit v0.10.2 From 1d98a5fa11e9a66a7cf7b03f73cab60184781065 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:27 -0700 Subject: fs/proc/uptime.c:uptime_proc_show(): use get_monotonic_boottime() Change uptime_proc_show() to use get_monotonic_boottime() instead of do_posix_clock_monotonic_gettime() + monotonic_to_bootbased(). Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Acked-by: John Stultz Cc: Tomas Janousek Cc: Tomas Smetana Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 9610ac7..0618946 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -20,8 +20,7 @@ static int uptime_proc_show(struct seq_file *m, void *v) for_each_possible_cpu(i) idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; - do_posix_clock_monotonic_gettime(&uptime); - monotonic_to_bootbased(&uptime); + get_monotonic_boottime(&uptime); nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC; idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); idle.tv_nsec = rem; -- cgit v0.10.2 From 30bc30df102b2d0c003d93477e04b97e6c528573 Mon Sep 17 00:00:00 2001 From: Zhao Hongjiang Date: Wed, 3 Jul 2013 15:08:28 -0700 Subject: fs/proc/kcore.c: using strlcpy() instead of strncpy() For NUL terminated string, set '\0' at the end. Signed-off-by: Zhao Hongjiang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 0a22194..06ea155 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -408,7 +408,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) prpsinfo.pr_zomb = 0; strcpy(prpsinfo.pr_fname, "vmlinux"); - strncpy(prpsinfo.pr_psargs, saved_command_line, ELF_PRARGSZ); + strlcpy(prpsinfo.pr_psargs, saved_command_line, sizeof(prpsinfo.pr_psargs)); nhdr->p_filesz += notesize(¬es[1]); bufp = storenote(¬es[1], bufp); -- cgit v0.10.2 From b57922b6c76c3ee401bb32fd3f298409dd6e6a53 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 3 Jul 2013 15:08:29 -0700 Subject: fork: reorder permissions when violating number of processes limits When a task is attempting to violate the RLIMIT_NPROC limit we have a check to see if the task is sufficiently priviledged. The check first looks at CAP_SYS_ADMIN, then CAP_SYS_RESOURCE, then if the task is uid=0. A result is that tasks which are allowed by the uid=0 check are first checked against the security subsystem. This results in the security subsystem auditting a denial for sys_admin and sys_resource and then the task passing the uid=0 check. This patch rearranges the code to first check uid=0, since if we pass that we shouldn't hit the security system at all. We then check sys_resource, since it is the smallest capability which will solve the problem. Lastly we check the fallback everything cap_sysadmin. We don't want to give this capability many places since it is so powerful. This will eliminate many of the false positive/needless denial messages we get when a root task tries to violate the nproc limit. (note that kthreads count against root, so on a sufficiently large machine we can actually get past the default limits before any userspace tasks are launched.) Signed-off-by: Eric Paris Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/fork.c b/kernel/fork.c index 987b28a..09dbda3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1199,8 +1199,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, retval = -EAGAIN; if (atomic_read(&p->real_cred->user->processes) >= task_rlimit(p, RLIMIT_NPROC)) { - if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && - p->real_cred->user != INIT_USER) + if (p->real_cred->user != INIT_USER && + !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) goto bad_fork_free; } current->flags &= ~PF_NPROC_EXCEEDED; -- cgit v0.10.2 From 80628ca06c5d42929de6bc22c0a41589a834d151 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:30 -0700 Subject: kernel/fork.c:copy_process(): unify CLONE_THREAD-or-thread_group_leader code Cleanup and preparation for the next changes. Move the "if (clone_flags & CLONE_THREAD)" code down under "if (likely(p->pid))" and turn it into into the "else" branch. This makes the process/thread initialization more symmetrical and removes one check. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Michal Hocko Cc: Pavel Emelyanov Cc: Sergey Dyasly Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/fork.c b/kernel/fork.c index 09dbda3..417cb86 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1446,14 +1446,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto bad_fork_free_pid; } - if (clone_flags & CLONE_THREAD) { - current->signal->nr_threads++; - atomic_inc(¤t->signal->live); - atomic_inc(¤t->signal->sigcnt); - p->group_leader = current->group_leader; - list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); - } - if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); @@ -1470,6 +1462,13 @@ static struct task_struct *copy_process(unsigned long clone_flags, list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); __this_cpu_inc(process_counts); + } else { + current->signal->nr_threads++; + atomic_inc(¤t->signal->live); + atomic_inc(¤t->signal->sigcnt); + p->group_leader = current->group_leader; + list_add_tail_rcu(&p->thread_group, + &p->group_leader->thread_group); } attach_pid(p, PIDTYPE_PID, pid); nr_threads++; -- cgit v0.10.2 From 8190773985141f063e1d6dc10200527c655abfb5 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:31 -0700 Subject: kernel/fork.c:copy_process(): don't add the uninitialized child to thread/task/pid lists copy_process() adds the new child to thread_group/init_task.tasks list and then does attach_pid(child, PIDTYPE_PID). This means that the lockless next_thread() or next_task() can see this thread with the wrong pid. Say, "ls /proc/pid/task" can list the same inode twice. We could move attach_pid(child, PIDTYPE_PID) up, but in this case find_task_by_vpid() can find the new thread before it was fully initialized. And this is already true for PIDTYPE_PGID/PIDTYPE_SID, With this patch copy_process() initializes child->pids[*].pid first, then calls attach_pid() to insert the task into the pid->tasks list. attach_pid() no longer need the "struct pid*" argument, it is always called after pid_link->pid was already set. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Michal Hocko Cc: Pavel Emelyanov Cc: Sergey Dyasly Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/pid.h b/include/linux/pid.h index a089a3c..23705a5 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -86,11 +86,9 @@ extern struct task_struct *get_pid_task(struct pid *pid, enum pid_type); extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type); /* - * attach_pid() and detach_pid() must be called with the tasklist_lock - * write-held. + * these helpers must be called with the tasklist_lock write-held. */ -extern void attach_pid(struct task_struct *task, enum pid_type type, - struct pid *pid); +extern void attach_pid(struct task_struct *task, enum pid_type); extern void detach_pid(struct task_struct *task, enum pid_type); extern void change_pid(struct task_struct *task, enum pid_type, struct pid *pid); diff --git a/kernel/fork.c b/kernel/fork.c index 417cb86..7d6962f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1121,6 +1121,12 @@ static void posix_cpu_timers_init(struct task_struct *tsk) INIT_LIST_HEAD(&tsk->cpu_timers[2]); } +static inline void +init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid) +{ + task->pids[type].pid = pid; +} + /* * This creates a new process as a copy of the old one, * but does not actually start it yet. @@ -1449,7 +1455,11 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); + init_task_pid(p, PIDTYPE_PID, pid); if (thread_group_leader(p)) { + init_task_pid(p, PIDTYPE_PGID, task_pgrp(current)); + init_task_pid(p, PIDTYPE_SID, task_session(current)); + if (is_child_reaper(pid)) { ns_of_pid(pid)->child_reaper = p; p->signal->flags |= SIGNAL_UNKILLABLE; @@ -1457,10 +1467,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->signal->leader_pid = pid; p->signal->tty = tty_kref_get(current->signal->tty); - attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); - attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); + attach_pid(p, PIDTYPE_PGID); + attach_pid(p, PIDTYPE_SID); __this_cpu_inc(process_counts); } else { current->signal->nr_threads++; @@ -1470,7 +1480,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); } - attach_pid(p, PIDTYPE_PID, pid); + attach_pid(p, PIDTYPE_PID); nr_threads++; } diff --git a/kernel/pid.c b/kernel/pid.c index 0db3e79..61980ce 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -373,14 +373,10 @@ EXPORT_SYMBOL_GPL(find_vpid); /* * attach_pid() must be called with the tasklist_lock write-held. */ -void attach_pid(struct task_struct *task, enum pid_type type, - struct pid *pid) +void attach_pid(struct task_struct *task, enum pid_type type) { - struct pid_link *link; - - link = &task->pids[type]; - link->pid = pid; - hlist_add_head_rcu(&link->node, &pid->tasks[type]); + struct pid_link *link = &task->pids[type]; + hlist_add_head_rcu(&link->node, &link->pid->tasks[type]); } static void __change_pid(struct task_struct *task, enum pid_type type, @@ -412,7 +408,7 @@ void change_pid(struct task_struct *task, enum pid_type type, struct pid *pid) { __change_pid(task, type, pid); - attach_pid(task, type, pid); + attach_pid(task, type); } /* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */ -- cgit v0.10.2 From 18c830df771f2ba8b4699fea9af1492275ae627b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:32 -0700 Subject: kernel/fork.c:copy_process(): consolidate the lockless CLONE_THREAD checks copy_process() does a lot of "chaotic" initializations and checks CLONE_THREAD twice before it takes tasklist. In particular it sets "p->group_leader = p" and then changes it again under tasklist if !thread_group_leader(p). This looks a bit confusing, lets create a single "if (CLONE_THREAD)" block which initializes ->exit_signal, ->group_leader, and ->tgid. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Michal Hocko Cc: Pavel Emelyanov Cc: Sergey Dyasly Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/fork.c b/kernel/fork.c index 7d6962f..6e6a1c1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1360,11 +1360,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto bad_fork_cleanup_io; } - p->pid = pid_nr(pid); - p->tgid = p->pid; - if (clone_flags & CLONE_THREAD) - p->tgid = current->tgid; - p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? @@ -1400,12 +1395,19 @@ static struct task_struct *copy_process(unsigned long clone_flags, clear_all_latency_tracing(p); /* ok, now we should be set up.. */ - if (clone_flags & CLONE_THREAD) + p->pid = pid_nr(pid); + if (clone_flags & CLONE_THREAD) { p->exit_signal = -1; - else if (clone_flags & CLONE_PARENT) - p->exit_signal = current->group_leader->exit_signal; - else - p->exit_signal = (clone_flags & CSIGNAL); + p->group_leader = current->group_leader; + p->tgid = current->tgid; + } else { + if (clone_flags & CLONE_PARENT) + p->exit_signal = current->group_leader->exit_signal; + else + p->exit_signal = (clone_flags & CSIGNAL); + p->group_leader = p; + p->tgid = p->pid; + } p->pdeath_signal = 0; p->exit_state = 0; @@ -1414,15 +1416,13 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); p->dirty_paused_when = 0; - /* - * Ok, make it visible to the rest of the system. - * We dont wake it up yet. - */ - p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); p->task_works = NULL; - /* Need tasklist lock for parent etc handling! */ + /* + * Make it visible to the rest of the system, but dont wake it up yet. + * Need tasklist lock for parent etc handling! + */ write_lock_irq(&tasklist_lock); /* CLONE_PARENT re-uses the old parent */ @@ -1476,7 +1476,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, current->signal->nr_threads++; atomic_inc(¤t->signal->live); atomic_inc(¤t->signal->sigcnt); - p->group_leader = current->group_leader; list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); } -- cgit v0.10.2 From bd9d43f47d6944019918a801398c587ee86c4b52 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:34 -0700 Subject: fs/exec.c: do_execve_common(): use current_user() Trivial cleanup. do_execve_common() can use current_user() and avoid the unnecessary "struct cred *cred" var. Signed-off-by: Oleg Nesterov Cc: Vasiliy Kulikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/exec.c b/fs/exec.c index 7619ddd..396aec2 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1464,7 +1464,6 @@ static int do_execve_common(const char *filename, struct files_struct *displaced; bool clear_in_exec; int retval; - const struct cred *cred = current_cred(); /* * We move the actual failure in case of RLIMIT_NPROC excess from @@ -1473,7 +1472,7 @@ static int do_execve_common(const char *filename, * whether NPROC limit is still exceeded. */ if ((current->flags & PF_NPROC_EXCEEDED) && - atomic_read(&cred->user->processes) > rlimit(RLIMIT_NPROC)) { + atomic_read(¤t_user()->processes) > rlimit(RLIMIT_NPROC)) { retval = -EAGAIN; goto out_ret; } -- cgit v0.10.2 From 266b7a021f7dcc4d4531961a47f4ef74c3c4ab6b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:35 -0700 Subject: fs/exec.c:de_thread: mt-exec should update ->real_start_time 924b42d5 ("Use boot based time for process start time and boot time in /proc") updated copy_process/do_task_stat but forgot about de_thread(). This breaks "ps axOT" if a sub-thread execs. Note: I think that task->start_time should die. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Acked-by: John Stultz Cc: Tomas Janousek Cc: Tomas Smetana Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/exec.c b/fs/exec.c index 396aec2..9c73def 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -932,6 +932,7 @@ static int de_thread(struct task_struct *tsk) * also take its birthdate (always earlier than our own). */ tsk->start_time = leader->start_time; + tsk->real_start_time = leader->real_start_time; BUG_ON(!same_thread_group(leader, tsk)); BUG_ON(has_group_leader_pid(tsk)); -- cgit v0.10.2 From a11edb59a05d8d5195419bd1fc28d82752324158 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Wed, 3 Jul 2013 15:08:36 -0700 Subject: /dev/oldmem: Remove the interface /dev/oldmem provides the interface for us to access the "old memory" in the dump-capture kernel. Unfortunately, no one actually uses this interface. And this interface could actually cause some real problems if used on ia64 where the cached/uncached accesses are mixed. See the discussion from the link: https://lkml.org/lkml/2013/4/12/386. So Eric suggested that we should remove /dev/oldmem as an unused piece of code. [akpm@linux-foundation.org: mention /dev/oldmem obsolescence in devices.txt] Suggested-by: "Eric W. Biederman" Signed-off-by: Zhang Yanfei Cc: Vivek Goyal Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: "H. Peter Anvin" Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Michael Holzheu Cc: Paul Mackerras Cc: Ralf Baechle Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/devices.txt b/Documentation/devices.txt index b901591..23721d3 100644 --- a/Documentation/devices.txt +++ b/Documentation/devices.txt @@ -100,8 +100,7 @@ Your cooperation is appreciated. 10 = /dev/aio Asynchronous I/O notification interface 11 = /dev/kmsg Writes to this come out as printk's, reads export the buffered printk records. - 12 = /dev/oldmem Used by crashdump kernels to access - the memory of the kernel that crashed. + 12 = /dev/oldmem OBSOLETE - replaced by /proc/vmcore 1 block RAM disk 0 = /dev/ram0 First RAM disk diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 2ca6d78..f895a8c 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -357,40 +356,6 @@ static int mmap_kmem(struct file *file, struct vm_area_struct *vma) } #endif -#ifdef CONFIG_CRASH_DUMP -/* - * Read memory corresponding to the old kernel. - */ -static ssize_t read_oldmem(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - unsigned long pfn, offset; - size_t read = 0, csize; - int rc = 0; - - while (count) { - pfn = *ppos / PAGE_SIZE; - if (pfn > saved_max_pfn) - return read; - - offset = (unsigned long)(*ppos % PAGE_SIZE); - if (count > PAGE_SIZE - offset) - csize = PAGE_SIZE - offset; - else - csize = count; - - rc = copy_oldmem_page(pfn, buf, csize, offset, 1); - if (rc < 0) - return rc; - buf += csize; - *ppos += csize; - read += csize; - count -= csize; - } - return read; -} -#endif - #ifdef CONFIG_DEVKMEM /* * This function reads the *virtual* memory as seen by the kernel. @@ -772,7 +737,6 @@ static int open_port(struct inode *inode, struct file *filp) #define aio_write_zero aio_write_null #define open_mem open_port #define open_kmem open_mem -#define open_oldmem open_mem static const struct file_operations mem_fops = { .llseek = memory_lseek, @@ -837,14 +801,6 @@ static const struct file_operations full_fops = { .write = write_full, }; -#ifdef CONFIG_CRASH_DUMP -static const struct file_operations oldmem_fops = { - .read = read_oldmem, - .open = open_oldmem, - .llseek = default_llseek, -}; -#endif - static const struct memdev { const char *name; umode_t mode; @@ -866,9 +822,6 @@ static const struct memdev { #ifdef CONFIG_PRINTK [11] = { "kmsg", 0644, &kmsg_fops, NULL }, #endif -#ifdef CONFIG_CRASH_DUMP - [12] = { "oldmem", 0, &oldmem_fops, NULL }, -#endif }; static int memory_open(struct inode *inode, struct file *filp) -- cgit v0.10.2 From 987bf6fe3b1409b1cdf4352b3c421260c95d52f2 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Wed, 3 Jul 2013 15:08:38 -0700 Subject: Documentation/kdump/kdump.txt: remove /dev/oldmem description Signed-off-by: Zhang Yanfei Cc: Vivek Goyal Cc: "Eric W. Biederman" Cc: "H. Peter Anvin" Cc: Benjamin Herrenschmidt Cc: Dave Hansen Cc: Fenghua Yu Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Michael Holzheu Cc: Paul Mackerras Cc: Ralf Baechle Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt index 9c7fd988..bec123e 100644 --- a/Documentation/kdump/kdump.txt +++ b/Documentation/kdump/kdump.txt @@ -47,19 +47,12 @@ parameter. Optionally the size of the ELF header can also be passed when using the elfcorehdr=[size[KMG]@]offset[KMG] syntax. -With the dump-capture kernel, you can access the memory image, or "old -memory," in two ways: - -- Through a /dev/oldmem device interface. A capture utility can read the - device file and write out the memory in raw format. This is a raw dump - of memory. Analysis and capture tools must be intelligent enough to - determine where to look for the right information. - -- Through /proc/vmcore. This exports the dump as an ELF-format file that - you can write out using file copy commands such as cp or scp. Further, - you can use analysis tools such as the GNU Debugger (GDB) and the Crash - tool to debug the dump file. This method ensures that the dump pages are - correctly ordered. +With the dump-capture kernel, you can access the memory image through +/proc/vmcore. This exports the dump as an ELF-format file that you can +write out using file copy commands such as cp or scp. Further, you can +use analysis tools such as the GNU Debugger (GDB) and the Crash tool to +debug the dump file. This method ensures that the dump pages are correctly +ordered. Setup and Installation @@ -423,18 +416,6 @@ the following command: cp /proc/vmcore -You can also access dumped memory as a /dev/oldmem device for a linear -and raw view. To create the device, use the following command: - - mknod /dev/oldmem c 1 12 - -Use the dd command with suitable options for count, bs, and skip to -access specific portions of the dump. - -To see the entire memory, use the following command: - - dd if=/dev/oldmem of=oldmem.001 - Analysis ======== -- cgit v0.10.2 From db9ab97d9df678b6039e79de3516db83ec71b1f0 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Wed, 3 Jul 2013 15:08:40 -0700 Subject: mips: remove savemaxmem parameter setup saved_max_pfn is used to know the amount of memory that the previous kernel used. And for powerpc, we set saved_max_pfn by passing the kernel commandline parameter "savemaxmem=". The only user of saved_max_pfn in mips is read_oldmem interface. Since we have removed read_oldmem, so we don't need this parameter anymore. Signed-off-by: Zhang Yanfei Cc: Ralf Baechle Cc: "Eric W. Biederman" Cc: "H. Peter Anvin" Cc: Benjamin Herrenschmidt Cc: Dave Hansen Cc: Fenghua Yu Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Michael Holzheu Cc: Paul Mackerras Cc: Tony Luck Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/mips/kernel/crash_dump.c b/arch/mips/kernel/crash_dump.c index 3be9e7b..f291cf9 100644 --- a/arch/mips/kernel/crash_dump.c +++ b/arch/mips/kernel/crash_dump.c @@ -4,16 +4,6 @@ #include #include -static int __init parse_savemaxmem(char *p) -{ - if (p) - saved_max_pfn = (memparse(p, &p) >> PAGE_SHIFT) - 1; - - return 1; -} -__setup("savemaxmem=", parse_savemaxmem); - - static void *kdump_buf_page; /** -- cgit v0.10.2 From 427fcd23b56598fd0dda7b8a5925c8479aa197db Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Wed, 3 Jul 2013 15:08:42 -0700 Subject: powerpc: Remove savemaxmem parameter setup saved_max_pfn is used to know the amount of memory that the previous kernel used. And for powerpc, we set saved_max_pfn by passing the kernel commandline parameter "savemaxmem=". The only user of saved_max_pfn in powerpc is read_oldmem interface. Since we have removed read_oldmem, we don't need this parameter anymore. Signed-off-by: Zhang Yanfei Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Eric W. Biederman" Cc: "H. Peter Anvin" Cc: Dave Hansen Cc: Fenghua Yu Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Michael Holzheu Cc: Ralf Baechle Cc: Tony Luck Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 9ec3fe1..779a78c 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -69,16 +69,6 @@ void __init setup_kdump_trampoline(void) } #endif /* CONFIG_NONSTATIC_KERNEL */ -static int __init parse_savemaxmem(char *p) -{ - if (p) - saved_max_pfn = (memparse(p, &p) >> PAGE_SHIFT) - 1; - - return 1; -} -__setup("savemaxmem=", parse_savemaxmem); - - static size_t copy_oldmem_vaddr(void *vaddr, char *buf, size_t csize, unsigned long offset, int userbuf) { -- cgit v0.10.2 From eeb407a57441e1178bb3ce1c8e1c23b877411f24 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Wed, 3 Jul 2013 15:08:44 -0700 Subject: ia64: remove setting for saved_max_pfn The only user of saved_max_pfn in ia64 is read_oldmem interface but we have removed that interface, so saved_max_pfn is now unneeded in ia64, and we needn't set it anymore. Signed-off-by: Zhang Yanfei Cc: Matt Fleming Cc: Tony Luck Cc: Fenghua Yu Cc: "Eric W. Biederman" Cc: "H. Peter Anvin" Cc: Benjamin Herrenschmidt Cc: Dave Hansen Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Michael Holzheu Cc: Paul Mackerras Cc: Ralf Baechle Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index f034563..51bce59 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -1116,11 +1116,6 @@ efi_memmap_init(u64 *s, u64 *e) if (!is_memory_available(md)) continue; -#ifdef CONFIG_CRASH_DUMP - /* saved_max_pfn should ignore max_addr= command line arg */ - if (saved_max_pfn < (efi_md_end(md) >> PAGE_SHIFT)) - saved_max_pfn = (efi_md_end(md) >> PAGE_SHIFT); -#endif /* * Round ends inward to granule boundaries * Give trimmings to uncached allocator -- cgit v0.10.2 From 8bdc237ac113dd42a1c977c8cd3a65a82f774d5e Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Wed, 3 Jul 2013 15:08:45 -0700 Subject: s390: remove setting for saved_max_pfn The only user of saved_max_pfn in s390 is read_oldmem interface but we have removed that interface, so saved_max_pfn is now unneeded in s390, and we needn't set it anymore. Signed-off-by: Zhang Yanfei Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Michael Holzheu Cc: "Eric W. Biederman" Cc: "H. Peter Anvin" Cc: Benjamin Herrenschmidt Cc: Dave Hansen Cc: Fenghua Yu Cc: Matt Fleming Cc: Paul Mackerras Cc: Ralf Baechle Cc: Tony Luck Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 0a49095..497451e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -719,10 +719,6 @@ static void reserve_oldmem(void) } create_mem_hole(memory_chunk, OLDMEM_BASE, OLDMEM_SIZE); create_mem_hole(memory_chunk, OLDMEM_SIZE, real_size - OLDMEM_SIZE); - if (OLDMEM_BASE + OLDMEM_SIZE == real_size) - saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1; - else - saved_max_pfn = PFN_DOWN(real_size) - 1; #endif } -- cgit v0.10.2 From dd04b452f532ca100f7c557295ffcbc049c77171 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 3 Jul 2013 15:08:47 -0700 Subject: idr: print a stack dump after ida_remove warning We print a dump stack after idr_remove warning. This is useful to find the faulty piece of code. Let's do the same for ida_remove, as it would be equally useful there. [akpm@linux-foundation.org: convert the open-coded printk+dump_stack into WARN()] Signed-off-by: Jean Delvare Cc: Tejun Heo Cc: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/idr.c b/lib/idr.c index cca4b93..bfe4db4 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -524,9 +524,7 @@ EXPORT_SYMBOL(idr_alloc_cyclic); static void idr_remove_warning(int id) { - printk(KERN_WARNING - "idr_remove called for id=%d which is not allocated.\n", id); - dump_stack(); + WARN(1, "idr_remove called for id=%d which is not allocated.\n", id); } static void sub_remove(struct idr *idp, int shift, int id) @@ -1064,8 +1062,7 @@ void ida_remove(struct ida *ida, int id) return; err: - printk(KERN_WARNING - "ida_remove called for id=%d which is not allocated.\n", id); + WARN(1, "ida_remove called for id=%d which is not allocated.\n", id); } EXPORT_SYMBOL(ida_remove); -- cgit v0.10.2 From e350cd1d9308b67363ebb870a678a1fd807df1a6 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 3 Jul 2013 15:08:48 -0700 Subject: rapidio/switches: remove tsi500 driver Remove the driver for Tsi500 Parallel RapidIO switch because this device has not been available for several years. Since the first introduction of Tsi500, the parallel RapidIO interface was replaced by the serial RapidIO (sRIO) and therefore there is no value in keeping this driver. Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rapidio/switches/Kconfig b/drivers/rapidio/switches/Kconfig index f47fee5..62d4a06 100644 --- a/drivers/rapidio/switches/Kconfig +++ b/drivers/rapidio/switches/Kconfig @@ -26,10 +26,3 @@ config RAPIDIO_CPS_GEN2 default n ---help--- Includes support for ITD CPS Gen.2 serial RapidIO switches. - -config RAPIDIO_TSI500 - bool "Tsi500 Parallel RapidIO switch support" - depends on RAPIDIO - default n - ---help--- - Includes support for IDT Tsi500 parallel RapidIO switch. diff --git a/drivers/rapidio/switches/Makefile b/drivers/rapidio/switches/Makefile index c4d3acc..051cc6b 100644 --- a/drivers/rapidio/switches/Makefile +++ b/drivers/rapidio/switches/Makefile @@ -5,5 +5,4 @@ obj-$(CONFIG_RAPIDIO_TSI57X) += tsi57x.o obj-$(CONFIG_RAPIDIO_CPS_XX) += idtcps.o obj-$(CONFIG_RAPIDIO_TSI568) += tsi568.o -obj-$(CONFIG_RAPIDIO_TSI500) += tsi500.o obj-$(CONFIG_RAPIDIO_CPS_GEN2) += idt_gen2.o diff --git a/drivers/rapidio/switches/tsi500.c b/drivers/rapidio/switches/tsi500.c deleted file mode 100644 index 914eddd..0000000 --- a/drivers/rapidio/switches/tsi500.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * RapidIO Tsi500 switch support - * - * Copyright 2009-2010 Integrated Device Technology, Inc. - * Alexandre Bounine - * - Modified switch operations initialization. - * - * Copyright 2005 MontaVista Software, Inc. - * Matt Porter - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include -#include -#include -#include "../rio.h" - -static int -tsi500_route_add_entry(struct rio_mport *mport, u16 destid, u8 hopcount, u16 table, u16 route_destid, u8 route_port) -{ - int i; - u32 offset = 0x10000 + 0xa00 + ((route_destid / 2)&~0x3); - u32 result; - - if (table == 0xff) { - rio_mport_read_config_32(mport, destid, hopcount, offset, &result); - result &= ~(0xf << (4*(route_destid & 0x7))); - for (i=0;i<4;i++) - rio_mport_write_config_32(mport, destid, hopcount, offset + (0x20000*i), result | (route_port << (4*(route_destid & 0x7)))); - } - else { - rio_mport_read_config_32(mport, destid, hopcount, offset + (0x20000*table), &result); - result &= ~(0xf << (4*(route_destid & 0x7))); - rio_mport_write_config_32(mport, destid, hopcount, offset + (0x20000*table), result | (route_port << (4*(route_destid & 0x7)))); - } - - return 0; -} - -static int -tsi500_route_get_entry(struct rio_mport *mport, u16 destid, u8 hopcount, u16 table, u16 route_destid, u8 *route_port) -{ - int ret = 0; - u32 offset = 0x10000 + 0xa00 + ((route_destid / 2)&~0x3); - u32 result; - - if (table == 0xff) - rio_mport_read_config_32(mport, destid, hopcount, offset, &result); - else - rio_mport_read_config_32(mport, destid, hopcount, offset + (0x20000*table), &result); - - result &= 0xf << (4*(route_destid & 0x7)); - *route_port = result >> (4*(route_destid & 0x7)); - if (*route_port > 3) - ret = -1; - - return ret; -} - -static int tsi500_switch_init(struct rio_dev *rdev, int do_enum) -{ - pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev)); - rdev->rswitch->add_entry = tsi500_route_add_entry; - rdev->rswitch->get_entry = tsi500_route_get_entry; - rdev->rswitch->clr_table = NULL; - rdev->rswitch->set_domain = NULL; - rdev->rswitch->get_domain = NULL; - rdev->rswitch->em_init = NULL; - rdev->rswitch->em_handle = NULL; - - return 0; -} - -DECLARE_RIO_SWITCH_INIT(RIO_VID_TUNDRA, RIO_DID_TSI500, tsi500_switch_init); -- cgit v0.10.2 From 36f0efbbe8e21c153dfc2f94c91f89ab06fd64c5 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 3 Jul 2013 15:08:49 -0700 Subject: drivers/rapidio/rio-scan.c: make functions static sparse warnings: drivers/rapidio/rio-scan.c:1143:5: sparse: symbol 'rio_enum_mport' was not declared. Should it be static? drivers/rapidio/rio-scan.c:1246:5: sparse: symbol 'rio_disc_mport' was not declared. Should it be static? Signed-off-by: Fengguang Wu Cc: "Bounine, Alexandre" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index 4c15dbf..4b9b15e 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -1141,7 +1141,7 @@ static void rio_pw_enable(struct rio_mport *port, int enable) * link, then start recursive peer enumeration. Returns %0 if * enumeration succeeds or %-EBUSY if enumeration fails. */ -int rio_enum_mport(struct rio_mport *mport, u32 flags) +static int rio_enum_mport(struct rio_mport *mport, u32 flags) { struct rio_net *net = NULL; int rc = 0; @@ -1256,7 +1256,7 @@ static void rio_build_route_tables(struct rio_net *net) * peer discovery. Returns %0 if discovery succeeds or %-EBUSY * on failure. */ -int rio_disc_mport(struct rio_mport *mport, u32 flags) +static int rio_disc_mport(struct rio_mport *mport, u32 flags) { struct rio_net *net = NULL; unsigned long to_end; -- cgit v0.10.2 From 2ec3ba69faf301fb599e3651515e808e8efa533e Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 3 Jul 2013 15:08:50 -0700 Subject: rapidio: convert switch drivers to modules Rework RapidIO switch drivers to add an option to build them as loadable kernel modules. This patch removes RapidIO-specific vmlinux section and converts switch drivers to be compatible with LDM driver registration method. To simplify registration of device-specific callback routines this patch introduces rio_switch_ops data structure. The sw_sysfs() callback is removed from the list of device-specific operations because under the new structure its functions can be handled by switch driver's probe() and remove() routines. If a specific switch device driver is not loaded the RapidIO subsystem core will use default standard-based operations to configure a switch. Because the current implementation of RapidIO enumeration/discovery method relies on availability of device-specific operations for error management, switch device drivers must be loaded before the RapidIO enumeration/discovery starts. This patch also moves several common routines from enumeration/discovery module into the RapidIO core code to make switch-specific operations accessible to all components of RapidIO subsystem. Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Andre van Herk Cc: Micha Nelissen Cc: Stef van Os Cc: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rapidio/Kconfig b/drivers/rapidio/Kconfig index 5ab0564..3e3be57 100644 --- a/drivers/rapidio/Kconfig +++ b/drivers/rapidio/Kconfig @@ -67,4 +67,9 @@ config RAPIDIO_ENUM_BASIC endchoice +menu "RapidIO Switch drivers" + depends on RAPIDIO + source "drivers/rapidio/switches/Kconfig" + +endmenu diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index 4b9b15e..9139502 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -406,6 +406,7 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, rio_mport_write_config_32(port, destid, hopcount, RIO_COMPONENT_TAG_CSR, next_comptag); rdev->comp_tag = next_comptag++; + rdev->do_enum = true; } else { rio_mport_read_config_32(port, destid, hopcount, RIO_COMPONENT_TAG_CSR, @@ -434,6 +435,7 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, rswitch = rdev->rswitch; rswitch->switchid = rdev->comp_tag & RIO_CTAG_UDEVID; rswitch->port_ok = 0; + spin_lock_init(&rswitch->lock); rswitch->route_table = kzalloc(sizeof(u8)* RIO_MAX_ROUTE_ENTRIES(port->sys_size), GFP_KERNEL); @@ -445,11 +447,9 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, rswitch->route_table[rdid] = RIO_INVALID_ROUTE; dev_set_name(&rdev->dev, "%02x:s:%04x", rdev->net->id, rswitch->switchid); - rio_switch_init(rdev, do_enum); - if (do_enum && rswitch->clr_table) - rswitch->clr_table(port, destid, hopcount, - RIO_GLOBAL_TABLE); + if (do_enum) + rio_route_clr_table(rdev, RIO_GLOBAL_TABLE, 0); list_add_tail(&rswitch->node, &net->switches); @@ -533,156 +533,6 @@ rio_sport_is_active(struct rio_mport *port, u16 destid, u8 hopcount, int sport) } /** - * rio_lock_device - Acquires host device lock for specified device - * @port: Master port to send transaction - * @destid: Destination ID for device/switch - * @hopcount: Hopcount to reach switch - * @wait_ms: Max wait time in msec (0 = no timeout) - * - * Attepts to acquire host device lock for specified device - * Returns 0 if device lock acquired or EINVAL if timeout expires. - */ -static int -rio_lock_device(struct rio_mport *port, u16 destid, u8 hopcount, int wait_ms) -{ - u32 result; - int tcnt = 0; - - /* Attempt to acquire device lock */ - rio_mport_write_config_32(port, destid, hopcount, - RIO_HOST_DID_LOCK_CSR, port->host_deviceid); - rio_mport_read_config_32(port, destid, hopcount, - RIO_HOST_DID_LOCK_CSR, &result); - - while (result != port->host_deviceid) { - if (wait_ms != 0 && tcnt == wait_ms) { - pr_debug("RIO: timeout when locking device %x:%x\n", - destid, hopcount); - return -EINVAL; - } - - /* Delay a bit */ - mdelay(1); - tcnt++; - /* Try to acquire device lock again */ - rio_mport_write_config_32(port, destid, - hopcount, - RIO_HOST_DID_LOCK_CSR, - port->host_deviceid); - rio_mport_read_config_32(port, destid, - hopcount, - RIO_HOST_DID_LOCK_CSR, &result); - } - - return 0; -} - -/** - * rio_unlock_device - Releases host device lock for specified device - * @port: Master port to send transaction - * @destid: Destination ID for device/switch - * @hopcount: Hopcount to reach switch - * - * Returns 0 if device lock released or EINVAL if fails. - */ -static int -rio_unlock_device(struct rio_mport *port, u16 destid, u8 hopcount) -{ - u32 result; - - /* Release device lock */ - rio_mport_write_config_32(port, destid, - hopcount, - RIO_HOST_DID_LOCK_CSR, - port->host_deviceid); - rio_mport_read_config_32(port, destid, hopcount, - RIO_HOST_DID_LOCK_CSR, &result); - if ((result & 0xffff) != 0xffff) { - pr_debug("RIO: badness when releasing device lock %x:%x\n", - destid, hopcount); - return -EINVAL; - } - - return 0; -} - -/** - * rio_route_add_entry- Add a route entry to a switch routing table - * @rdev: RIO device - * @table: Routing table ID - * @route_destid: Destination ID to be routed - * @route_port: Port number to be routed - * @lock: lock switch device flag - * - * Calls the switch specific add_entry() method to add a route entry - * on a switch. The route table can be specified using the @table - * argument if a switch has per port routing tables or the normal - * use is to specific all tables (or the global table) by passing - * %RIO_GLOBAL_TABLE in @table. Returns %0 on success or %-EINVAL - * on failure. - */ -static int -rio_route_add_entry(struct rio_dev *rdev, - u16 table, u16 route_destid, u8 route_port, int lock) -{ - int rc; - - if (lock) { - rc = rio_lock_device(rdev->net->hport, rdev->destid, - rdev->hopcount, 1000); - if (rc) - return rc; - } - - rc = rdev->rswitch->add_entry(rdev->net->hport, rdev->destid, - rdev->hopcount, table, - route_destid, route_port); - if (lock) - rio_unlock_device(rdev->net->hport, rdev->destid, - rdev->hopcount); - - return rc; -} - -/** - * rio_route_get_entry- Read a route entry in a switch routing table - * @rdev: RIO device - * @table: Routing table ID - * @route_destid: Destination ID to be routed - * @route_port: Pointer to read port number into - * @lock: lock switch device flag - * - * Calls the switch specific get_entry() method to read a route entry - * in a switch. The route table can be specified using the @table - * argument if a switch has per port routing tables or the normal - * use is to specific all tables (or the global table) by passing - * %RIO_GLOBAL_TABLE in @table. Returns %0 on success or %-EINVAL - * on failure. - */ -static int -rio_route_get_entry(struct rio_dev *rdev, u16 table, - u16 route_destid, u8 *route_port, int lock) -{ - int rc; - - if (lock) { - rc = rio_lock_device(rdev->net->hport, rdev->destid, - rdev->hopcount, 1000); - if (rc) - return rc; - } - - rc = rdev->rswitch->get_entry(rdev->net->hport, rdev->destid, - rdev->hopcount, table, - route_destid, route_port); - if (lock) - rio_unlock_device(rdev->net->hport, rdev->destid, - rdev->hopcount); - - return rc; -} - -/** * rio_get_host_deviceid_lock- Reads the Host Device ID Lock CSR on a device * @port: Master port to send transaction * @hopcount: Number of hops to the device @@ -1094,12 +944,9 @@ static void rio_update_route_tables(struct rio_net *net) sport = RIO_GET_PORT_NUM(swrdev->swpinfo); - if (rswitch->add_entry) { - rio_route_add_entry(swrdev, - RIO_GLOBAL_TABLE, destid, - sport, 0); - rswitch->route_table[destid] = sport; - } + rio_route_add_entry(swrdev, RIO_GLOBAL_TABLE, + destid, sport, 0); + rswitch->route_table[destid] = sport; } } } @@ -1115,8 +962,8 @@ static void rio_update_route_tables(struct rio_net *net) static void rio_init_em(struct rio_dev *rdev) { if (rio_is_switch(rdev) && (rdev->em_efptr) && - (rdev->rswitch->em_init)) { - rdev->rswitch->em_init(rdev); + rdev->rswitch->ops && rdev->rswitch->ops->em_init) { + rdev->rswitch->ops->em_init(rdev); } } diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c index 66d4acd..864e52f 100644 --- a/drivers/rapidio/rio-sysfs.c +++ b/drivers/rapidio/rio-sysfs.c @@ -257,8 +257,6 @@ int rio_create_sysfs_dev_files(struct rio_dev *rdev) err |= device_create_file(&rdev->dev, &dev_attr_routes); err |= device_create_file(&rdev->dev, &dev_attr_lnext); err |= device_create_file(&rdev->dev, &dev_attr_hopcount); - if (!err && rdev->rswitch->sw_sysfs) - err = rdev->rswitch->sw_sysfs(rdev, RIO_SW_SYSFS_CREATE); } if (err) @@ -281,8 +279,6 @@ void rio_remove_sysfs_dev_files(struct rio_dev *rdev) device_remove_file(&rdev->dev, &dev_attr_routes); device_remove_file(&rdev->dev, &dev_attr_lnext); device_remove_file(&rdev->dev, &dev_attr_hopcount); - if (rdev->rswitch->sw_sysfs) - rdev->rswitch->sw_sysfs(rdev, RIO_SW_SYSFS_REMOVE); } } diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index cb1c089..b17d521 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -7,7 +7,6 @@ * * Copyright 2009 Integrated Device Technology, Inc. * Alex Bounine - * - Added Port-Write/Error Management initialization and handling * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -580,44 +579,6 @@ int rio_set_port_lockout(struct rio_dev *rdev, u32 pnum, int lock) EXPORT_SYMBOL_GPL(rio_set_port_lockout); /** - * rio_switch_init - Sets switch operations for a particular vendor switch - * @rdev: RIO device - * @do_enum: Enumeration/Discovery mode flag - * - * Searches the RIO switch ops table for known switch types. If the vid - * and did match a switch table entry, then call switch initialization - * routine to setup switch-specific routines. - */ -void rio_switch_init(struct rio_dev *rdev, int do_enum) -{ - struct rio_switch_ops *cur = __start_rio_switch_ops; - struct rio_switch_ops *end = __end_rio_switch_ops; - - while (cur < end) { - if ((cur->vid == rdev->vid) && (cur->did == rdev->did)) { - pr_debug("RIO: calling init routine for %s\n", - rio_name(rdev)); - cur->init_hook(rdev, do_enum); - break; - } - cur++; - } - - if ((cur >= end) && (rdev->pef & RIO_PEF_STD_RT)) { - pr_debug("RIO: adding STD routing ops for %s\n", - rio_name(rdev)); - rdev->rswitch->add_entry = rio_std_route_add_entry; - rdev->rswitch->get_entry = rio_std_route_get_entry; - rdev->rswitch->clr_table = rio_std_route_clr_table; - } - - if (!rdev->rswitch->add_entry || !rdev->rswitch->get_entry) - printk(KERN_ERR "RIO: missing routing ops for %s\n", - rio_name(rdev)); -} -EXPORT_SYMBOL_GPL(rio_switch_init); - -/** * rio_enable_rx_tx_port - enable input receiver and output transmitter of * given port * @port: Master port associated with the RIO network @@ -970,8 +931,8 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) /* * Process the port-write notification from switch */ - if (rdev->rswitch->em_handle) - rdev->rswitch->em_handle(rdev, portnum); + if (rdev->rswitch->ops && rdev->rswitch->ops->em_handle) + rdev->rswitch->ops->em_handle(rdev, portnum); rio_read_config_32(rdev, rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), @@ -1207,8 +1168,9 @@ struct rio_dev *rio_get_device(u16 vid, u16 did, struct rio_dev *from) * @route_destid: destID entry in the RT * @route_port: destination port for specified destID */ -int rio_std_route_add_entry(struct rio_mport *mport, u16 destid, u8 hopcount, - u16 table, u16 route_destid, u8 route_port) +static int +rio_std_route_add_entry(struct rio_mport *mport, u16 destid, u8 hopcount, + u16 table, u16 route_destid, u8 route_port) { if (table == RIO_GLOBAL_TABLE) { rio_mport_write_config_32(mport, destid, hopcount, @@ -1234,8 +1196,9 @@ int rio_std_route_add_entry(struct rio_mport *mport, u16 destid, u8 hopcount, * @route_destid: destID entry in the RT * @route_port: returned destination port for specified destID */ -int rio_std_route_get_entry(struct rio_mport *mport, u16 destid, u8 hopcount, - u16 table, u16 route_destid, u8 *route_port) +static int +rio_std_route_get_entry(struct rio_mport *mport, u16 destid, u8 hopcount, + u16 table, u16 route_destid, u8 *route_port) { u32 result; @@ -1259,8 +1222,9 @@ int rio_std_route_get_entry(struct rio_mport *mport, u16 destid, u8 hopcount, * @hopcount: Number of switch hops to the device * @table: routing table ID (global or port-specific) */ -int rio_std_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount, - u16 table) +static int +rio_std_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount, + u16 table) { u32 max_destid = 0xff; u32 i, pef, id_inc = 1, ext_cfg = 0; @@ -1301,6 +1265,234 @@ int rio_std_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount, return 0; } +/** + * rio_lock_device - Acquires host device lock for specified device + * @port: Master port to send transaction + * @destid: Destination ID for device/switch + * @hopcount: Hopcount to reach switch + * @wait_ms: Max wait time in msec (0 = no timeout) + * + * Attepts to acquire host device lock for specified device + * Returns 0 if device lock acquired or EINVAL if timeout expires. + */ +int rio_lock_device(struct rio_mport *port, u16 destid, + u8 hopcount, int wait_ms) +{ + u32 result; + int tcnt = 0; + + /* Attempt to acquire device lock */ + rio_mport_write_config_32(port, destid, hopcount, + RIO_HOST_DID_LOCK_CSR, port->host_deviceid); + rio_mport_read_config_32(port, destid, hopcount, + RIO_HOST_DID_LOCK_CSR, &result); + + while (result != port->host_deviceid) { + if (wait_ms != 0 && tcnt == wait_ms) { + pr_debug("RIO: timeout when locking device %x:%x\n", + destid, hopcount); + return -EINVAL; + } + + /* Delay a bit */ + mdelay(1); + tcnt++; + /* Try to acquire device lock again */ + rio_mport_write_config_32(port, destid, + hopcount, + RIO_HOST_DID_LOCK_CSR, + port->host_deviceid); + rio_mport_read_config_32(port, destid, + hopcount, + RIO_HOST_DID_LOCK_CSR, &result); + } + + return 0; +} +EXPORT_SYMBOL_GPL(rio_lock_device); + +/** + * rio_unlock_device - Releases host device lock for specified device + * @port: Master port to send transaction + * @destid: Destination ID for device/switch + * @hopcount: Hopcount to reach switch + * + * Returns 0 if device lock released or EINVAL if fails. + */ +int rio_unlock_device(struct rio_mport *port, u16 destid, u8 hopcount) +{ + u32 result; + + /* Release device lock */ + rio_mport_write_config_32(port, destid, + hopcount, + RIO_HOST_DID_LOCK_CSR, + port->host_deviceid); + rio_mport_read_config_32(port, destid, hopcount, + RIO_HOST_DID_LOCK_CSR, &result); + if ((result & 0xffff) != 0xffff) { + pr_debug("RIO: badness when releasing device lock %x:%x\n", + destid, hopcount); + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(rio_unlock_device); + +/** + * rio_route_add_entry- Add a route entry to a switch routing table + * @rdev: RIO device + * @table: Routing table ID + * @route_destid: Destination ID to be routed + * @route_port: Port number to be routed + * @lock: apply a hardware lock on switch device flag (1=lock, 0=no_lock) + * + * If available calls the switch specific add_entry() method to add a route + * entry into a switch routing table. Otherwise uses standard RT update method + * as defined by RapidIO specification. A specific routing table can be selected + * using the @table argument if a switch has per port routing tables or + * the standard (or global) table may be used by passing + * %RIO_GLOBAL_TABLE in @table. + * + * Returns %0 on success or %-EINVAL on failure. + */ +int rio_route_add_entry(struct rio_dev *rdev, + u16 table, u16 route_destid, u8 route_port, int lock) +{ + int rc = -EINVAL; + struct rio_switch_ops *ops = rdev->rswitch->ops; + + if (lock) { + rc = rio_lock_device(rdev->net->hport, rdev->destid, + rdev->hopcount, 1000); + if (rc) + return rc; + } + + spin_lock(&rdev->rswitch->lock); + + if (ops == NULL || ops->add_entry == NULL) { + rc = rio_std_route_add_entry(rdev->net->hport, rdev->destid, + rdev->hopcount, table, + route_destid, route_port); + } else if (try_module_get(ops->owner)) { + rc = ops->add_entry(rdev->net->hport, rdev->destid, + rdev->hopcount, table, route_destid, + route_port); + module_put(ops->owner); + } + + spin_unlock(&rdev->rswitch->lock); + + if (lock) + rio_unlock_device(rdev->net->hport, rdev->destid, + rdev->hopcount); + + return rc; +} +EXPORT_SYMBOL_GPL(rio_route_add_entry); + +/** + * rio_route_get_entry- Read an entry from a switch routing table + * @rdev: RIO device + * @table: Routing table ID + * @route_destid: Destination ID to be routed + * @route_port: Pointer to read port number into + * @lock: apply a hardware lock on switch device flag (1=lock, 0=no_lock) + * + * If available calls the switch specific get_entry() method to fetch a route + * entry from a switch routing table. Otherwise uses standard RT read method + * as defined by RapidIO specification. A specific routing table can be selected + * using the @table argument if a switch has per port routing tables or + * the standard (or global) table may be used by passing + * %RIO_GLOBAL_TABLE in @table. + * + * Returns %0 on success or %-EINVAL on failure. + */ +int rio_route_get_entry(struct rio_dev *rdev, u16 table, + u16 route_destid, u8 *route_port, int lock) +{ + int rc = -EINVAL; + struct rio_switch_ops *ops = rdev->rswitch->ops; + + if (lock) { + rc = rio_lock_device(rdev->net->hport, rdev->destid, + rdev->hopcount, 1000); + if (rc) + return rc; + } + + spin_lock(&rdev->rswitch->lock); + + if (ops == NULL || ops->get_entry == NULL) { + rc = rio_std_route_get_entry(rdev->net->hport, rdev->destid, + rdev->hopcount, table, + route_destid, route_port); + } else if (try_module_get(ops->owner)) { + rc = ops->get_entry(rdev->net->hport, rdev->destid, + rdev->hopcount, table, route_destid, + route_port); + module_put(ops->owner); + } + + spin_unlock(&rdev->rswitch->lock); + + if (lock) + rio_unlock_device(rdev->net->hport, rdev->destid, + rdev->hopcount); + return rc; +} +EXPORT_SYMBOL_GPL(rio_route_get_entry); + +/** + * rio_route_clr_table - Clear a switch routing table + * @rdev: RIO device + * @table: Routing table ID + * @lock: apply a hardware lock on switch device flag (1=lock, 0=no_lock) + * + * If available calls the switch specific clr_table() method to clear a switch + * routing table. Otherwise uses standard RT write method as defined by RapidIO + * specification. A specific routing table can be selected using the @table + * argument if a switch has per port routing tables or the standard (or global) + * table may be used by passing %RIO_GLOBAL_TABLE in @table. + * + * Returns %0 on success or %-EINVAL on failure. + */ +int rio_route_clr_table(struct rio_dev *rdev, u16 table, int lock) +{ + int rc = -EINVAL; + struct rio_switch_ops *ops = rdev->rswitch->ops; + + if (lock) { + rc = rio_lock_device(rdev->net->hport, rdev->destid, + rdev->hopcount, 1000); + if (rc) + return rc; + } + + spin_lock(&rdev->rswitch->lock); + + if (ops == NULL || ops->clr_table == NULL) { + rc = rio_std_route_clr_table(rdev->net->hport, rdev->destid, + rdev->hopcount, table); + } else if (try_module_get(ops->owner)) { + rc = ops->clr_table(rdev->net->hport, rdev->destid, + rdev->hopcount, table); + + module_put(ops->owner); + } + + spin_unlock(&rdev->rswitch->lock); + + if (lock) + rio_unlock_device(rdev->net->hport, rdev->destid, + rdev->hopcount); + + return rc; +} +EXPORT_SYMBOL_GPL(rio_route_clr_table); + #ifdef CONFIG_RAPIDIO_DMA_ENGINE static bool rio_chan_filter(struct dma_chan *chan, void *arg) diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h index c14f864..d595877 100644 --- a/drivers/rapidio/rio.h +++ b/drivers/rapidio/rio.h @@ -28,18 +28,17 @@ extern u32 rio_mport_get_efb(struct rio_mport *port, int local, u16 destid, extern int rio_mport_chk_dev_access(struct rio_mport *mport, u16 destid, u8 hopcount); extern int rio_create_sysfs_dev_files(struct rio_dev *rdev); -extern int rio_std_route_add_entry(struct rio_mport *mport, u16 destid, - u8 hopcount, u16 table, u16 route_destid, - u8 route_port); -extern int rio_std_route_get_entry(struct rio_mport *mport, u16 destid, - u8 hopcount, u16 table, u16 route_destid, - u8 *route_port); -extern int rio_std_route_clr_table(struct rio_mport *mport, u16 destid, - u8 hopcount, u16 table); +extern int rio_lock_device(struct rio_mport *port, u16 destid, + u8 hopcount, int wait_ms); +extern int rio_unlock_device(struct rio_mport *port, u16 destid, u8 hopcount); +extern int rio_route_add_entry(struct rio_dev *rdev, + u16 table, u16 route_destid, u8 route_port, int lock); +extern int rio_route_get_entry(struct rio_dev *rdev, u16 table, + u16 route_destid, u8 *route_port, int lock); +extern int rio_route_clr_table(struct rio_dev *rdev, u16 table, int lock); extern int rio_set_port_lockout(struct rio_dev *rdev, u32 pnum, int lock); extern struct rio_dev *rio_get_comptag(u32 comp_tag, struct rio_dev *from); extern int rio_add_device(struct rio_dev *rdev); -extern void rio_switch_init(struct rio_dev *rdev, int do_enum); extern int rio_enable_rx_tx_port(struct rio_mport *port, int local, u16 destid, u8 hopcount, u8 port_num); extern int rio_register_scan(int mport_id, struct rio_scan *scan_ops); @@ -51,29 +50,5 @@ extern struct rio_mport *rio_find_mport(int mport_id); extern struct device_attribute rio_dev_attrs[]; extern struct bus_attribute rio_bus_attrs[]; -extern struct rio_switch_ops __start_rio_switch_ops[]; -extern struct rio_switch_ops __end_rio_switch_ops[]; - -/* Helpers internal to the RIO core code */ -#define DECLARE_RIO_SWITCH_SECTION(section, name, vid, did, init_hook) \ - static const struct rio_switch_ops __rio_switch_##name __used \ - __section(section) = { vid, did, init_hook }; - -/** - * DECLARE_RIO_SWITCH_INIT - Registers switch initialization routine - * @vid: RIO vendor ID - * @did: RIO device ID - * @init_hook: Callback that performs switch-specific initialization - * - * Manipulating switch route tables and error management in RIO - * is switch specific. This registers a switch by vendor and device ID with - * initialization callback for setting up switch operations and (if required) - * hardware initialization. A &struct rio_switch_ops is initialized with - * pointer to the init routine and placed into a RIO-specific kernel section. - */ -#define DECLARE_RIO_SWITCH_INIT(vid, did, init_hook) \ - DECLARE_RIO_SWITCH_SECTION(.rio_switch_ops, vid##did, \ - vid, did, init_hook) - #define RIO_GET_DID(size, x) (size ? (x & 0xffff) : ((x & 0x00ff0000) >> 16)) #define RIO_SET_DID(size, x) (size ? (x & 0xffff) : ((x & 0x000000ff) << 16)) diff --git a/drivers/rapidio/switches/Kconfig b/drivers/rapidio/switches/Kconfig index 62d4a06..3458415 100644 --- a/drivers/rapidio/switches/Kconfig +++ b/drivers/rapidio/switches/Kconfig @@ -2,27 +2,23 @@ # RapidIO switches configuration # config RAPIDIO_TSI57X - bool "IDT Tsi57x SRIO switches support" - depends on RAPIDIO + tristate "IDT Tsi57x SRIO switches support" ---help--- Includes support for IDT Tsi57x family of serial RapidIO switches. config RAPIDIO_CPS_XX - bool "IDT CPS-xx SRIO switches support" - depends on RAPIDIO + tristate "IDT CPS-xx SRIO switches support" ---help--- Includes support for IDT CPS-16/12/10/8 serial RapidIO switches. config RAPIDIO_TSI568 - bool "Tsi568 SRIO switch support" - depends on RAPIDIO + tristate "Tsi568 SRIO switch support" default n ---help--- Includes support for IDT Tsi568 serial RapidIO switch. config RAPIDIO_CPS_GEN2 - bool "IDT CPS Gen.2 SRIO switch support" - depends on RAPIDIO + tristate "IDT CPS Gen.2 SRIO switch support" default n ---help--- Includes support for ITD CPS Gen.2 serial RapidIO switches. diff --git a/drivers/rapidio/switches/idt_gen2.c b/drivers/rapidio/switches/idt_gen2.c index 809b7a3..00a71eb 100644 --- a/drivers/rapidio/switches/idt_gen2.c +++ b/drivers/rapidio/switches/idt_gen2.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include @@ -387,12 +388,12 @@ idtg2_show_errlog(struct device *dev, struct device_attribute *attr, char *buf) static DEVICE_ATTR(errlog, S_IRUGO, idtg2_show_errlog, NULL); -static int idtg2_sysfs(struct rio_dev *rdev, int create) +static int idtg2_sysfs(struct rio_dev *rdev, bool create) { struct device *dev = &rdev->dev; int err = 0; - if (create == RIO_SW_SYSFS_CREATE) { + if (create) { /* Initialize sysfs entries */ err = device_create_file(dev, &dev_attr_errlog); if (err) @@ -403,29 +404,90 @@ static int idtg2_sysfs(struct rio_dev *rdev, int create) return err; } -static int idtg2_switch_init(struct rio_dev *rdev, int do_enum) +static struct rio_switch_ops idtg2_switch_ops = { + .owner = THIS_MODULE, + .add_entry = idtg2_route_add_entry, + .get_entry = idtg2_route_get_entry, + .clr_table = idtg2_route_clr_table, + .set_domain = idtg2_set_domain, + .get_domain = idtg2_get_domain, + .em_init = idtg2_em_init, + .em_handle = idtg2_em_handler, +}; + +static int idtg2_probe(struct rio_dev *rdev, const struct rio_device_id *id) { pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev)); - rdev->rswitch->add_entry = idtg2_route_add_entry; - rdev->rswitch->get_entry = idtg2_route_get_entry; - rdev->rswitch->clr_table = idtg2_route_clr_table; - rdev->rswitch->set_domain = idtg2_set_domain; - rdev->rswitch->get_domain = idtg2_get_domain; - rdev->rswitch->em_init = idtg2_em_init; - rdev->rswitch->em_handle = idtg2_em_handler; - rdev->rswitch->sw_sysfs = idtg2_sysfs; - - if (do_enum) { + + spin_lock(&rdev->rswitch->lock); + + if (rdev->rswitch->ops) { + spin_unlock(&rdev->rswitch->lock); + return -EINVAL; + } + + rdev->rswitch->ops = &idtg2_switch_ops; + + if (rdev->do_enum) { /* Ensure that default routing is disabled on startup */ rio_write_config_32(rdev, RIO_STD_RTE_DEFAULT_PORT, IDT_NO_ROUTE); } + /* Create device-specific sysfs attributes */ + idtg2_sysfs(rdev, true); + + spin_unlock(&rdev->rswitch->lock); return 0; } -DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1848, idtg2_switch_init); -DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1616, idtg2_switch_init); -DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTVPS1616, idtg2_switch_init); -DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTSPS1616, idtg2_switch_init); -DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1432, idtg2_switch_init); +static void idtg2_remove(struct rio_dev *rdev) +{ + pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev)); + spin_lock(&rdev->rswitch->lock); + if (rdev->rswitch->ops != &idtg2_switch_ops) { + spin_unlock(&rdev->rswitch->lock); + return; + } + rdev->rswitch->ops = NULL; + + /* Remove device-specific sysfs attributes */ + idtg2_sysfs(rdev, false); + + spin_unlock(&rdev->rswitch->lock); +} + +static struct rio_device_id idtg2_id_table[] = { + {RIO_DEVICE(RIO_DID_IDTCPS1848, RIO_VID_IDT)}, + {RIO_DEVICE(RIO_DID_IDTCPS1616, RIO_VID_IDT)}, + {RIO_DEVICE(RIO_DID_IDTVPS1616, RIO_VID_IDT)}, + {RIO_DEVICE(RIO_DID_IDTSPS1616, RIO_VID_IDT)}, + {RIO_DEVICE(RIO_DID_IDTCPS1432, RIO_VID_IDT)}, + { 0, } /* terminate list */ +}; + +static struct rio_driver idtg2_driver = { + .name = "idt_gen2", + .id_table = idtg2_id_table, + .probe = idtg2_probe, + .remove = idtg2_remove, +}; + +static int __init idtg2_init(void) +{ + return rio_register_driver(&idtg2_driver); +} + +static void __exit idtg2_exit(void) +{ + pr_debug("RIO: %s\n", __func__); + rio_unregister_driver(&idtg2_driver); + pr_debug("RIO: %s done\n", __func__); +} + +device_initcall(idtg2_init); +module_exit(idtg2_exit); + +MODULE_DESCRIPTION("IDT CPS Gen.2 Serial RapidIO switch family driver"); +MODULE_AUTHOR("Integrated Device Technology, Inc."); +MODULE_LICENSE("GPL"); diff --git a/drivers/rapidio/switches/idtcps.c b/drivers/rapidio/switches/idtcps.c index d06ee2d..7fbb60d 100644 --- a/drivers/rapidio/switches/idtcps.c +++ b/drivers/rapidio/switches/idtcps.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "../rio.h" #define CPS_DEFAULT_ROUTE 0xde @@ -118,18 +119,31 @@ idtcps_get_domain(struct rio_mport *mport, u16 destid, u8 hopcount, return 0; } -static int idtcps_switch_init(struct rio_dev *rdev, int do_enum) +static struct rio_switch_ops idtcps_switch_ops = { + .owner = THIS_MODULE, + .add_entry = idtcps_route_add_entry, + .get_entry = idtcps_route_get_entry, + .clr_table = idtcps_route_clr_table, + .set_domain = idtcps_set_domain, + .get_domain = idtcps_get_domain, + .em_init = NULL, + .em_handle = NULL, +}; + +static int idtcps_probe(struct rio_dev *rdev, const struct rio_device_id *id) { pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev)); - rdev->rswitch->add_entry = idtcps_route_add_entry; - rdev->rswitch->get_entry = idtcps_route_get_entry; - rdev->rswitch->clr_table = idtcps_route_clr_table; - rdev->rswitch->set_domain = idtcps_set_domain; - rdev->rswitch->get_domain = idtcps_get_domain; - rdev->rswitch->em_init = NULL; - rdev->rswitch->em_handle = NULL; - - if (do_enum) { + + spin_lock(&rdev->rswitch->lock); + + if (rdev->rswitch->ops) { + spin_unlock(&rdev->rswitch->lock); + return -EINVAL; + } + + rdev->rswitch->ops = &idtcps_switch_ops; + + if (rdev->do_enum) { /* set TVAL = ~50us */ rio_write_config_32(rdev, rdev->phys_efptr + RIO_PORT_LINKTO_CTL_CSR, 0x8e << 8); @@ -138,12 +152,52 @@ static int idtcps_switch_init(struct rio_dev *rdev, int do_enum) RIO_STD_RTE_DEFAULT_PORT, CPS_NO_ROUTE); } + spin_unlock(&rdev->rswitch->lock); return 0; } -DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS6Q, idtcps_switch_init); -DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS8, idtcps_switch_init); -DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS10Q, idtcps_switch_init); -DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS12, idtcps_switch_init); -DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS16, idtcps_switch_init); -DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDT70K200, idtcps_switch_init); +static void idtcps_remove(struct rio_dev *rdev) +{ + pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev)); + spin_lock(&rdev->rswitch->lock); + if (rdev->rswitch->ops != &idtcps_switch_ops) { + spin_unlock(&rdev->rswitch->lock); + return; + } + rdev->rswitch->ops = NULL; + spin_unlock(&rdev->rswitch->lock); +} + +static struct rio_device_id idtcps_id_table[] = { + {RIO_DEVICE(RIO_DID_IDTCPS6Q, RIO_VID_IDT)}, + {RIO_DEVICE(RIO_DID_IDTCPS8, RIO_VID_IDT)}, + {RIO_DEVICE(RIO_DID_IDTCPS10Q, RIO_VID_IDT)}, + {RIO_DEVICE(RIO_DID_IDTCPS12, RIO_VID_IDT)}, + {RIO_DEVICE(RIO_DID_IDTCPS16, RIO_VID_IDT)}, + {RIO_DEVICE(RIO_DID_IDT70K200, RIO_VID_IDT)}, + { 0, } /* terminate list */ +}; + +static struct rio_driver idtcps_driver = { + .name = "idtcps", + .id_table = idtcps_id_table, + .probe = idtcps_probe, + .remove = idtcps_remove, +}; + +static int __init idtcps_init(void) +{ + return rio_register_driver(&idtcps_driver); +} + +static void __exit idtcps_exit(void) +{ + rio_unregister_driver(&idtcps_driver); +} + +device_initcall(idtcps_init); +module_exit(idtcps_exit); + +MODULE_DESCRIPTION("IDT CPS Gen.1 Serial RapidIO switch family driver"); +MODULE_AUTHOR("Integrated Device Technology, Inc."); +MODULE_LICENSE("GPL"); diff --git a/drivers/rapidio/switches/tsi568.c b/drivers/rapidio/switches/tsi568.c index 3994c00..8a43561 100644 --- a/drivers/rapidio/switches/tsi568.c +++ b/drivers/rapidio/switches/tsi568.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "../rio.h" /* Global (broadcast) route registers */ @@ -129,18 +130,70 @@ tsi568_em_init(struct rio_dev *rdev) return 0; } -static int tsi568_switch_init(struct rio_dev *rdev, int do_enum) +static struct rio_switch_ops tsi568_switch_ops = { + .owner = THIS_MODULE, + .add_entry = tsi568_route_add_entry, + .get_entry = tsi568_route_get_entry, + .clr_table = tsi568_route_clr_table, + .set_domain = NULL, + .get_domain = NULL, + .em_init = tsi568_em_init, + .em_handle = NULL, +}; + +static int tsi568_probe(struct rio_dev *rdev, const struct rio_device_id *id) { pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev)); - rdev->rswitch->add_entry = tsi568_route_add_entry; - rdev->rswitch->get_entry = tsi568_route_get_entry; - rdev->rswitch->clr_table = tsi568_route_clr_table; - rdev->rswitch->set_domain = NULL; - rdev->rswitch->get_domain = NULL; - rdev->rswitch->em_init = tsi568_em_init; - rdev->rswitch->em_handle = NULL; + spin_lock(&rdev->rswitch->lock); + + if (rdev->rswitch->ops) { + spin_unlock(&rdev->rswitch->lock); + return -EINVAL; + } + + rdev->rswitch->ops = &tsi568_switch_ops; + spin_unlock(&rdev->rswitch->lock); return 0; } -DECLARE_RIO_SWITCH_INIT(RIO_VID_TUNDRA, RIO_DID_TSI568, tsi568_switch_init); +static void tsi568_remove(struct rio_dev *rdev) +{ + pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev)); + spin_lock(&rdev->rswitch->lock); + if (rdev->rswitch->ops != &tsi568_switch_ops) { + spin_unlock(&rdev->rswitch->lock); + return; + } + rdev->rswitch->ops = NULL; + spin_unlock(&rdev->rswitch->lock); +} + +static struct rio_device_id tsi568_id_table[] = { + {RIO_DEVICE(RIO_DID_TSI568, RIO_VID_TUNDRA)}, + { 0, } /* terminate list */ +}; + +static struct rio_driver tsi568_driver = { + .name = "tsi568", + .id_table = tsi568_id_table, + .probe = tsi568_probe, + .remove = tsi568_remove, +}; + +static int __init tsi568_init(void) +{ + return rio_register_driver(&tsi568_driver); +} + +static void __exit tsi568_exit(void) +{ + rio_unregister_driver(&tsi568_driver); +} + +device_initcall(tsi568_init); +module_exit(tsi568_exit); + +MODULE_DESCRIPTION("IDT Tsi568 Serial RapidIO switch driver"); +MODULE_AUTHOR("Integrated Device Technology, Inc."); +MODULE_LICENSE("GPL"); diff --git a/drivers/rapidio/switches/tsi57x.c b/drivers/rapidio/switches/tsi57x.c index db8b802..42c8b01 100644 --- a/drivers/rapidio/switches/tsi57x.c +++ b/drivers/rapidio/switches/tsi57x.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "../rio.h" /* Global (broadcast) route registers */ @@ -292,27 +293,79 @@ exit_es: return 0; } -static int tsi57x_switch_init(struct rio_dev *rdev, int do_enum) +static struct rio_switch_ops tsi57x_switch_ops = { + .owner = THIS_MODULE, + .add_entry = tsi57x_route_add_entry, + .get_entry = tsi57x_route_get_entry, + .clr_table = tsi57x_route_clr_table, + .set_domain = tsi57x_set_domain, + .get_domain = tsi57x_get_domain, + .em_init = tsi57x_em_init, + .em_handle = tsi57x_em_handler, +}; + +static int tsi57x_probe(struct rio_dev *rdev, const struct rio_device_id *id) { pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev)); - rdev->rswitch->add_entry = tsi57x_route_add_entry; - rdev->rswitch->get_entry = tsi57x_route_get_entry; - rdev->rswitch->clr_table = tsi57x_route_clr_table; - rdev->rswitch->set_domain = tsi57x_set_domain; - rdev->rswitch->get_domain = tsi57x_get_domain; - rdev->rswitch->em_init = tsi57x_em_init; - rdev->rswitch->em_handle = tsi57x_em_handler; - - if (do_enum) { + + spin_lock(&rdev->rswitch->lock); + + if (rdev->rswitch->ops) { + spin_unlock(&rdev->rswitch->lock); + return -EINVAL; + } + rdev->rswitch->ops = &tsi57x_switch_ops; + + if (rdev->do_enum) { /* Ensure that default routing is disabled on startup */ rio_write_config_32(rdev, RIO_STD_RTE_DEFAULT_PORT, RIO_INVALID_ROUTE); } + spin_unlock(&rdev->rswitch->lock); return 0; } -DECLARE_RIO_SWITCH_INIT(RIO_VID_TUNDRA, RIO_DID_TSI572, tsi57x_switch_init); -DECLARE_RIO_SWITCH_INIT(RIO_VID_TUNDRA, RIO_DID_TSI574, tsi57x_switch_init); -DECLARE_RIO_SWITCH_INIT(RIO_VID_TUNDRA, RIO_DID_TSI577, tsi57x_switch_init); -DECLARE_RIO_SWITCH_INIT(RIO_VID_TUNDRA, RIO_DID_TSI578, tsi57x_switch_init); +static void tsi57x_remove(struct rio_dev *rdev) +{ + pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev)); + spin_lock(&rdev->rswitch->lock); + if (rdev->rswitch->ops != &tsi57x_switch_ops) { + spin_unlock(&rdev->rswitch->lock); + return; + } + rdev->rswitch->ops = NULL; + spin_unlock(&rdev->rswitch->lock); +} + +static struct rio_device_id tsi57x_id_table[] = { + {RIO_DEVICE(RIO_DID_TSI572, RIO_VID_TUNDRA)}, + {RIO_DEVICE(RIO_DID_TSI574, RIO_VID_TUNDRA)}, + {RIO_DEVICE(RIO_DID_TSI577, RIO_VID_TUNDRA)}, + {RIO_DEVICE(RIO_DID_TSI578, RIO_VID_TUNDRA)}, + { 0, } /* terminate list */ +}; + +static struct rio_driver tsi57x_driver = { + .name = "tsi57x", + .id_table = tsi57x_id_table, + .probe = tsi57x_probe, + .remove = tsi57x_remove, +}; + +static int __init tsi57x_init(void) +{ + return rio_register_driver(&tsi57x_driver); +} + +static void __exit tsi57x_exit(void) +{ + rio_unregister_driver(&tsi57x_driver); +} + +device_initcall(tsi57x_init); +module_exit(tsi57x_exit); + +MODULE_DESCRIPTION("IDT Tsi57x Serial RapidIO switch family driver"); +MODULE_AUTHOR("Integrated Device Technology, Inc."); +MODULE_LICENSE("GPL"); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 4f27372..c74d88b 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -275,13 +275,6 @@ VMLINUX_SYMBOL(__end_builtin_fw) = .; \ } \ \ - /* RapidIO route ops */ \ - .rio_ops : AT(ADDR(.rio_ops) - LOAD_OFFSET) { \ - VMLINUX_SYMBOL(__start_rio_switch_ops) = .; \ - *(.rio_switch_ops) \ - VMLINUX_SYMBOL(__end_rio_switch_ops) = .; \ - } \ - \ TRACEDATA \ \ /* Kernel symbol table: Normal symbols */ \ diff --git a/include/linux/rio.h b/include/linux/rio.h index 18e0993..fcd492e 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -94,6 +94,23 @@ union rio_pw_msg; * @switchid: Switch ID that is unique across a network * @route_table: Copy of switch routing table * @port_ok: Status of each port (one bit per port) - OK=1 or UNINIT=0 + * @ops: pointer to switch-specific operations + * @lock: lock to serialize operations updates + * @nextdev: Array of per-port pointers to the next attached device + */ +struct rio_switch { + struct list_head node; + u16 switchid; + u8 *route_table; + u32 port_ok; + struct rio_switch_ops *ops; + spinlock_t lock; + struct rio_dev *nextdev[0]; +}; + +/** + * struct rio_switch_ops - Per-switch operations + * @owner: The module owner of this structure * @add_entry: Callback for switch-specific route add function * @get_entry: Callback for switch-specific route get function * @clr_table: Callback for switch-specific clear route table function @@ -101,14 +118,12 @@ union rio_pw_msg; * @get_domain: Callback for switch-specific domain get function * @em_init: Callback for switch-specific error management init function * @em_handle: Callback for switch-specific error management handler function - * @sw_sysfs: Callback that initializes switch-specific sysfs attributes - * @nextdev: Array of per-port pointers to the next attached device + * + * Defines the operations that are necessary to initialize/control + * a particular RIO switch device. */ -struct rio_switch { - struct list_head node; - u16 switchid; - u8 *route_table; - u32 port_ok; +struct rio_switch_ops { + struct module *owner; int (*add_entry) (struct rio_mport *mport, u16 destid, u8 hopcount, u16 table, u16 route_destid, u8 route_port); int (*get_entry) (struct rio_mport *mport, u16 destid, u8 hopcount, @@ -121,8 +136,6 @@ struct rio_switch { u8 *sw_domain); int (*em_init) (struct rio_dev *dev); int (*em_handle) (struct rio_dev *dev, u8 swport); - int (*sw_sysfs) (struct rio_dev *dev, int create); - struct rio_dev *nextdev[0]; }; /** @@ -130,6 +143,7 @@ struct rio_switch { * @global_list: Node in list of all RIO devices * @net_list: Node in list of RIO devices in a network * @net: Network this device is a part of + * @do_enum: Enumeration flag * @did: Device ID * @vid: Vendor ID * @device_rev: Device revision @@ -158,6 +172,7 @@ struct rio_dev { struct list_head global_list; /* node in list of all RIO devices */ struct list_head net_list; /* node in per net list */ struct rio_net *net; /* RIO net this device resides in */ + bool do_enum; u16 did; u16 vid; u32 device_rev; @@ -297,10 +312,6 @@ struct rio_net { struct rio_id_table destid_table; /* destID allocation table */ }; -/* Definitions used by switch sysfs initialization callback */ -#define RIO_SW_SYSFS_CREATE 1 /* Create switch attributes */ -#define RIO_SW_SYSFS_REMOVE 0 /* Remove switch attributes */ - /* Low-level architecture-dependent routines */ /** @@ -400,20 +411,6 @@ struct rio_device_id { u16 asm_did, asm_vid; }; -/** - * struct rio_switch_ops - Per-switch operations - * @vid: RIO vendor ID - * @did: RIO device ID - * @init_hook: Callback that performs switch device initialization - * - * Defines the operations that are necessary to initialize/control - * a particular RIO switch device. - */ -struct rio_switch_ops { - u16 vid, did; - int (*init_hook) (struct rio_dev *rdev, int do_enum); -}; - union rio_pw_msg { struct { u32 comptag; /* Component Tag CSR */ -- cgit v0.10.2 From e6161d64263ee7a903acdde1a8ab7d4221d5512f Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 3 Jul 2013 15:08:52 -0700 Subject: rapidio/rionet: rework driver initialization and removal Rework probe/remove routines to prevent rionet driver from monopolizing target RapidIO devices. Fix conflict with modular RapidIO switch drivers. Using one of RapidIO messaging channels rionet driver provides a service layer common to all endpoint devices in a system's RapidIO network. These devices may also require their own specific device driver which will be blocked from attaching to the target device by rionet (or block rionet if loaded earlier). To avoid conflict with device-specific drivers, the rionet driver is reworked to be registered as a subsystem interface on the RapidIO bus. The reworked rio_remove_dev() and rionet_exit() routines also include handling of individual rionet peer device removal which was not supported before. Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: "David S. Miller" Cc: Andre van Herk Cc: Micha Nelissen Cc: Stef van Os Cc: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index f433b59..6d1f6ed 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -208,6 +208,17 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev) if (nets[rnet->mport->id].active[destid]) rionet_queue_tx_msg(skb, ndev, nets[rnet->mport->id].active[destid]); + else { + /* + * If the target device was removed from the list of + * active peers but we still have TX packets targeting + * it just report sending a packet to the target + * (without actual packet transfer). + */ + dev_kfree_skb_any(skb); + ndev->stats.tx_packets++; + ndev->stats.tx_bytes += skb->len; + } } spin_unlock_irqrestore(&rnet->tx_lock, flags); @@ -385,24 +396,28 @@ static int rionet_close(struct net_device *ndev) return 0; } -static void rionet_remove(struct rio_dev *rdev) +static int rionet_remove_dev(struct device *dev, struct subsys_interface *sif) { - struct net_device *ndev = rio_get_drvdata(rdev); + struct rio_dev *rdev = to_rio_dev(dev); unsigned char netid = rdev->net->hport->id; struct rionet_peer *peer, *tmp; - unregister_netdev(ndev); - - free_pages((unsigned long)nets[netid].active, get_order(sizeof(void *) * - RIO_MAX_ROUTE_ENTRIES(rdev->net->hport->sys_size))); - nets[netid].active = NULL; + if (dev_rionet_capable(rdev)) { + list_for_each_entry_safe(peer, tmp, &nets[netid].peers, node) { + if (peer->rdev == rdev) { + if (nets[netid].active[rdev->destid]) { + nets[netid].active[rdev->destid] = NULL; + nets[netid].nact--; + } - list_for_each_entry_safe(peer, tmp, &nets[netid].peers, node) { - list_del(&peer->node); - kfree(peer); + list_del(&peer->node); + kfree(peer); + break; + } + } } - free_netdev(ndev); + return 0; } static void rionet_get_drvinfo(struct net_device *ndev, @@ -503,12 +518,13 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev) static unsigned long net_table[RIONET_MAX_NETS/sizeof(unsigned long) + 1]; -static int rionet_probe(struct rio_dev *rdev, const struct rio_device_id *id) +static int rionet_add_dev(struct device *dev, struct subsys_interface *sif) { int rc = -ENODEV; u32 lsrc_ops, ldst_ops; struct rionet_peer *peer; struct net_device *ndev = NULL; + struct rio_dev *rdev = to_rio_dev(dev); unsigned char netid = rdev->net->hport->id; int oldnet; @@ -518,8 +534,9 @@ static int rionet_probe(struct rio_dev *rdev, const struct rio_device_id *id) oldnet = test_and_set_bit(netid, net_table); /* - * First time through, make sure local device is rionet - * capable, setup netdev (will be skipped on later probes) + * If first time through this net, make sure local device is rionet + * capable and setup netdev (this step will be skipped in later probes + * on the same net). */ if (!oldnet) { rio_local_read_config_32(rdev->net->hport, RIO_SRC_OPS_CAR, @@ -541,6 +558,12 @@ static int rionet_probe(struct rio_dev *rdev, const struct rio_device_id *id) } nets[netid].ndev = ndev; rc = rionet_setup_netdev(rdev->net->hport, ndev); + if (rc) { + printk(KERN_ERR "%s: failed to setup netdev (rc=%d)\n", + DRV_NAME, rc); + goto out; + } + INIT_LIST_HEAD(&nets[netid].peers); nets[netid].nact = 0; } else if (nets[netid].ndev == NULL) @@ -559,31 +582,61 @@ static int rionet_probe(struct rio_dev *rdev, const struct rio_device_id *id) list_add_tail(&peer->node, &nets[netid].peers); } - rio_set_drvdata(rdev, nets[netid].ndev); - - out: + return 0; +out: return rc; } +#ifdef MODULE static struct rio_device_id rionet_id_table[] = { - {RIO_DEVICE(RIO_ANY_ID, RIO_ANY_ID)} + {RIO_DEVICE(RIO_ANY_ID, RIO_ANY_ID)}, + { 0, } /* terminate list */ }; -static struct rio_driver rionet_driver = { - .name = "rionet", - .id_table = rionet_id_table, - .probe = rionet_probe, - .remove = rionet_remove, +MODULE_DEVICE_TABLE(rapidio, rionet_id_table); +#endif + +static struct subsys_interface rionet_interface = { + .name = "rionet", + .subsys = &rio_bus_type, + .add_dev = rionet_add_dev, + .remove_dev = rionet_remove_dev, }; static int __init rionet_init(void) { - return rio_register_driver(&rionet_driver); + return subsys_interface_register(&rionet_interface); } static void __exit rionet_exit(void) { - rio_unregister_driver(&rionet_driver); + struct rionet_private *rnet; + struct net_device *ndev; + struct rionet_peer *peer, *tmp; + int i; + + for (i = 0; i < RIONET_MAX_NETS; i++) { + if (nets[i].ndev != NULL) { + ndev = nets[i].ndev; + rnet = netdev_priv(ndev); + unregister_netdev(ndev); + + list_for_each_entry_safe(peer, + tmp, &nets[i].peers, node) { + list_del(&peer->node); + kfree(peer); + } + + free_pages((unsigned long)nets[i].active, + get_order(sizeof(void *) * + RIO_MAX_ROUTE_ENTRIES(rnet->mport->sys_size))); + nets[i].active = NULL; + + free_netdev(ndev); + } + } + + subsys_interface_unregister(&rionet_interface); } late_initcall(rionet_init); -- cgit v0.10.2 From 9edbc30b434f56258d03faac5daf37a555384db3 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 3 Jul 2013 15:08:53 -0700 Subject: rapidio: update enumerator registration mechanism Update enumeration/discovery method registration mechanism to allow loading enumeration/discovery methods before all mports are registered. Existing statically linked RapidIO subsystem expects that all available RapidIO mport devices are initialized and registered before the enumeration/discovery method is registered. Switching to loadable mport device drivers creates situation when mport device driver can be loaded after enumeration/discovery method is attached (e.g., loadable mport driver in a system with statically linked RapidIO core and enumerator). This also will happen in a system with hot-pluggable RapidIO controllers. To remove the dependency on the initialization/registration order this patch introduces enumeration/discovery registration mechanism that supports arbitrary registration order of mports and enumerator/discovery methods. The following registration rules are implemented: - only one enumeration/discovery method can be registered for given mport ID (including RIO_MPORT_ANY); - when new enumeration/discovery methods tries to attach to the registered mport device, method with matching mport ID will replace a default method previously registered for given mport (if any); - enumeration/discovery method with target ID=RIO_MPORT_ANY will be attached only to mports that do not have another enumerator attached to them; - when new mport device is registered with RapidIO subsystem, registration routine searches for the enumeration/discovery method with the best matching mport ID; Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Andre van Herk Cc: Micha Nelissen Cc: Stef van Os Cc: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index 9139502..ab837fd 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -1162,6 +1162,7 @@ bail: } static struct rio_scan rio_scan_ops = { + .owner = THIS_MODULE, .enumerate = rio_enum_mport, .discover = rio_disc_mport, }; diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c index 864e52f..0c4473e 100644 --- a/drivers/rapidio/rio-sysfs.c +++ b/drivers/rapidio/rio-sysfs.c @@ -286,7 +286,6 @@ static ssize_t bus_scan_store(struct bus_type *bus, const char *buf, size_t count) { long val; - struct rio_mport *port = NULL; int rc; if (kstrtol(buf, 0, &val) < 0) @@ -300,21 +299,7 @@ static ssize_t bus_scan_store(struct bus_type *bus, const char *buf, if (val < 0 || val >= RIO_MAX_MPORTS) return -EINVAL; - port = rio_find_mport((int)val); - - if (!port) { - pr_debug("RIO: %s: mport_%d not available\n", - __func__, (int)val); - return -EINVAL; - } - - if (!port->nscan) - return -EINVAL; - - if (port->host_deviceid >= 0) - rc = port->nscan->enumerate(port, 0); - else - rc = port->nscan->discover(port, RIO_SCAN_ENUM_NO_WAIT); + rc = rio_mport_scan((int)val); exit: if (!rc) rc = count; diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index b17d521..5eb727c 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -34,6 +34,7 @@ static LIST_HEAD(rio_devices); static DEFINE_SPINLOCK(rio_global_list_lock); static LIST_HEAD(rio_mports); +static LIST_HEAD(rio_scans); static DEFINE_MUTEX(rio_mport_list_lock); static unsigned char next_portid; static DEFINE_SPINLOCK(rio_mmap_lock); @@ -1602,34 +1603,73 @@ found: * rio_register_scan - enumeration/discovery method registration interface * @mport_id: mport device ID for which fabric scan routine has to be set * (RIO_MPORT_ANY = set for all available mports) - * @scan_ops: enumeration/discovery control structure + * @scan_ops: enumeration/discovery operations structure + * + * Registers enumeration/discovery operations with RapidIO subsystem and + * attaches it to the specified mport device (or all available mports + * if RIO_MPORT_ANY is specified). * - * Assigns enumeration or discovery method to the specified mport device (or all - * available mports if RIO_MPORT_ANY is specified). * Returns error if the mport already has an enumerator attached to it. - * In case of RIO_MPORT_ANY ignores ports with valid scan routines and returns - * an error if was unable to find at least one available mport. + * In case of RIO_MPORT_ANY skips mports with valid scan routines (no error). */ int rio_register_scan(int mport_id, struct rio_scan *scan_ops) { struct rio_mport *port; - int rc = -EBUSY; + struct rio_scan_node *scan; + int rc = 0; - mutex_lock(&rio_mport_list_lock); - list_for_each_entry(port, &rio_mports, node) { - if (port->id == mport_id || mport_id == RIO_MPORT_ANY) { - if (port->nscan && mport_id == RIO_MPORT_ANY) - continue; - else if (port->nscan) - break; + pr_debug("RIO: %s for mport_id=%d\n", __func__, mport_id); - port->nscan = scan_ops; - rc = 0; + if ((mport_id != RIO_MPORT_ANY && mport_id >= RIO_MAX_MPORTS) || + !scan_ops) + return -EINVAL; - if (mport_id != RIO_MPORT_ANY) - break; + mutex_lock(&rio_mport_list_lock); + + /* + * Check if there is another enumerator already registered for + * the same mport ID (including RIO_MPORT_ANY). Multiple enumerators + * for the same mport ID are not supported. + */ + list_for_each_entry(scan, &rio_scans, node) { + if (scan->mport_id == mport_id) { + rc = -EBUSY; + goto err_out; } } + + /* + * Allocate and initialize new scan registration node. + */ + scan = kzalloc(sizeof(*scan), GFP_KERNEL); + if (!scan) { + rc = -ENOMEM; + goto err_out; + } + + scan->mport_id = mport_id; + scan->ops = scan_ops; + + /* + * Traverse the list of registered mports to attach this new scan. + * + * The new scan with matching mport ID overrides any previously attached + * scan assuming that old scan (if any) is the default one (based on the + * enumerator registration check above). + * If the new scan is the global one, it will be attached only to mports + * that do not have their own individual operations already attached. + */ + list_for_each_entry(port, &rio_mports, node) { + if (port->id == mport_id) { + port->nscan = scan_ops; + break; + } else if (mport_id == RIO_MPORT_ANY && !port->nscan) + port->nscan = scan_ops; + } + + list_add_tail(&scan->node, &rio_scans); + +err_out: mutex_unlock(&rio_mport_list_lock); return rc; @@ -1639,30 +1679,81 @@ EXPORT_SYMBOL_GPL(rio_register_scan); /** * rio_unregister_scan - removes enumeration/discovery method from mport * @mport_id: mport device ID for which fabric scan routine has to be - * unregistered (RIO_MPORT_ANY = set for all available mports) + * unregistered (RIO_MPORT_ANY = apply to all mports that use + * the specified scan_ops) + * @scan_ops: enumeration/discovery operations structure * * Removes enumeration or discovery method assigned to the specified mport - * device (or all available mports if RIO_MPORT_ANY is specified). + * device. If RIO_MPORT_ANY is specified, removes the specified operations from + * all mports that have them attached. */ -int rio_unregister_scan(int mport_id) +int rio_unregister_scan(int mport_id, struct rio_scan *scan_ops) { struct rio_mport *port; + struct rio_scan_node *scan; + + pr_debug("RIO: %s for mport_id=%d\n", __func__, mport_id); + + if (mport_id != RIO_MPORT_ANY && mport_id >= RIO_MAX_MPORTS) + return -EINVAL; mutex_lock(&rio_mport_list_lock); - list_for_each_entry(port, &rio_mports, node) { - if (port->id == mport_id || mport_id == RIO_MPORT_ANY) { - if (port->nscan) - port->nscan = NULL; - if (mport_id != RIO_MPORT_ANY) - break; + + list_for_each_entry(port, &rio_mports, node) + if (port->id == mport_id || + (mport_id == RIO_MPORT_ANY && port->nscan == scan_ops)) + port->nscan = NULL; + + list_for_each_entry(scan, &rio_scans, node) + if (scan->mport_id == mport_id) { + list_del(&scan->node); + kfree(scan); } - } + mutex_unlock(&rio_mport_list_lock); return 0; } EXPORT_SYMBOL_GPL(rio_unregister_scan); +/** + * rio_mport_scan - execute enumeration/discovery on the specified mport + * @mport_id: number (ID) of mport device + */ +int rio_mport_scan(int mport_id) +{ + struct rio_mport *port = NULL; + int rc; + + mutex_lock(&rio_mport_list_lock); + list_for_each_entry(port, &rio_mports, node) { + if (port->id == mport_id) + goto found; + } + mutex_unlock(&rio_mport_list_lock); + return -ENODEV; +found: + if (!port->nscan) { + mutex_unlock(&rio_mport_list_lock); + return -EINVAL; + } + + if (!try_module_get(port->nscan->owner)) { + mutex_unlock(&rio_mport_list_lock); + return -ENODEV; + } + + mutex_unlock(&rio_mport_list_lock); + + if (port->host_deviceid >= 0) + rc = port->nscan->enumerate(port, 0); + else + rc = port->nscan->discover(port, RIO_SCAN_ENUM_NO_WAIT); + + module_put(port->nscan->owner); + return rc; +} + static void rio_fixup_device(struct rio_dev *dev) { } @@ -1691,7 +1782,10 @@ static void disc_work_handler(struct work_struct *_work) work = container_of(_work, struct rio_disc_work, work); pr_debug("RIO: discovery work for mport %d %s\n", work->mport->id, work->mport->name); - work->mport->nscan->discover(work->mport, 0); + if (try_module_get(work->mport->nscan->owner)) { + work->mport->nscan->discover(work->mport, 0); + module_put(work->mport->nscan->owner); + } } int rio_init_mports(void) @@ -1710,8 +1804,10 @@ int rio_init_mports(void) mutex_lock(&rio_mport_list_lock); list_for_each_entry(port, &rio_mports, node) { if (port->host_deviceid >= 0) { - if (port->nscan) + if (port->nscan && try_module_get(port->nscan->owner)) { port->nscan->enumerate(port, 0); + module_put(port->nscan->owner); + } } else n++; } @@ -1725,7 +1821,7 @@ int rio_init_mports(void) * for each of them. If the code below fails to allocate needed * resources, exit without error to keep results of enumeration * process (if any). - * TODO: Implement restart of dicovery process for all or + * TODO: Implement restart of discovery process for all or * individual discovering mports. */ rio_wq = alloc_workqueue("riodisc", 0, 0); @@ -1751,9 +1847,9 @@ int rio_init_mports(void) n++; } } - mutex_unlock(&rio_mport_list_lock); flush_workqueue(rio_wq); + mutex_unlock(&rio_mport_list_lock); pr_debug("RIO: destroy discovery workqueue\n"); destroy_workqueue(rio_wq); kfree(work); @@ -1784,6 +1880,8 @@ __setup("riohdid=", rio_hdid_setup); int rio_register_mport(struct rio_mport *port) { + struct rio_scan_node *scan = NULL; + if (next_portid >= RIO_MAX_MPORTS) { pr_err("RIO: reached specified max number of mports\n"); return 1; @@ -1792,9 +1890,25 @@ int rio_register_mport(struct rio_mport *port) port->id = next_portid++; port->host_deviceid = rio_get_hdid(port->id); port->nscan = NULL; + mutex_lock(&rio_mport_list_lock); list_add_tail(&port->node, &rio_mports); + + /* + * Check if there are any registered enumeration/discovery operations + * that have to be attached to the added mport. + */ + list_for_each_entry(scan, &rio_scans, node) { + if (port->id == scan->mport_id || + scan->mport_id == RIO_MPORT_ANY) { + port->nscan = scan->ops; + if (port->id == scan->mport_id) + break; + } + } mutex_unlock(&rio_mport_list_lock); + + pr_debug("RIO: %s %s id=%d\n", __func__, port->name, port->id); return 0; } diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h index d595877..085215c 100644 --- a/drivers/rapidio/rio.h +++ b/drivers/rapidio/rio.h @@ -42,9 +42,10 @@ extern int rio_add_device(struct rio_dev *rdev); extern int rio_enable_rx_tx_port(struct rio_mport *port, int local, u16 destid, u8 hopcount, u8 port_num); extern int rio_register_scan(int mport_id, struct rio_scan *scan_ops); -extern int rio_unregister_scan(int mport_id); +extern int rio_unregister_scan(int mport_id, struct rio_scan *scan_ops); extern void rio_attach_device(struct rio_dev *rdev); extern struct rio_mport *rio_find_mport(int mport_id); +extern int rio_mport_scan(int mport_id); /* Structures internal to the RIO core code */ extern struct device_attribute rio_dev_attrs[]; diff --git a/include/linux/rio.h b/include/linux/rio.h index fcd492e..8d3db1b 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -465,14 +465,29 @@ static inline struct rio_mport *dma_to_mport(struct dma_device *ddev) /** * struct rio_scan - RIO enumeration and discovery operations + * @owner: The module owner of this structure * @enumerate: Callback to perform RapidIO fabric enumeration. * @discover: Callback to perform RapidIO fabric discovery. */ struct rio_scan { + struct module *owner; int (*enumerate)(struct rio_mport *mport, u32 flags); int (*discover)(struct rio_mport *mport, u32 flags); }; +/** + * struct rio_scan_node - list node to register RapidIO enumeration and + * discovery methods with RapidIO core. + * @mport_id: ID of an mport (net) serviced by this enumerator + * @node: node in global list of registered enumerators + * @ops: RIO enumeration and discovery operations + */ +struct rio_scan_node { + int mport_id; + struct list_head node; + struct rio_scan *ops; +}; + /* Architecture and hardware-specific functions */ extern int rio_register_mport(struct rio_mport *); extern int rio_open_inb_mbox(struct rio_mport *, void *, int, int); -- cgit v0.10.2 From 94d9bd4576fcd340c344bffbbe0526227535a95f Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 3 Jul 2013 15:08:55 -0700 Subject: rapidio/tsi721: convert to modular mport driver This patch adds an option to build device driver for Tsi721 PCIe-to-SRIO bridge device as a kernel module. Currently this module cannot be unloaded because the existing RapidIO subsystem code does not support dynamic removal of local RapidIO controllers (TODO). Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Andre van Herk Cc: Micha Nelissen Cc: Stef van Os Cc: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rapidio/devices/Kconfig b/drivers/rapidio/devices/Kconfig index 12a9d7f..c4cb087 100644 --- a/drivers/rapidio/devices/Kconfig +++ b/drivers/rapidio/devices/Kconfig @@ -3,7 +3,7 @@ # config RAPIDIO_TSI721 - bool "IDT Tsi721 PCI Express SRIO Controller support" + tristate "IDT Tsi721 PCI Express SRIO Controller support" depends on RAPIDIO && PCIEPORTBUS default "n" ---help--- diff --git a/drivers/rapidio/devices/Makefile b/drivers/rapidio/devices/Makefile index 7b62860..9432c49 100644 --- a/drivers/rapidio/devices/Makefile +++ b/drivers/rapidio/devices/Makefile @@ -2,7 +2,6 @@ # Makefile for RapidIO devices # -obj-$(CONFIG_RAPIDIO_TSI721) += tsi721.o -ifeq ($(CONFIG_RAPIDIO_DMA_ENGINE),y) -obj-$(CONFIG_RAPIDIO_TSI721) += tsi721_dma.o -endif +obj-$(CONFIG_RAPIDIO_TSI721) += tsi721_mport.o +tsi721_mport-y := tsi721.o +tsi721_mport-$(CONFIG_RAPIDIO_DMA_ENGINE) += tsi721_dma.o diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c index a8b2c23..ff7cbf2 100644 --- a/drivers/rapidio/devices/tsi721.c +++ b/drivers/rapidio/devices/tsi721.c @@ -2515,9 +2515,8 @@ static int __init tsi721_init(void) return pci_register_driver(&tsi721_driver); } -static void __exit tsi721_exit(void) -{ - pci_unregister_driver(&tsi721_driver); -} - device_initcall(tsi721_init); + +MODULE_DESCRIPTION("IDT Tsi721 PCIExpress-to-SRIO bridge driver"); +MODULE_AUTHOR("Integrated Device Technology, Inc."); +MODULE_LICENSE("GPL"); diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 5eb727c..2054b62 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -1911,6 +1911,7 @@ int rio_register_mport(struct rio_mport *port) pr_debug("RIO: %s %s id=%d\n", __func__, port->name, port->id); return 0; } +EXPORT_SYMBOL_GPL(rio_register_mport); EXPORT_SYMBOL_GPL(rio_local_get_device_id); EXPORT_SYMBOL_GPL(rio_get_device); -- cgit v0.10.2 From fdf90abc00979fb2d61dbdba9e855200e236142b Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 3 Jul 2013 15:08:56 -0700 Subject: rapidio: add modular build option for the subsystem core Add a configuration option to build RapidIO subsystem core code as a loadable kernel module. Currently this option is available only for x86-based platforms, with the additional patch for PowerPC planned to be provided later. This patch replaces kernel command line parameter "riohdid=" with its module-specific analog "rapidio.hdid=". Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Andre van Herk Cc: Micha Nelissen Cc: Stef van Os Cc: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 10764a3..2775023 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2259,11 +2259,11 @@ source "drivers/pcmcia/Kconfig" source "drivers/pci/hotplug/Kconfig" config RAPIDIO - bool "RapidIO support" + tristate "RapidIO support" depends on PCI default n help - If you say Y here, the kernel will include drivers and + If enabled this option will include drivers and the core infrastructure code to support RapidIO interconnect devices. source "drivers/rapidio/Kconfig" diff --git a/drivers/rapidio/Makefile b/drivers/rapidio/Makefile index 3036702..6271ada 100644 --- a/drivers/rapidio/Makefile +++ b/drivers/rapidio/Makefile @@ -1,7 +1,9 @@ # # Makefile for RapidIO interconnect services # -obj-y += rio.o rio-access.o rio-driver.o rio-sysfs.o +obj-$(CONFIG_RAPIDIO) += rapidio.o +rapidio-y := rio.o rio-access.o rio-driver.o rio-sysfs.o + obj-$(CONFIG_RAPIDIO_ENUM_BASIC) += rio-scan.o obj-$(CONFIG_RAPIDIO) += switches/ diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 2054b62..f4f30af 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -5,7 +5,7 @@ * Copyright 2005 MontaVista Software, Inc. * Matt Porter * - * Copyright 2009 Integrated Device Technology, Inc. + * Copyright 2009 - 2013 Integrated Device Technology, Inc. * Alex Bounine * * This program is free software; you can redistribute it and/or modify it @@ -30,6 +30,17 @@ #include "rio.h" +MODULE_DESCRIPTION("RapidIO Subsystem Core"); +MODULE_AUTHOR("Matt Porter "); +MODULE_AUTHOR("Alexandre Bounine "); +MODULE_LICENSE("GPL"); + +static int hdid[RIO_MAX_MPORTS]; +static int ids_num; +module_param_array(hdid, int, &ids_num, 0); +MODULE_PARM_DESC(hdid, + "Destination ID assignment to local RapidIO controllers"); + static LIST_HEAD(rio_devices); static DEFINE_SPINLOCK(rio_global_list_lock); @@ -1860,24 +1871,14 @@ no_disc: return 0; } -static int hdids[RIO_MAX_MPORTS + 1]; - static int rio_get_hdid(int index) { - if (!hdids[0] || hdids[0] <= index || index >= RIO_MAX_MPORTS) + if (ids_num == 0 || ids_num <= index || index >= RIO_MAX_MPORTS) return -1; - return hdids[index + 1]; + return hdid[index]; } -static int rio_hdid_setup(char *str) -{ - (void)get_options(str, ARRAY_SIZE(hdids), hdids); - return 1; -} - -__setup("riohdid=", rio_hdid_setup); - int rio_register_mport(struct rio_mport *port) { struct rio_scan_node *scan = NULL; -- cgit v0.10.2 From 3bdbb62fe97537252b22f700009863eeb51aa750 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 3 Jul 2013 15:08:58 -0700 Subject: rapidio: add udev notification Add RapidIO-specific modalias generation to enable udev notifications about RapidIO-specific events. The RapidIO modalias string format is shown below: "rapidio:vNNNNdNNNNavNNNNadNNNN" Where: v - Device Vendor ID (16 bit), d - Device ID (16 bit), av - Assembly Vendor ID (16 bit), ad - Assembly ID (16 bit), as they are reported in corresponding Capability Registers (CARs) of each RapidIO device. Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Andre van Herk Cc: Micha Nelissen Cc: Stef van Os Cc: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c index a0c8755..3e9b6a7 100644 --- a/drivers/rapidio/rio-driver.c +++ b/drivers/rapidio/rio-driver.c @@ -199,6 +199,23 @@ static int rio_match_bus(struct device *dev, struct device_driver *drv) out:return 0; } +static int rio_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct rio_dev *rdev; + + if (!dev) + return -ENODEV; + + rdev = to_rio_dev(dev); + if (!rdev) + return -ENODEV; + + if (add_uevent_var(env, "MODALIAS=rapidio:v%04Xd%04Xav%04Xad%04X", + rdev->vid, rdev->did, rdev->asm_vid, rdev->asm_did)) + return -ENOMEM; + return 0; +} + struct device rio_bus = { .init_name = "rapidio", }; @@ -210,6 +227,7 @@ struct bus_type rio_bus_type = { .bus_attrs = rio_bus_attrs, .probe = rio_device_probe, .remove = rio_device_remove, + .uevent = rio_uevent, }; /** diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c index 0c4473e..9331be6 100644 --- a/drivers/rapidio/rio-sysfs.c +++ b/drivers/rapidio/rio-sysfs.c @@ -84,6 +84,15 @@ static ssize_t lnext_show(struct device *dev, return str - buf; } +static ssize_t modalias_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct rio_dev *rdev = to_rio_dev(dev); + + return sprintf(buf, "rapidio:v%04Xd%04Xav%04Xad%04X\n", + rdev->vid, rdev->did, rdev->asm_vid, rdev->asm_did); +} + struct device_attribute rio_dev_attrs[] = { __ATTR_RO(did), __ATTR_RO(vid), @@ -93,6 +102,7 @@ struct device_attribute rio_dev_attrs[] = { __ATTR_RO(asm_rev), __ATTR_RO(lprev), __ATTR_RO(destid), + __ATTR_RO(modalias), __ATTR_NULL, }; diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index b3bd7e7..b62d4af 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -579,4 +579,23 @@ struct mei_cl_device_id { kernel_ulong_t driver_info; }; +/* RapidIO */ + +#define RIO_ANY_ID 0xffff + +/** + * struct rio_device_id - RIO device identifier + * @did: RapidIO device ID + * @vid: RapidIO vendor ID + * @asm_did: RapidIO assembly device ID + * @asm_vid: RapidIO assembly vendor ID + * + * Identifies a RapidIO device based on both the device/vendor IDs and + * the assembly device/vendor IDs. + */ +struct rio_device_id { + __u16 did, vid; + __u16 asm_did, asm_vid; +}; + #endif /* LINUX_MOD_DEVICETABLE_H */ diff --git a/include/linux/rio.h b/include/linux/rio.h index 8d3db1b..e2faf7b 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -20,6 +20,7 @@ #include #include #include +#include #ifdef CONFIG_RAPIDIO_DMA_ENGINE #include #endif @@ -396,21 +397,6 @@ struct rio_driver { #define to_rio_driver(drv) container_of(drv,struct rio_driver, driver) -/** - * struct rio_device_id - RIO device identifier - * @did: RIO device ID - * @vid: RIO vendor ID - * @asm_did: RIO assembly device ID - * @asm_vid: RIO assembly vendor ID - * - * Identifies a RIO device based on both the device/vendor IDs and - * the assembly device/vendor IDs. - */ -struct rio_device_id { - u16 did, vid; - u16 asm_did, asm_vid; -}; - union rio_pw_msg { struct { u32 comptag; /* Component Tag CSR */ diff --git a/include/linux/rio_ids.h b/include/linux/rio_ids.h index b66d13d..2543bc1 100644 --- a/include/linux/rio_ids.h +++ b/include/linux/rio_ids.h @@ -13,8 +13,6 @@ #ifndef LINUX_RIO_IDS_H #define LINUX_RIO_IDS_H -#define RIO_ANY_ID 0xffff - #define RIO_VID_FREESCALE 0x0002 #define RIO_DID_MPC8560 0x0003 diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index e66d4d2..bb5d115 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -177,5 +177,11 @@ int main(void) DEVID(mei_cl_device_id); DEVID_FIELD(mei_cl_device_id, name); + DEVID(rio_device_id); + DEVID_FIELD(rio_device_id, did); + DEVID_FIELD(rio_device_id, vid); + DEVID_FIELD(rio_device_id, asm_did); + DEVID_FIELD(rio_device_id, asm_vid); + return 0; } diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 45f9a33..d9e67b7 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -1145,6 +1145,26 @@ static int do_mei_entry(const char *filename, void *symval, } ADD_TO_DEVTABLE("mei", mei_cl_device_id, do_mei_entry); +/* Looks like: rapidio:vNdNavNadN */ +static int do_rio_entry(const char *filename, + void *symval, char *alias) +{ + DEF_FIELD(symval, rio_device_id, did); + DEF_FIELD(symval, rio_device_id, vid); + DEF_FIELD(symval, rio_device_id, asm_did); + DEF_FIELD(symval, rio_device_id, asm_vid); + + strcpy(alias, "rapidio:"); + ADD(alias, "v", vid != RIO_ANY_ID, vid); + ADD(alias, "d", did != RIO_ANY_ID, did); + ADD(alias, "av", asm_vid != RIO_ANY_ID, asm_vid); + ADD(alias, "ad", asm_did != RIO_ANY_ID, asm_did); + + add_wildcard(alias); + return 1; +} +ADD_TO_DEVTABLE("rapidio", rio_device_id, do_rio_entry); + /* Does namelen bytes of name exactly match the symbol? */ static bool sym_is(const char *name, unsigned namelen, const char *symbol) { -- cgit v0.10.2 From ed5edee2f8547d5c2b28de21cb1471aaea71ee0a Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 3 Jul 2013 15:08:59 -0700 Subject: rapidio: documentation update Update RapidIO documentation files to reflect modularization changes. Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Andre van Herk Cc: Micha Nelissen Cc: Stef van Os Cc: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/rapidio/rapidio.txt b/Documentation/rapidio/rapidio.txt index a9c16c9..717f5aa 100644 --- a/Documentation/rapidio/rapidio.txt +++ b/Documentation/rapidio/rapidio.txt @@ -73,28 +73,44 @@ data structure. This structure includes lists of all devices and local master ports that form the same network. It also contains a pointer to the default master port that is used to communicate with devices within the network. +2.5 Device Drivers + +RapidIO device-specific drivers follow Linux Kernel Driver Model and are +intended to support specific RapidIO devices attached to the RapidIO network. + +2.6 Subsystem Interfaces + +RapidIO interconnect specification defines features that may be used to provide +one or more common service layers for all participating RapidIO devices. These +common services may act separately from device-specific drivers or be used by +device-specific drivers. Example of such service provider is the RIONET driver +which implements Ethernet-over-RapidIO interface. Because only one driver can be +registered for a device, all common RapidIO services have to be registered as +subsystem interfaces. This allows to have multiple common services attached to +the same device without blocking attachment of a device-specific driver. + 3. Subsystem Initialization --------------------------- In order to initialize the RapidIO subsystem, a platform must initialize and register at least one master port within the RapidIO network. To register mport -within the subsystem controller driver initialization code calls function +within the subsystem controller driver's initialization code calls function rio_register_mport() for each available master port. -RapidIO subsystem uses subsys_initcall() or device_initcall() to perform -controller initialization (depending on controller device type). - After all active master ports are registered with a RapidIO subsystem, an enumeration and/or discovery routine may be called automatically or by user-space command. +RapidIO subsystem can be configured to be built as a statically linked or +modular component of the kernel (see details below). + 4. Enumeration and Discovery ---------------------------- 4.1 Overview ------------ -RapidIO subsystem configuration options allow users to specify enumeration and +RapidIO subsystem configuration options allow users to build enumeration and discovery methods as statically linked components or loadable modules. An enumeration/discovery method implementation and available input parameters define how any given method can be attached to available RapidIO mports: @@ -115,8 +131,8 @@ several methods to initiate an enumeration and/or discovery process: endpoint waits for enumeration to be completed. If the specified timeout expires the discovery process is terminated without obtaining RapidIO network information. NOTE: a timed out discovery process may be restarted later using - a user-space command as it is described later if the given endpoint was - enumerated successfully. + a user-space command as it is described below (if the given endpoint was + enumerated successfully). (b) Statically linked enumeration and discovery process can be started by a command from user space. This initiation method provides more flexibility @@ -138,15 +154,42 @@ When a network scan process is started it calls an enumeration or discovery routine depending on the configured role of a master port: host or agent. Enumeration is performed by a master port if it is configured as a host port by -assigning a host device ID greater than or equal to zero. A host device ID is -assigned to a master port through the kernel command line parameter "riohdid=", -or can be configured in a platform-specific manner. If the host device ID for -a specific master port is set to -1, the discovery process will be performed -for it. +assigning a host destination ID greater than or equal to zero. The host +destination ID can be assigned to a master port using various methods depending +on RapidIO subsystem build configuration: + + (a) For a statically linked RapidIO subsystem core use command line parameter + "rapidio.hdid=" with a list of destination ID assignments in order of mport + device registration. For example, in a system with two RapidIO controllers + the command line parameter "rapidio.hdid=-1,7" will result in assignment of + the host destination ID=7 to the second RapidIO controller, while the first + one will be assigned destination ID=-1. + + (b) If the RapidIO subsystem core is built as a loadable module, in addition + to the method shown above, the host destination ID(s) can be specified using + traditional methods of passing module parameter "hdid=" during its loading: + - from command line: "modprobe rapidio hdid=-1,7", or + - from modprobe configuration file using configuration command "options", + like in this example: "options rapidio hdid=-1,7". An example of modprobe + configuration file is provided in the section below. + + NOTES: + (i) if "hdid=" parameter is omitted all available mport will be assigned + destination ID = -1; + (ii) the "hdid=" parameter in systems with multiple mports can have + destination ID assignments omitted from the end of list (default = -1). + +If the host device ID for a specific master port is set to -1, the discovery +process will be performed for it. The enumeration and discovery routines use RapidIO maintenance transactions to access the configuration space of devices. +NOTE: If RapidIO switch-specific device drivers are built as loadable modules +they must be loaded before enumeration/discovery process starts. +This requirement is cased by the fact that enumeration/discovery methods invoke +vendor-specific callbacks on early stages. + 4.2 Automatic Start of Enumeration and Discovery ------------------------------------------------ @@ -266,7 +309,36 @@ method's module initialization routine calls rio_register_scan() to attach an enumerator to a specified mport device (or devices). The basic enumerator implementation demonstrates this process. -5. References +4.6 Using Loadable RapidIO Switch Drivers +----------------------------------------- + +In the case when RapidIO switch drivers are built as loadable modules a user +must ensure that they are loaded before the enumeration/discovery starts. +This process can be automated by specifying pre- or post- dependencies in the +RapidIO-specific modprobe configuration file as shown in the example below. + + File /etc/modprobe.d/rapidio.conf: + ---------------------------------- + + # Configure RapidIO subsystem modules + + # Set enumerator host destination ID (overrides kernel command line option) + options rapidio hdid=-1,2 + + # Load RapidIO switch drivers immediately after rapidio core module was loaded + softdep rapidio post: idt_gen2 idtcps tsi57x + + # OR : + + # Load RapidIO switch drivers just before rio-scan enumerator module is loaded + softdep rio-scan pre: idt_gen2 idtcps tsi57x + + -------------------------- + +NOTE: In the example above, one of "softdep" commands must be removed or +commented out to keep required module loading sequence. + +A. References ------------- [1] RapidIO Trade Association. RapidIO Interconnect Specifications. diff --git a/Documentation/rapidio/sysfs.txt b/Documentation/rapidio/sysfs.txt index 1987817..271438c 100644 --- a/Documentation/rapidio/sysfs.txt +++ b/Documentation/rapidio/sysfs.txt @@ -40,6 +40,7 @@ device_rev - returns the device revision level (see 4.1 for switch specific details) lprev - returns name of previous device (switch) on the path to the device that that owns this attribute + modalias - returns the device modalias In addition to the files listed above, each device has a binary attribute file that allows read/write access to the device configuration registers using -- cgit v0.10.2 From 6ca40c2565fc617534d20d10a5848b626213608c Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 3 Jul 2013 15:09:01 -0700 Subject: rapidio: change endpoint device name format Change endpoint device name format to use a component tag value instead of device destination ID. RapidIO specification defines a component tag to be a unique identifier for devices in a network. RapidIO switches already use component tag as part of their device name and also use it for device identification when processing error management event notifications. Forming an endpoint's device name using its component tag instead of destination ID allows to keep sysfs device directories unchanged in case if a routing process dynamically changes endpoint's destination ID as a result of route optimization. This change should not affect any existing users because a valid device destination ID always should be obtained by reading "destid" attribute and not by parsing device name. This patch also removes switchid member from struct rio_switch because it simply duplicates the component tag and does not have other use than in device name generation. Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Andre van Herk Cc: Micha Nelissen Cc: Stef van Os Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index ab837fd..d3a6539 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -433,7 +433,6 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, /* If a PE has both switch and other functions, show it as a switch */ if (rio_is_switch(rdev)) { rswitch = rdev->rswitch; - rswitch->switchid = rdev->comp_tag & RIO_CTAG_UDEVID; rswitch->port_ok = 0; spin_lock_init(&rswitch->lock); rswitch->route_table = kzalloc(sizeof(u8)* @@ -446,7 +445,7 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, rdid++) rswitch->route_table[rdid] = RIO_INVALID_ROUTE; dev_set_name(&rdev->dev, "%02x:s:%04x", rdev->net->id, - rswitch->switchid); + rdev->comp_tag & RIO_CTAG_UDEVID); if (do_enum) rio_route_clr_table(rdev, RIO_GLOBAL_TABLE, 0); @@ -459,7 +458,7 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, rio_enable_rx_tx_port(port, 0, destid, hopcount, 0); dev_set_name(&rdev->dev, "%02x:e:%04x", rdev->net->id, - rdev->destid); + rdev->comp_tag & RIO_CTAG_UDEVID); } rio_attach_device(rdev); diff --git a/include/linux/rio.h b/include/linux/rio.h index e2faf7b..b71d573 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -92,7 +92,6 @@ union rio_pw_msg; /** * struct rio_switch - RIO switch info * @node: Node in global list of switches - * @switchid: Switch ID that is unique across a network * @route_table: Copy of switch routing table * @port_ok: Status of each port (one bit per port) - OK=1 or UNINIT=0 * @ops: pointer to switch-specific operations @@ -101,7 +100,6 @@ union rio_pw_msg; */ struct rio_switch { struct list_head node; - u16 switchid; u8 *route_table; u32 port_ok; struct rio_switch_ops *ops; -- cgit v0.10.2 From 8f75af44eed0c81f818b5b345023cebfe5209400 Mon Sep 17 00:00:00 2001 From: "Raphael S. Carvalho" Date: Wed, 3 Jul 2013 15:09:02 -0700 Subject: kernel/pid.c: move statement Move statement to static initilization of init_pid_ns. Signed-off-by: Raphael S. Carvalho Cc: "Eric W. Biederman" Acked-by: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/pid.c b/kernel/pid.c index 61980ce..66505c1 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -75,6 +75,7 @@ struct pid_namespace init_pid_ns = { [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } }, .last_pid = 0, + .nr_hashed = PIDNS_HASH_ADDING, .level = 0, .child_reaper = &init_task, .user_ns = &init_user_ns, @@ -590,7 +591,6 @@ void __init pidmap_init(void) /* Reserve PID 0. We never call free_pidmap(0) */ set_bit(0, init_pid_ns.pidmap[0].page); atomic_dec(&init_pid_ns.pidmap[0].nr_free); - init_pid_ns.nr_hashed = PIDNS_HASH_ADDING; init_pid_ns.pid_cachep = KMEM_CACHE(pid, SLAB_HWCACHE_ALIGN | SLAB_PANIC); -- cgit v0.10.2 From 9532f149ee4532b5efc7e4a76381e1742805e1af Mon Sep 17 00:00:00 2001 From: Michal Belczyk Date: Wed, 3 Jul 2013 15:09:03 -0700 Subject: nbd: remove bogus BUG_ON in NBD_CLEAR_QUE The NBD_CLEAR_QUE ioctl has been deprecated for quite some time (its job is now done by two other ioctls). We should stop trying to make bogus assertions in it. Also, user-level code should remove calls to NBD_CLEAR_QUE, ASAP. Signed-off-by: Michal Belczyk Signed-off-by: Paul Clements Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 46b35f7..7ed1308 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -751,7 +751,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, * This is for compatibility only. The queue is always cleared * by NBD_DO_IT or NBD_CLEAR_SOCK. */ - BUG_ON(!nbd->sock && !list_empty(&nbd->queue_head)); return 0; case NBD_PRINT_DEBUG: -- cgit v0.10.2 From c378f70adbc1bbecd9e6db145019f14b2f688c7c Mon Sep 17 00:00:00 2001 From: Paul Clements Date: Wed, 3 Jul 2013 15:09:04 -0700 Subject: nbd: correct disconnect behavior Currently, when a disconnect is requested by the user (via NBD_DISCONNECT ioctl) the return from NBD_DO_IT is undefined (it is usually one of several error codes). This means that nbd-client does not know if a manual disconnect was performed or whether a network error occurred. Because of this, nbd-client's persist mode (which tries to reconnect after error, but not after manual disconnect) does not always work correctly. This change fixes this by causing NBD_DO_IT to always return 0 if a user requests a disconnect. This means that nbd-client can correctly either persist the connection (if an error occurred) or disconnect (if the user requested it). Signed-off-by: Paul Clements Acked-by: Rob Landley Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 7ed1308..2dc3b51 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -623,8 +623,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, if (!nbd->sock) return -EINVAL; + nbd->disconnect = 1; + nbd_send_req(nbd, &sreq); - return 0; + return 0; } case NBD_CLEAR_SOCK: { @@ -654,6 +656,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, nbd->sock = SOCKET_I(inode); if (max_part > 0) bdev->bd_invalidated = 1; + nbd->disconnect = 0; /* we're connected now */ return 0; } else { fput(file); @@ -743,6 +746,8 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, set_capacity(nbd->disk, 0); if (max_part > 0) ioctl_by_bdev(bdev, BLKRRPART, 0); + if (nbd->disconnect) /* user requested, ignore socket errors */ + return 0; return nbd->harderror; } diff --git a/include/linux/nbd.h b/include/linux/nbd.h index 4871170..ae4981e 100644 --- a/include/linux/nbd.h +++ b/include/linux/nbd.h @@ -41,6 +41,7 @@ struct nbd_device { u64 bytesize; pid_t pid; /* pid of nbd-client, if attached */ int xmit_timeout; + int disconnect; /* a disconnect has been requested by user */ }; #endif -- cgit v0.10.2 From 8030d34397e066deecb5ee9d17387fa767b12de2 Mon Sep 17 00:00:00 2001 From: Ed Cashin Date: Wed, 3 Jul 2013 15:09:05 -0700 Subject: aoe: perform I/O completions in parallel Some users have a large AoE target while others like to use many AoE targets at the same time. In the latter case, there is an opportunity to greatly improve aggregate throughput by allowing different threads to complete the I/O associated with each target. For 36 targets, 4 KiB read throughput roughly doubles, for example, with these changes in place. Signed-off-by: Ed Cashin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index 1756494..c969852 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -196,9 +196,11 @@ struct ktstate { struct completion rendez; struct task_struct *task; wait_queue_head_t *waitq; - int (*fn) (void); - char *name; + int (*fn) (int); + char name[12]; spinlock_t *lock; + int id; + int active; }; int aoeblk_init(void); @@ -222,6 +224,7 @@ int aoecmd_init(void); struct sk_buff *aoecmd_ata_id(struct aoedev *); void aoe_freetframe(struct frame *); void aoe_flush_iocq(void); +void aoe_flush_iocq_by_index(int); void aoe_end_request(struct aoedev *, struct request *, int); int aoe_ktstart(struct ktstate *k); void aoe_ktstop(struct ktstate *k); diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index b75c7db..19955dd 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -35,14 +35,27 @@ module_param(aoe_maxout, int, 0644); MODULE_PARM_DESC(aoe_maxout, "Only aoe_maxout outstanding packets for every MAC on eX.Y."); -static wait_queue_head_t ktiowq; -static struct ktstate kts; +/* The number of online cpus during module initialization gives us a + * convenient heuristic cap on the parallelism used for ktio threads + * doing I/O completion. It is not important that the cap equal the + * actual number of running CPUs at any given time, but because of CPU + * hotplug, we take care to use ncpus instead of using + * num_online_cpus() after module initialization. + */ +static int ncpus; + +/* mutex lock used for synchronization while thread spawning */ +static DEFINE_MUTEX(ktio_spawn_lock); + +static wait_queue_head_t *ktiowq; +static struct ktstate *kts; /* io completion queue */ -static struct { +struct iocq_ktio { struct list_head head; spinlock_t lock; -} iocq; +}; +static struct iocq_ktio *iocq; static struct page *empty_page; @@ -1278,23 +1291,36 @@ out: * Returns true iff responses needing processing remain. */ static int -ktio(void) +ktio(int id) { struct frame *f; struct list_head *pos; int i; + int actual_id; for (i = 0; ; ++i) { if (i == MAXIOC) return 1; - if (list_empty(&iocq.head)) + if (list_empty(&iocq[id].head)) return 0; - pos = iocq.head.next; + pos = iocq[id].head.next; list_del(pos); - spin_unlock_irq(&iocq.lock); f = list_entry(pos, struct frame, head); + spin_unlock_irq(&iocq[id].lock); ktiocomplete(f); - spin_lock_irq(&iocq.lock); + + /* Figure out if extra threads are required. */ + actual_id = f->t->d->aoeminor % ncpus; + + if (!kts[actual_id].active) { + BUG_ON(id != 0); + mutex_lock(&ktio_spawn_lock); + if (!kts[actual_id].active + && aoe_ktstart(&kts[actual_id]) == 0) + kts[actual_id].active = 1; + mutex_unlock(&ktio_spawn_lock); + } + spin_lock_irq(&iocq[id].lock); } } @@ -1311,7 +1337,7 @@ kthread(void *vp) complete(&k->rendez); /* tell spawner we're running */ do { spin_lock_irq(k->lock); - more = k->fn(); + more = k->fn(k->id); if (!more) { add_wait_queue(k->waitq, &wait); __set_current_state(TASK_INTERRUPTIBLE); @@ -1353,13 +1379,24 @@ aoe_ktstart(struct ktstate *k) static void ktcomplete(struct frame *f, struct sk_buff *skb) { + int id; ulong flags; f->r_skb = skb; - spin_lock_irqsave(&iocq.lock, flags); - list_add_tail(&f->head, &iocq.head); - spin_unlock_irqrestore(&iocq.lock, flags); - wake_up(&ktiowq); + id = f->t->d->aoeminor % ncpus; + spin_lock_irqsave(&iocq[id].lock, flags); + if (!kts[id].active) { + spin_unlock_irqrestore(&iocq[id].lock, flags); + /* The thread with id has not been spawned yet, + * so delegate the work to the main thread and + * try spawning a new thread. + */ + id = 0; + spin_lock_irqsave(&iocq[id].lock, flags); + } + list_add_tail(&f->head, &iocq[id].head); + spin_unlock_irqrestore(&iocq[id].lock, flags); + wake_up(&ktiowq[id]); } struct sk_buff * @@ -1706,6 +1743,17 @@ aoe_failbuf(struct aoedev *d, struct buf *buf) void aoe_flush_iocq(void) { + int i; + + for (i = 0; i < ncpus; i++) { + if (kts[i].active) + aoe_flush_iocq_by_index(i); + } +} + +void +aoe_flush_iocq_by_index(int id) +{ struct frame *f; struct aoedev *d; LIST_HEAD(flist); @@ -1713,9 +1761,9 @@ aoe_flush_iocq(void) struct sk_buff *skb; ulong flags; - spin_lock_irqsave(&iocq.lock, flags); - list_splice_init(&iocq.head, &flist); - spin_unlock_irqrestore(&iocq.lock, flags); + spin_lock_irqsave(&iocq[id].lock, flags); + list_splice_init(&iocq[id].head, &flist); + spin_unlock_irqrestore(&iocq[id].lock, flags); while (!list_empty(&flist)) { pos = flist.next; list_del(pos); @@ -1738,6 +1786,8 @@ int __init aoecmd_init(void) { void *p; + int i; + int ret; /* get_zeroed_page returns page with ref count 1 */ p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); @@ -1745,22 +1795,72 @@ aoecmd_init(void) return -ENOMEM; empty_page = virt_to_page(p); - INIT_LIST_HEAD(&iocq.head); - spin_lock_init(&iocq.lock); - init_waitqueue_head(&ktiowq); - kts.name = "aoe_ktio"; - kts.fn = ktio; - kts.waitq = &ktiowq; - kts.lock = &iocq.lock; - return aoe_ktstart(&kts); + ncpus = num_online_cpus(); + + iocq = kcalloc(ncpus, sizeof(struct iocq_ktio), GFP_KERNEL); + if (!iocq) + return -ENOMEM; + + kts = kcalloc(ncpus, sizeof(struct ktstate), GFP_KERNEL); + if (!kts) { + ret = -ENOMEM; + goto kts_fail; + } + + ktiowq = kcalloc(ncpus, sizeof(wait_queue_head_t), GFP_KERNEL); + if (!ktiowq) { + ret = -ENOMEM; + goto ktiowq_fail; + } + + mutex_init(&ktio_spawn_lock); + + for (i = 0; i < ncpus; i++) { + INIT_LIST_HEAD(&iocq[i].head); + spin_lock_init(&iocq[i].lock); + init_waitqueue_head(&ktiowq[i]); + snprintf(kts[i].name, sizeof(kts[i].name), "aoe_ktio%d", i); + kts[i].fn = ktio; + kts[i].waitq = &ktiowq[i]; + kts[i].lock = &iocq[i].lock; + kts[i].id = i; + kts[i].active = 0; + } + kts[0].active = 1; + if (aoe_ktstart(&kts[0])) { + ret = -ENOMEM; + goto ktstart_fail; + } + return 0; + +ktstart_fail: + kfree(ktiowq); +ktiowq_fail: + kfree(kts); +kts_fail: + kfree(iocq); + + return ret; } void aoecmd_exit(void) { - aoe_ktstop(&kts); + int i; + + for (i = 0; i < ncpus; i++) + if (kts[i].active) + aoe_ktstop(&kts[i]); + aoe_flush_iocq(); + /* Free up the iocq and thread speicific configuration + * allocated during startup. + */ + kfree(iocq); + kfree(kts); + kfree(ktiowq); + free_page((unsigned long) page_address(empty_page)); empty_page = NULL; } diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 98f2965..92201b6 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -518,7 +518,6 @@ void aoedev_exit(void) { flush_scheduled_work(); - aoe_flush_iocq(); flush(NULL, 0, EXITING); } diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 71d3ea8..4af5f06 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -52,7 +52,7 @@ static struct sk_buff_head skbtxq; /* enters with txlock held */ static int -tx(void) __must_hold(&txlock) +tx(int id) __must_hold(&txlock) { struct sk_buff *skb; struct net_device *ifp; @@ -205,7 +205,8 @@ aoenet_init(void) kts.lock = &txlock; kts.fn = tx; kts.waitq = &txwq; - kts.name = "aoe_tx"; + kts.id = 0; + snprintf(kts.name, sizeof(kts.name), "aoe_tx%d", kts.id); if (aoe_ktstart(&kts)) return -EAGAIN; dev_add_pack(&aoe_pt); -- cgit v0.10.2 From ca47bbd93c1cc75b9b2736b0ac49129718f32342 Mon Sep 17 00:00:00 2001 From: Ed Cashin Date: Wed, 3 Jul 2013 15:09:06 -0700 Subject: aoe: update copyright date Signed-off-by: Ed Cashin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index c969852..2284f89 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ +/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ #define VERSION "81" #define AOE_MAJOR 152 #define DEVICE_NAME "aoe" diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 19955dd..99cb944 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ +/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ /* * aoecmd.c * Filesystem request handling methods diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 92201b6..784c92e 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ +/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ /* * aoedev.c * AoE device utility functions; maintains device list. diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 4af5f06..63773a9 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ +/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ /* * aoenet.c * Ethernet portion of AoE driver -- cgit v0.10.2 From 94ac11833fc46fcde6eec7d97893a44c7673967b Mon Sep 17 00:00:00 2001 From: Ed Cashin Date: Wed, 3 Jul 2013 15:09:07 -0700 Subject: aoe: update internal version number to v83 Signed-off-by: Ed Cashin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index 2284f89..025c41d 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -1,5 +1,5 @@ /* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ -#define VERSION "81" +#define VERSION "83" #define AOE_MAJOR 152 #define DEVICE_NAME "aoe" -- cgit v0.10.2 From 8da01af45e197110cbce84fb5ccb54645f051ac6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Jul 2013 15:09:08 -0700 Subject: Documentation/accounting/getdelays.c: avoid strncpy in accounting tool Avoid strncpy anti-pattern. [akpm@linux-foundation.org: remove the str[cpy|dup] altogether] Signed-off-by: Kees Cook Cc: Andreas Schwab Cc: Rob Landley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c index f8ebcde..c6a06b7 100644 --- a/Documentation/accounting/getdelays.c +++ b/Documentation/accounting/getdelays.c @@ -272,7 +272,7 @@ int main(int argc, char *argv[]) char *logfile = NULL; int loop = 0; int containerset = 0; - char containerpath[1024]; + char *containerpath = NULL; int cfd = 0; int forking = 0; sigset_t sigset; @@ -299,7 +299,7 @@ int main(int argc, char *argv[]) break; case 'C': containerset = 1; - strncpy(containerpath, optarg, strlen(optarg) + 1); + containerpath = optarg; break; case 'w': logfile = strdup(optarg); -- cgit v0.10.2 From c32df4e182e5bf40edde45da247318986d3cbf91 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghiu Date: Wed, 3 Jul 2013 15:09:09 -0700 Subject: drivers/parport/share.c: use kzalloc Replaced calls to kmalloc and memset with kzalloc. Patch found using coccinelle. Signed-off-by: Alexandru Gheorghiu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/parport/share.c b/drivers/parport/share.c index a848e02..6a83ee1 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -282,14 +282,13 @@ struct parport *parport_register_port(unsigned long base, int irq, int dma, int device; char *name; - tmp = kmalloc(sizeof(struct parport), GFP_KERNEL); + tmp = kzalloc(sizeof(struct parport), GFP_KERNEL); if (!tmp) { printk(KERN_WARNING "parport: memory squeeze\n"); return NULL; } /* Init our structure */ - memset(tmp, 0, sizeof(struct parport)); tmp->base = base; tmp->irq = irq; tmp->dma = dma; -- cgit v0.10.2 From 2a65182235790fc9a6abb9f41c1006c95f506545 Mon Sep 17 00:00:00 2001 From: Jan Luebbe Date: Wed, 3 Jul 2013 15:09:10 -0700 Subject: drivers/pps/clients/pps-gpio.c: convert to devm_* helpers Signed-off-by: Jan Luebbe Acked-by: Rodolfo Giometti Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/pps/clients/pps-gpio.c b/drivers/pps/clients/pps-gpio.c index d3db26e..54c2809 100644 --- a/drivers/pps/clients/pps-gpio.c +++ b/drivers/pps/clients/pps-gpio.c @@ -74,7 +74,7 @@ static int pps_gpio_setup(struct platform_device *pdev) int ret; const struct pps_gpio_platform_data *pdata = pdev->dev.platform_data; - ret = gpio_request(pdata->gpio_pin, pdata->gpio_label); + ret = devm_gpio_request(&pdev->dev, pdata->gpio_pin, pdata->gpio_label); if (ret) { pr_warning("failed to request GPIO %u\n", pdata->gpio_pin); return -EINVAL; @@ -83,7 +83,6 @@ static int pps_gpio_setup(struct platform_device *pdev) ret = gpio_direction_input(pdata->gpio_pin); if (ret) { pr_warning("failed to set pin direction\n"); - gpio_free(pdata->gpio_pin); return -EINVAL; } @@ -109,7 +108,6 @@ static int pps_gpio_probe(struct platform_device *pdev) struct pps_gpio_device_data *data; int irq; int ret; - int err; int pps_default_params; const struct pps_gpio_platform_data *pdata = pdev->dev.platform_data; @@ -123,17 +121,14 @@ static int pps_gpio_probe(struct platform_device *pdev) irq = gpio_to_irq(pdata->gpio_pin); if (irq < 0) { pr_err("failed to map GPIO to IRQ: %d\n", irq); - err = -EINVAL; - goto return_error; + return -EINVAL; } /* allocate space for device info */ data = devm_kzalloc(&pdev->dev, sizeof(struct pps_gpio_device_data), GFP_KERNEL); - if (data == NULL) { - err = -ENOMEM; - goto return_error; - } + if (data == NULL) + return -ENOMEM; /* initialize PPS specific parts of the bookkeeping data structure. */ data->info.mode = PPS_CAPTUREASSERT | PPS_OFFSETASSERT | @@ -152,41 +147,32 @@ static int pps_gpio_probe(struct platform_device *pdev) data->pps = pps_register_source(&data->info, pps_default_params); if (data->pps == NULL) { pr_err("failed to register IRQ %d as PPS source\n", irq); - err = -EINVAL; - goto return_error; + return -EINVAL; } data->irq = irq; data->pdata = pdata; /* register IRQ interrupt handler */ - ret = request_irq(irq, pps_gpio_irq_handler, + ret = devm_request_irq(&pdev->dev, irq, pps_gpio_irq_handler, get_irqf_trigger_flags(pdata), data->info.name, data); if (ret) { pps_unregister_source(data->pps); pr_err("failed to acquire IRQ %d\n", irq); - err = -EINVAL; - goto return_error; + return -EINVAL; } platform_set_drvdata(pdev, data); dev_info(data->pps->dev, "Registered IRQ %d as PPS source\n", irq); return 0; - -return_error: - gpio_free(pdata->gpio_pin); - return err; } static int pps_gpio_remove(struct platform_device *pdev) { struct pps_gpio_device_data *data = platform_get_drvdata(pdev); - const struct pps_gpio_platform_data *pdata = data->pdata; platform_set_drvdata(pdev, NULL); - free_irq(data->irq, data); - gpio_free(pdata->gpio_pin); pps_unregister_source(data->pps); pr_info("removed IRQ %d as PPS source\n", data->irq); return 0; -- cgit v0.10.2 From 05212be363363efbc003d4fa1eaf8e48dfbe425a Mon Sep 17 00:00:00 2001 From: Jan Luebbe Date: Wed, 3 Jul 2013 15:09:10 -0700 Subject: drivers/pps/clients/pps-gpio.c: convert to module_platform_driver This removes some boilerplate code (no functional changes). Signed-off-by: Jan Luebbe Acked-by: Rodolfo Giometti Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/pps/clients/pps-gpio.c b/drivers/pps/clients/pps-gpio.c index 54c2809..5bd3bf0 100644 --- a/drivers/pps/clients/pps-gpio.c +++ b/drivers/pps/clients/pps-gpio.c @@ -187,23 +187,7 @@ static struct platform_driver pps_gpio_driver = { }, }; -static int __init pps_gpio_init(void) -{ - int ret = platform_driver_register(&pps_gpio_driver); - if (ret < 0) - pr_err("failed to register platform driver\n"); - return ret; -} - -static void __exit pps_gpio_exit(void) -{ - platform_driver_unregister(&pps_gpio_driver); - pr_debug("unregistered platform driver\n"); -} - -module_init(pps_gpio_init); -module_exit(pps_gpio_exit); - +module_platform_driver(pps_gpio_driver); MODULE_AUTHOR("Ricardo Martins "); MODULE_AUTHOR("James Nuss "); MODULE_DESCRIPTION("Use GPIO pin as PPS source"); -- cgit v0.10.2 From c5dbcf8b70b50b1f6ef4850f61d79204ea46d761 Mon Sep 17 00:00:00 2001 From: Jan Luebbe Date: Wed, 3 Jul 2013 15:09:12 -0700 Subject: pps-gpio: add device-tree binding and support Instead of allocating a struct pps_gpio_platform_data in the DT case, store the necessary information in struct pps_gpio_device_data itself. This avoids an additional allocation and the ifdef. It also gets rid of some indirection. Also use dev_err instead of pr_err in the changed code. Signed-off-by: Jan Luebbe Acked-by: Arnd Bergmann Acked-by: Rodolfo Giometti Cc: Grant Likely Cc: Rob Herring Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/devicetree/bindings/pps/pps-gpio.txt b/Documentation/devicetree/bindings/pps/pps-gpio.txt new file mode 100644 index 0000000..40bf9c3 --- /dev/null +++ b/Documentation/devicetree/bindings/pps/pps-gpio.txt @@ -0,0 +1,20 @@ +Device-Tree Bindings for a PPS Signal on GPIO + +These properties describe a PPS (pulse-per-second) signal connected to +a GPIO pin. + +Required properties: +- compatible: should be "pps-gpio" +- gpios: one PPS GPIO in the format described by ../gpio/gpio.txt + +Optional properties: +- assert-falling-edge: when present, assert is indicated by a falling edge + (instead of by a rising edge) + +Example: + pps { + compatible = "pps-gpio"; + gpios = <&gpio2 6 0>; + + assert-falling-edge; + }; diff --git a/drivers/pps/clients/pps-gpio.c b/drivers/pps/clients/pps-gpio.c index 5bd3bf0..eae0eda 100644 --- a/drivers/pps/clients/pps-gpio.c +++ b/drivers/pps/clients/pps-gpio.c @@ -33,13 +33,17 @@ #include #include #include +#include +#include /* Info for each registered platform device */ struct pps_gpio_device_data { int irq; /* IRQ used as PPS source */ struct pps_device *pps; /* PPS source device */ struct pps_source_info info; /* PPS source information */ - const struct pps_gpio_platform_data *pdata; + bool assert_falling_edge; + bool capture_clear; + unsigned int gpio_pin; }; /* @@ -57,45 +61,25 @@ static irqreturn_t pps_gpio_irq_handler(int irq, void *data) info = data; - rising_edge = gpio_get_value(info->pdata->gpio_pin); - if ((rising_edge && !info->pdata->assert_falling_edge) || - (!rising_edge && info->pdata->assert_falling_edge)) + rising_edge = gpio_get_value(info->gpio_pin); + if ((rising_edge && !info->assert_falling_edge) || + (!rising_edge && info->assert_falling_edge)) pps_event(info->pps, &ts, PPS_CAPTUREASSERT, NULL); - else if (info->pdata->capture_clear && - ((rising_edge && info->pdata->assert_falling_edge) || - (!rising_edge && !info->pdata->assert_falling_edge))) + else if (info->capture_clear && + ((rising_edge && info->assert_falling_edge) || + (!rising_edge && !info->assert_falling_edge))) pps_event(info->pps, &ts, PPS_CAPTURECLEAR, NULL); return IRQ_HANDLED; } -static int pps_gpio_setup(struct platform_device *pdev) -{ - int ret; - const struct pps_gpio_platform_data *pdata = pdev->dev.platform_data; - - ret = devm_gpio_request(&pdev->dev, pdata->gpio_pin, pdata->gpio_label); - if (ret) { - pr_warning("failed to request GPIO %u\n", pdata->gpio_pin); - return -EINVAL; - } - - ret = gpio_direction_input(pdata->gpio_pin); - if (ret) { - pr_warning("failed to set pin direction\n"); - return -EINVAL; - } - - return 0; -} - static unsigned long -get_irqf_trigger_flags(const struct pps_gpio_platform_data *pdata) +get_irqf_trigger_flags(const struct pps_gpio_device_data *data) { - unsigned long flags = pdata->assert_falling_edge ? + unsigned long flags = data->assert_falling_edge ? IRQF_TRIGGER_FALLING : IRQF_TRIGGER_RISING; - if (pdata->capture_clear) { + if (data->capture_clear) { flags |= ((flags & IRQF_TRIGGER_RISING) ? IRQF_TRIGGER_FALLING : IRQF_TRIGGER_RISING); } @@ -106,34 +90,63 @@ get_irqf_trigger_flags(const struct pps_gpio_platform_data *pdata) static int pps_gpio_probe(struct platform_device *pdev) { struct pps_gpio_device_data *data; - int irq; + const char *gpio_label; int ret; int pps_default_params; const struct pps_gpio_platform_data *pdata = pdev->dev.platform_data; + struct device_node *np = pdev->dev.of_node; + + /* allocate space for device info */ + data = devm_kzalloc(&pdev->dev, sizeof(struct pps_gpio_device_data), + GFP_KERNEL); + if (!data) + return -ENOMEM; + if (pdata) { + data->gpio_pin = pdata->gpio_pin; + gpio_label = pdata->gpio_label; + + data->assert_falling_edge = pdata->assert_falling_edge; + data->capture_clear = pdata->capture_clear; + } else { + ret = of_get_gpio(np, 0); + if (ret < 0) { + dev_err(&pdev->dev, "failed to get GPIO from device tree\n"); + return ret; + } + data->gpio_pin = ret; + gpio_label = PPS_GPIO_NAME; + + if (of_get_property(np, "assert-falling-edge", NULL)) + data->assert_falling_edge = true; + } /* GPIO setup */ - ret = pps_gpio_setup(pdev); - if (ret) + ret = devm_gpio_request(&pdev->dev, data->gpio_pin, gpio_label); + if (ret) { + dev_err(&pdev->dev, "failed to request GPIO %u\n", + data->gpio_pin); + return ret; + } + + ret = gpio_direction_input(data->gpio_pin); + if (ret) { + dev_err(&pdev->dev, "failed to set pin direction\n"); return -EINVAL; + } /* IRQ setup */ - irq = gpio_to_irq(pdata->gpio_pin); - if (irq < 0) { - pr_err("failed to map GPIO to IRQ: %d\n", irq); + ret = gpio_to_irq(data->gpio_pin); + if (ret < 0) { + dev_err(&pdev->dev, "failed to map GPIO to IRQ: %d\n", ret); return -EINVAL; } - - /* allocate space for device info */ - data = devm_kzalloc(&pdev->dev, sizeof(struct pps_gpio_device_data), - GFP_KERNEL); - if (data == NULL) - return -ENOMEM; + data->irq = ret; /* initialize PPS specific parts of the bookkeeping data structure. */ data->info.mode = PPS_CAPTUREASSERT | PPS_OFFSETASSERT | PPS_ECHOASSERT | PPS_CANWAIT | PPS_TSFMT_TSPEC; - if (pdata->capture_clear) + if (data->capture_clear) data->info.mode |= PPS_CAPTURECLEAR | PPS_OFFSETCLEAR | PPS_ECHOCLEAR; data->info.owner = THIS_MODULE; @@ -142,28 +155,27 @@ static int pps_gpio_probe(struct platform_device *pdev) /* register PPS source */ pps_default_params = PPS_CAPTUREASSERT | PPS_OFFSETASSERT; - if (pdata->capture_clear) + if (data->capture_clear) pps_default_params |= PPS_CAPTURECLEAR | PPS_OFFSETCLEAR; data->pps = pps_register_source(&data->info, pps_default_params); if (data->pps == NULL) { - pr_err("failed to register IRQ %d as PPS source\n", irq); + dev_err(&pdev->dev, "failed to register IRQ %d as PPS source\n", + data->irq); return -EINVAL; } - data->irq = irq; - data->pdata = pdata; - /* register IRQ interrupt handler */ - ret = devm_request_irq(&pdev->dev, irq, pps_gpio_irq_handler, - get_irqf_trigger_flags(pdata), data->info.name, data); + ret = devm_request_irq(&pdev->dev, data->irq, pps_gpio_irq_handler, + get_irqf_trigger_flags(data), data->info.name, data); if (ret) { pps_unregister_source(data->pps); - pr_err("failed to acquire IRQ %d\n", irq); + dev_err(&pdev->dev, "failed to acquire IRQ %d\n", data->irq); return -EINVAL; } platform_set_drvdata(pdev, data); - dev_info(data->pps->dev, "Registered IRQ %d as PPS source\n", irq); + dev_info(data->pps->dev, "Registered IRQ %d as PPS source\n", + data->irq); return 0; } @@ -174,16 +186,23 @@ static int pps_gpio_remove(struct platform_device *pdev) platform_set_drvdata(pdev, NULL); pps_unregister_source(data->pps); - pr_info("removed IRQ %d as PPS source\n", data->irq); + dev_info(&pdev->dev, "removed IRQ %d as PPS source\n", data->irq); return 0; } +static const struct of_device_id pps_gpio_dt_ids[] = { + { .compatible = "pps-gpio", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, pps_gpio_dt_ids); + static struct platform_driver pps_gpio_driver = { .probe = pps_gpio_probe, .remove = pps_gpio_remove, .driver = { .name = PPS_GPIO_NAME, - .owner = THIS_MODULE + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(pps_gpio_dt_ids), }, }; -- cgit v0.10.2 From 10560490d9555b3add7e9b241594dddd07b160ec Mon Sep 17 00:00:00 2001 From: Libo Chen Date: Wed, 3 Jul 2013 15:09:13 -0700 Subject: drivers/memstick/host/jmb38x_ms: convert to module_pci_driver Use module_pci_driver instead of init/exit, make code clean. Signed-off-by: Libo Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/memstick/host/jmb38x_ms.c b/drivers/memstick/host/jmb38x_ms.c index c37d375..aeabaa5 100644 --- a/drivers/memstick/host/jmb38x_ms.c +++ b/drivers/memstick/host/jmb38x_ms.c @@ -1046,20 +1046,9 @@ static struct pci_driver jmb38x_ms_driver = { .resume = jmb38x_ms_resume }; -static int __init jmb38x_ms_init(void) -{ - return pci_register_driver(&jmb38x_ms_driver); -} - -static void __exit jmb38x_ms_exit(void) -{ - pci_unregister_driver(&jmb38x_ms_driver); -} +module_pci_driver(jmb38x_ms_driver); MODULE_AUTHOR("Alex Dubov"); MODULE_DESCRIPTION("JMicron jmb38x MemoryStick driver"); MODULE_LICENSE("GPL"); MODULE_DEVICE_TABLE(pci, jmb38x_ms_id_tbl); - -module_init(jmb38x_ms_init); -module_exit(jmb38x_ms_exit); -- cgit v0.10.2 From 5af1a9ce3d114bbd49b5a06bfc5f5404d73709e3 Mon Sep 17 00:00:00 2001 From: Libo Chen Date: Wed, 3 Jul 2013 15:09:14 -0700 Subject: drivers/memstick/host/r592.c: convert to module_pci_driver Signed-off-by: Libo Chen Cc: Maxim Levitsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c index 9718661..1b6e913 100644 --- a/drivers/memstick/host/r592.c +++ b/drivers/memstick/host/r592.c @@ -884,18 +884,7 @@ static struct pci_driver r852_pci_driver = { .driver.pm = &r592_pm_ops, }; -static __init int r592_module_init(void) -{ - return pci_register_driver(&r852_pci_driver); -} - -static void __exit r592_module_exit(void) -{ - pci_unregister_driver(&r852_pci_driver); -} - -module_init(r592_module_init); -module_exit(r592_module_exit); +module_pci_driver(r852_pci_driver); module_param_named(enable_dma, r592_enable_dma, bool, S_IRUGO); MODULE_PARM_DESC(enable_dma, "Enable usage of the DMA (default)"); -- cgit v0.10.2 From d5528773e68e35512f69dd574e25d8c81e7d3105 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dagenais Date: Wed, 3 Jul 2013 15:09:15 -0700 Subject: drivers/w1/slaves/w1_ds2408.c: add magic sequence to disable P0 test mode Power-up timing The DS2408 is sensitive to the power-on slew rate and can inadvertently power up with a test mode feature enabled. When this occurs, the P0 port does not respond to the Channel Access Write command. For most reliable operation, it is recommended to disable the test mode after every power-on reset using the Disable Test Mode sequence shown below. The 64-bit ROM code must be transmitted in the same bit sequence as with the Match ROM command, i.e., least significant bit first. This precaution is recommended in parasite power mode (VCC pin connected to GND) as well as with VCC power. Disable Test Mode: RST,PD,96h,<64-bit DS2408 ROM Code>,3Ch,RST,PD [akpm@linux-foundation.org: don't use kerenldoc token to introduce a non-kerneldoc comment, tweak whitespace] Signed-off-by: Jean-Francois Dagenais Cc: Evgeniy Polyakov Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/w1/slaves/w1_ds2408.c b/drivers/w1/slaves/w1_ds2408.c index 91cc2cd..cb8a8e5 100644 --- a/drivers/w1/slaves/w1_ds2408.c +++ b/drivers/w1/slaves/w1_ds2408.c @@ -302,7 +302,33 @@ error: return -EIO; } +/* + * This is a special sequence we must do to ensure the P0 output is not stuck + * in test mode. This is described in rev 2 of the ds2408's datasheet + * (http://datasheets.maximintegrated.com/en/ds/DS2408.pdf) under + * "APPLICATION INFORMATION/Power-up timing". + */ +static int w1_f29_disable_test_mode(struct w1_slave *sl) +{ + int res; + u8 magic[10] = {0x96, }; + u64 rn = le64_to_cpu(*((u64*)&sl->reg_num)); + + memcpy(&magic[1], &rn, 8); + magic[9] = 0x3C; + + mutex_lock(&sl->master->bus_mutex); + res = w1_reset_bus(sl->master); + if (res) + goto out; + w1_write_block(sl->master, magic, ARRAY_SIZE(magic)); + + res = w1_reset_bus(sl->master); +out: + mutex_unlock(&sl->master->bus_mutex); + return res; +} static struct bin_attribute w1_f29_sysfs_bin_files[] = { { @@ -363,6 +389,10 @@ static int w1_f29_add_slave(struct w1_slave *sl) int err = 0; int i; + err = w1_f29_disable_test_mode(sl); + if (err) + return err; + for (i = 0; i < ARRAY_SIZE(w1_f29_sysfs_bin_files) && !err; ++i) err = sysfs_create_bin_file( &sl->dev.kobj, -- cgit v0.10.2 From 4b30f07e74d2df673925019ad58bc59a719df3bb Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Wed, 3 Jul 2013 15:09:16 -0700 Subject: aio: fix wrong comment in aio_complete() ctx->ctx_lock should be ctx->completion_lock. Signed-off-by: Tang Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/aio.c b/fs/aio.c index a8ecc83..9b5ca11 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -625,7 +625,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) /* * Add a completion event to the ring buffer. Must be done holding - * ctx->ctx_lock to prevent other code from messing with the tail + * ctx->completion_lock to prevent other code from messing with the tail * pointer since we might be called from irq context. */ spin_lock_irqsave(&ctx->completion_lock, flags); -- cgit v0.10.2 From 0786f7b225ba1edd801dc4bfbf6191d058b943a2 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Wed, 3 Jul 2013 15:09:16 -0700 Subject: kernel/resource.c: remove the unneeded assignment in function __find_resource This line was introduced by fcb11918 ("resources: add arch hook for preventing allocation in reserved areas"). But the struct tmp was already assigned to *new in the above line, so this seems superfluous. Just remove it. Signed-off-by: Kevin Hao Cc: Bjorn Helgaas Cc: Jesse Barnes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/resource.c b/kernel/resource.c index 77bf11a..3f285dc 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -449,7 +449,6 @@ static int __find_resource(struct resource *root, struct resource *old, struct resource *this = root->child; struct resource tmp = *new, avail, alloc; - tmp.flags = new->flags; tmp.start = root->start; /* * Skip past an allocated resource that starts at 0, since the assignment -- cgit v0.10.2 From 51a1d16563fb6488ae1f30d31f62abc6aa50b268 Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Wed, 3 Jul 2013 15:09:17 -0700 Subject: selftests: exit 1 on failure In case this ever gets scripted, it should return 0 on success and 1 on failure. Parsing the output should be left to meatbags. Signed-off-by: Joern Engel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 436d2e8..7d47927 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -8,7 +8,7 @@ all: hugepage-mmap hugepage-shm map_hugetlb thuge-gen $(CC) $(CFLAGS) -o $@ $^ run_tests: all - @/bin/sh ./run_vmtests || echo "vmtests: [FAIL]" + @/bin/sh ./run_vmtests || (echo "vmtests: [FAIL]"; exit 1) clean: $(RM) hugepage-mmap hugepage-shm map_hugetlb diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index 4c53cae..7a9072d 100644 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -4,6 +4,7 @@ #we need 256M, below is the size in kB needmem=262144 mnt=./huge +exitcode=0 #get pagesize and freepages from /proc/meminfo while read name size unit; do @@ -41,6 +42,7 @@ echo "--------------------" ./hugepage-mmap if [ $? -ne 0 ]; then echo "[FAIL]" + exitcode=1 else echo "[PASS]" fi @@ -55,6 +57,7 @@ echo "--------------------" ./hugepage-shm if [ $? -ne 0 ]; then echo "[FAIL]" + exitcode=1 else echo "[PASS]" fi @@ -67,6 +70,7 @@ echo "--------------------" ./map_hugetlb if [ $? -ne 0 ]; then echo "[FAIL]" + exitcode=1 else echo "[PASS]" fi @@ -75,3 +79,4 @@ fi umount $mnt rm -rf $mnt echo $nr_hugepgs > /proc/sys/vm/nr_hugepages +exit $exitcode -- cgit v0.10.2 From fc256f04eb82abcd2bd828fee4af0c36763ffee0 Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Wed, 3 Jul 2013 15:09:18 -0700 Subject: self-test: fix make clean thuge-gen was forgotten. Fix it by removing the duplication, so we don't get too many repeats. Signed-off-by: Joern Engel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 7d47927..cb3f5f2 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -2,8 +2,9 @@ CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall +BINARIES = hugepage-mmap hugepage-shm map_hugetlb thuge-gen -all: hugepage-mmap hugepage-shm map_hugetlb thuge-gen +all: $(BINARIES) %: %.c $(CC) $(CFLAGS) -o $@ $^ @@ -11,4 +12,4 @@ run_tests: all @/bin/sh ./run_vmtests || (echo "vmtests: [FAIL]"; exit 1) clean: - $(RM) hugepage-mmap hugepage-shm map_hugetlb + $(RM) $(BINARIES) -- cgit v0.10.2 From 7e50533d4b84289e4f01de56d6f98e9c64e2229e Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Wed, 3 Jul 2013 15:09:19 -0700 Subject: selftests: add hugetlbfstest As the confusing naming indicates, this test has some overlap with pre-existing tests. Would be nice to merge them eventually. But since it is only test code, cleanliness is much less important than mere existence. Signed-off-by: Joern Engel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index cb3f5f2..3f94e1a 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -2,7 +2,7 @@ CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall -BINARIES = hugepage-mmap hugepage-shm map_hugetlb thuge-gen +BINARIES = hugepage-mmap hugepage-shm map_hugetlb thuge-gen hugetlbfstest all: $(BINARIES) %: %.c diff --git a/tools/testing/selftests/vm/hugetlbfstest.c b/tools/testing/selftests/vm/hugetlbfstest.c new file mode 100644 index 0000000..ea40ff8 --- /dev/null +++ b/tools/testing/selftests/vm/hugetlbfstest.c @@ -0,0 +1,84 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef unsigned long long u64; + +static size_t length = 1 << 24; + +static u64 read_rss(void) +{ + char buf[4096], *s = buf; + int i, fd; + u64 rss; + + fd = open("/proc/self/statm", O_RDONLY); + assert(fd > 2); + memset(buf, 0, sizeof(buf)); + read(fd, buf, sizeof(buf) - 1); + for (i = 0; i < 1; i++) + s = strchr(s, ' ') + 1; + rss = strtoull(s, NULL, 10); + return rss << 12; /* assumes 4k pagesize */ +} + +static void do_mmap(int fd, int extra_flags, int unmap) +{ + int *p; + int flags = MAP_PRIVATE | MAP_POPULATE | extra_flags; + u64 before, after; + + before = read_rss(); + p = mmap(NULL, length, PROT_READ | PROT_WRITE, flags, fd, 0); + assert(p != MAP_FAILED || + !"mmap returned an unexpected error"); + after = read_rss(); + assert(llabs(after - before - length) < 0x40000 || + !"rss didn't grow as expected"); + if (!unmap) + return; + munmap(p, length); + after = read_rss(); + assert(llabs(after - before) < 0x40000 || + !"rss didn't shrink as expected"); +} + +static int open_file(const char *path) +{ + int fd, err; + + unlink(path); + fd = open(path, O_CREAT | O_RDWR | O_TRUNC | O_EXCL + | O_LARGEFILE | O_CLOEXEC, 0600); + assert(fd > 2); + unlink(path); + err = ftruncate(fd, length); + assert(!err); + return fd; +} + +int main(void) +{ + int hugefd, fd; + + fd = open_file("/dev/shm/hugetlbhog"); + hugefd = open_file("/hugepages/hugetlbhog"); + + system("echo 100 > /proc/sys/vm/nr_hugepages"); + do_mmap(-1, MAP_ANONYMOUS, 1); + do_mmap(fd, 0, 1); + do_mmap(-1, MAP_ANONYMOUS | MAP_HUGETLB, 1); + do_mmap(hugefd, 0, 1); + do_mmap(hugefd, MAP_HUGETLB, 1); + /* Leak the last one to test do_exit() */ + do_mmap(-1, MAP_ANONYMOUS | MAP_HUGETLB, 0); + printf("oll korrekt.\n"); + return 0; +} diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index 7a9072d..c87b681 100644 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -75,6 +75,17 @@ else echo "[PASS]" fi +echo "--------------------" +echo "running hugetlbfstest" +echo "--------------------" +./hugetlbfstest +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + #cleanup umount $mnt rm -rf $mnt -- cgit v0.10.2 From c7fed9cf614aa1dc561ad3334b02e896d042068f Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 3 Jul 2013 15:09:20 -0700 Subject: selftests: add .gitignore for vm Signed-off-by: Ramkumar Ramachandra Cc: Dave Young Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore new file mode 100644 index 0000000..ff1bb16 --- /dev/null +++ b/tools/testing/selftests/vm/.gitignore @@ -0,0 +1,4 @@ +hugepage-mmap +hugepage-shm +map_hugetlb +thuge-gen -- cgit v0.10.2 From 1b7871550330c8777c45e07b84781529f27e2b3c Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 3 Jul 2013 15:09:21 -0700 Subject: selftests: fix clean target in kcmp Makefile Signed-off-by: Ramkumar Ramachandra Cc: Dave Young Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile index 56eb552..d7d6bbe 100644 --- a/tools/testing/selftests/kcmp/Makefile +++ b/tools/testing/selftests/kcmp/Makefile @@ -25,5 +25,4 @@ run_tests: all @./kcmp_test || echo "kcmp_test: [FAIL]" clean: - rm -fr ./run_test - rm -fr ./test-file + $(RM) kcmp_test kcmp-test-file -- cgit v0.10.2 From 94d090013e82ba7e8da74f042e3b88406479706c Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 3 Jul 2013 15:09:22 -0700 Subject: selftests: add .gitignore for kcmp Signed-off-by: Ramkumar Ramachandra Cc: Dave Young Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/kcmp/.gitignore b/tools/testing/selftests/kcmp/.gitignore new file mode 100644 index 0000000..5a9b373 --- /dev/null +++ b/tools/testing/selftests/kcmp/.gitignore @@ -0,0 +1,2 @@ +kcmp_test +kcmp-test-file -- cgit v0.10.2 From 9307c29524502c21f0e8a6d96d850b2f5bc0bd9a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 3 Jul 2013 15:09:23 -0700 Subject: tools/testing/selftests: don't assume the x bit is set on scripts The x bit can easily get lost (patch(1) loses it, for example). Reported-by: Ramkumar Ramachandra Cc: Dave Young Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/cpu-hotplug/Makefile b/tools/testing/selftests/cpu-hotplug/Makefile index 12657a5..ae5faf9 100644 --- a/tools/testing/selftests/cpu-hotplug/Makefile +++ b/tools/testing/selftests/cpu-hotplug/Makefile @@ -1,6 +1,6 @@ all: run_tests: - @./on-off-test.sh || echo "cpu-hotplug selftests: [FAIL]" + @/bin/sh ./on-off-test.sh || echo "cpu-hotplug selftests: [FAIL]" clean: diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile index 0f49c3f..350bfed 100644 --- a/tools/testing/selftests/memory-hotplug/Makefile +++ b/tools/testing/selftests/memory-hotplug/Makefile @@ -1,6 +1,6 @@ all: run_tests: - @./on-off-test.sh || echo "memory-hotplug selftests: [FAIL]" + @/bin/sh ./on-off-test.sh || echo "memory-hotplug selftests: [FAIL]" clean: -- cgit v0.10.2