From fb5cbe9efd741b16e72133613747f76490bbecd3 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 28 Oct 2009 22:28:24 -0700 Subject: ocfs2: Return -EINVAL when a device is not ocfs2. In case of non-modular kernels the root filesystem is mounted by trying several filesystems. If ocfs2 was tried before the actual filesystem type, the mount would fail because ocfs2_sb_probe() returns -EAGAIN instead of -EINVAL. ocfs2 will now return -EINVAL properly. Signed-off-by: Joel Becker Reported-by: Laszlo Attila Toth diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index c0e48ae..9606730 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -773,18 +773,20 @@ static int ocfs2_sb_probe(struct super_block *sb, if (tmpstat < 0) { status = tmpstat; mlog_errno(status); - goto bail; + break; } di = (struct ocfs2_dinode *) (*bh)->b_data; memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats)); spin_lock_init(&stats->b_lock); - status = ocfs2_verify_volume(di, *bh, blksize, stats); - if (status >= 0) - goto bail; - brelse(*bh); - *bh = NULL; - if (status != -EAGAIN) + tmpstat = ocfs2_verify_volume(di, *bh, blksize, stats); + if (tmpstat < 0) { + brelse(*bh); + *bh = NULL; + } + if (tmpstat != -EAGAIN) { + status = tmpstat; break; + } } bail: -- cgit v0.10.2 From 87f4b1bb98696e6cf84f57df7de41f28c2a7dbeb Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Thu, 15 Oct 2009 11:10:48 +0800 Subject: ocfs2: Move ocfs2_complete_reflink to the right place. As its name ocfs2_complete_reflink indicates, it should be called after all the work for reflink is done, so it really should be called after we reflink xattr successfully. Signed-off-by: Tao Ma Signed-off-by: Joel Becker Tested-by: Tristan Ye diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 60287fc..9d439b2 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4013,10 +4013,6 @@ static int ocfs2_create_reflink_node(struct inode *s_inode, goto out_unlock_refcount; } - ret = ocfs2_complete_reflink(s_inode, s_bh, t_inode, t_bh, preserve); - if (ret) - mlog_errno(ret); - out_unlock_refcount: ocfs2_unlock_refcount_tree(osb, ref_tree, 1); brelse(ref_root_bh); @@ -4068,9 +4064,17 @@ static int __ocfs2_reflink(struct dentry *old_dentry, ret = ocfs2_reflink_xattrs(inode, old_bh, new_inode, new_bh, preserve); - if (ret) + if (ret) { mlog_errno(ret); + goto inode_unlock; + } } + + ret = ocfs2_complete_reflink(inode, old_bh, + new_inode, new_bh, preserve); + if (ret) + mlog_errno(ret); + inode_unlock: ocfs2_inode_unlock(new_inode, 1); brelse(new_bh); -- cgit v0.10.2 From 2f48d593b6ceb7bb63d34124ceba77d33be298cf Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Thu, 15 Oct 2009 11:10:49 +0800 Subject: ocfs2: duplicate inline data properly during reflink. The old reflink fails to handle inodes with inline data and will oops if it encounters them. This patch copies inline data to the new inode. Extended attributes may still be refcounted. Signed-off-by: Tao Ma Signed-off-by: Joel Becker Tested-by: Tristan Ye diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 89fc8ee..de059f4 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1712,7 +1712,8 @@ int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos, struct super_block *sb = inode->i_sb; if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)) || - !(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)) + !(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) || + OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) return 0; cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits; diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 9d439b2..3a0df7a 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -3743,6 +3743,9 @@ static int ocfs2_attach_refcount_tree(struct inode *inode, goto out; } + if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) + goto attach_xattr; + ocfs2_init_dinode_extent_tree(&di_et, INODE_CACHE(inode), di_bh); size = i_size_read(inode); @@ -3769,6 +3772,7 @@ static int ocfs2_attach_refcount_tree(struct inode *inode, cpos += num_clusters; } +attach_xattr: if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) { ret = ocfs2_xattr_attach_refcount_tree(inode, di_bh, &ref_tree->rf_ci, @@ -3858,6 +3862,49 @@ out: return ret; } +static int ocfs2_duplicate_inline_data(struct inode *s_inode, + struct buffer_head *s_bh, + struct inode *t_inode, + struct buffer_head *t_bh) +{ + int ret; + handle_t *handle; + struct ocfs2_super *osb = OCFS2_SB(s_inode->i_sb); + struct ocfs2_dinode *s_di = (struct ocfs2_dinode *)s_bh->b_data; + struct ocfs2_dinode *t_di = (struct ocfs2_dinode *)t_bh->b_data; + + BUG_ON(!(OCFS2_I(s_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); + + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access_di(handle, INODE_CACHE(t_inode), t_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + + t_di->id2.i_data.id_count = s_di->id2.i_data.id_count; + memcpy(t_di->id2.i_data.id_data, s_di->id2.i_data.id_data, + le16_to_cpu(s_di->id2.i_data.id_count)); + spin_lock(&OCFS2_I(t_inode)->ip_lock); + OCFS2_I(t_inode)->ip_dyn_features |= OCFS2_INLINE_DATA_FL; + t_di->i_dyn_features = cpu_to_le16(OCFS2_I(t_inode)->ip_dyn_features); + spin_unlock(&OCFS2_I(t_inode)->ip_lock); + + ocfs2_journal_dirty(handle, t_bh); + +out_commit: + ocfs2_commit_trans(osb, handle); +out: + return ret; +} + static int ocfs2_duplicate_extent_list(struct inode *s_inode, struct inode *t_inode, struct buffer_head *t_bh, @@ -3997,6 +4044,14 @@ static int ocfs2_create_reflink_node(struct inode *s_inode, goto out; } + if (OCFS2_I(s_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { + ret = ocfs2_duplicate_inline_data(s_inode, s_bh, + t_inode, t_bh); + if (ret) + mlog_errno(ret); + goto out; + } + ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 1, &ref_tree, &ref_root_bh); if (ret) { -- cgit v0.10.2 From 837711f862bb71ac263837a0f0714dd8cc4ef7ea Mon Sep 17 00:00:00 2001 From: Coly Li Date: Fri, 16 Jan 2009 16:33:05 +0800 Subject: ocfs2: return f_fsid info in ocfs2_statfs() Currently the f_fsid of struct kstatfs returned from ocfs2_statfs() is undefined (vfs layer fills in 0 as default). Since in some conditions, f_fsid value might be used in a (f_fsid, ino) pair to uniquely identify a file, ocfs2 should return a unique defined f_fsid value from ocfs2_statfs(). Because uuid_str is the same on big or litlle endian machine, it's endian consistent to use osb->uuid_str to generate f_fsid value. Signed-off-by: Coly Li Cc: Sunil Mushran Cc: Mark Fasheh Signed-off-by: Joel Becker diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 9606730..14f47d2 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1647,6 +1647,10 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bavail = buf->f_bfree; buf->f_files = numbits; buf->f_ffree = freebits; + buf->f_fsid.val[0] = crc32_le(0, osb->uuid_str, OCFS2_VOL_UUID_LEN) + & 0xFFFFFFFFUL; + buf->f_fsid.val[1] = crc32_le(0, osb->uuid_str + OCFS2_VOL_UUID_LEN, + OCFS2_VOL_UUID_LEN) & 0xFFFFFFFFUL; brelse(bh); -- cgit v0.10.2 From e8edb3cbd7dd8acf6c748a02d06ec1d82c4124ea Mon Sep 17 00:00:00 2001 From: Chuck Ebbert Date: Tue, 3 Nov 2009 10:32:03 -0500 Subject: crypto: padlock-aes - Use the correct mask when checking whether copying is required Masking with PAGE_SIZE is just wrong... Signed-off-by: Chuck Ebbert Signed-off-by: Herbert Xu diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index a9952b1..84c51e1 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -236,7 +236,7 @@ static inline void ecb_crypt(const u8 *in, u8 *out, u32 *key, /* Padlock in ECB mode fetches at least ecb_fetch_bytes of data. * We could avoid some copying here but it's probably not worth it. */ - if (unlikely(((unsigned long)in & PAGE_SIZE) + ecb_fetch_bytes > PAGE_SIZE)) { + if (unlikely(((unsigned long)in & ~PAGE_MASK) + ecb_fetch_bytes > PAGE_SIZE)) { ecb_crypt_copy(in, out, key, cword, count); return; } @@ -248,7 +248,7 @@ static inline u8 *cbc_crypt(const u8 *in, u8 *out, u32 *key, u8 *iv, struct cword *cword, int count) { /* Padlock in CBC mode fetches at least cbc_fetch_bytes of data. */ - if (unlikely(((unsigned long)in & PAGE_SIZE) + cbc_fetch_bytes > PAGE_SIZE)) + if (unlikely(((unsigned long)in & ~PAGE_MASK) + cbc_fetch_bytes > PAGE_SIZE)) return cbc_crypt_copy(in, out, key, iv, cword, count); return rep_xcrypt_cbc(in, out, key, iv, cword, count); -- cgit v0.10.2 From ba86bf8bfc1add5f515db8cf1d6042bb9396a299 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 20 Oct 2009 16:29:16 +0000 Subject: drm/i915: Avoid potential sleep whilst holding spinlock Miles Lane reported the following error: 2 locks held by cat/4179: #0: (&p->lock){+.+.+.}, at: [] seq_read+0x25/0x315 #1: (&dev_priv->mm.active_list_lock){+.+...}, at: [] i915_batchbuffer_info+0x2b/0x124 Pid: 4179, comm: cat Not tainted 2.6.32-rc5-git1 #2 Call Trace: [] ? __debug_show_held_locks+0x1e/0x20 [] __might_sleep+0xf0/0xf7 [] kmap+0x17/0x58 [] i915_batchbuffer_info+0xad/0x124 [] seq_read+0x160/0x315 [] ? rw_verify_area+0x98/0xbb [] ? seq_read+0x0/0x315 [] vfs_read+0x75/0xa9 [] sys_read+0x3b/0x5d [] sysenter_do_call+0x12/0x36 The fix is relatively simple, use the atomic variants of kmap() that avoid the potential sleep. Signed-off-by: Chris Wilson Cc: Miles Lane Signed-off-by: Eric Anholt diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f8ce9a3..26bf055 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -267,10 +267,10 @@ static void i915_dump_pages(struct seq_file *m, struct page **pages, int page_co uint32_t *mem; for (page = 0; page < page_count; page++) { - mem = kmap(pages[page]); + mem = kmap_atomic(pages[page], KM_USER0); for (i = 0; i < PAGE_SIZE; i += 4) seq_printf(m, "%08x : %08x\n", i, mem[i / 4]); - kunmap(pages[page]); + kunmap_atomic(pages[page], KM_USER0); } } -- cgit v0.10.2 From 4bfe6b6876a036d26a960320f1ab0bbd752c19bf Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Mon, 2 Nov 2009 07:52:29 +0000 Subject: drm/i915: Fix and cleanup DPLL calculation for Ironlake When the ideal error range can't be reached, this will safely use a most closed one. Clean up some dumb codes in DPLL function too. This fixes DPLL clock issue against one monitor at 1680x1050@60hz. Signed-off-by: Zhenyu Wang Signed-off-by: Eric Anholt diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3ba6546b..099f420 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -863,10 +863,8 @@ intel_igdng_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc, struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; intel_clock_t clock; - int max_n; - bool found; int err_most = 47; - found = false; + int err_min = 10000; /* eDP has only 2 clock choice, no n/m/p setting */ if (HAS_eDP) @@ -890,10 +888,9 @@ intel_igdng_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc, } memset(best_clock, 0, sizeof(*best_clock)); - max_n = limit->n.max; for (clock.p1 = limit->p1.max; clock.p1 >= limit->p1.min; clock.p1--) { /* based on hardware requriment prefer smaller n to precision */ - for (clock.n = limit->n.min; clock.n <= max_n; clock.n++) { + for (clock.n = limit->n.min; clock.n <= limit->n.max; clock.n++) { /* based on hardware requirment prefere larger m1,m2 */ for (clock.m1 = limit->m1.max; clock.m1 >= limit->m1.min; clock.m1--) { @@ -907,18 +904,18 @@ intel_igdng_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc, this_err = abs((10000 - (target*10000/clock.dot))); if (this_err < err_most) { *best_clock = clock; - err_most = this_err; - max_n = clock.n; - found = true; /* found on first matching */ goto out; + } else if (this_err < err_min) { + *best_clock = clock; + err_min = this_err; } } } } } out: - return found; + return true; } /* DisplayPort has only two frequencies, 162MHz and 270MHz */ -- cgit v0.10.2 From d8a2d0e00c0d5a0d55e14b884bff034205015e51 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Mon, 2 Nov 2009 07:52:30 +0000 Subject: drm/i915: HDMI hardware workaround for Ironlake This brings some hardware workaround for HDMI port on PCH (Ibex Peak), which fixes unstable issues like during rotation. Signed-off-by: Zhenyu Wang Signed-off-by: Eric Anholt diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 663ab6d..c33451a 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -77,14 +77,32 @@ static void intel_hdmi_dpms(struct drm_encoder *encoder, int mode) struct intel_hdmi_priv *hdmi_priv = intel_output->dev_priv; u32 temp; - if (mode != DRM_MODE_DPMS_ON) { - temp = I915_READ(hdmi_priv->sdvox_reg); + temp = I915_READ(hdmi_priv->sdvox_reg); + + /* HW workaround, need to toggle enable bit off and on for 12bpc, but + * we do this anyway which shows more stable in testing. + */ + if (IS_IGDNG(dev)) { I915_WRITE(hdmi_priv->sdvox_reg, temp & ~SDVO_ENABLE); + POSTING_READ(hdmi_priv->sdvox_reg); + } + + if (mode != DRM_MODE_DPMS_ON) { + temp &= ~SDVO_ENABLE; } else { - temp = I915_READ(hdmi_priv->sdvox_reg); - I915_WRITE(hdmi_priv->sdvox_reg, temp | SDVO_ENABLE); + temp |= SDVO_ENABLE; } + + I915_WRITE(hdmi_priv->sdvox_reg, temp); POSTING_READ(hdmi_priv->sdvox_reg); + + /* HW workaround, need to write this twice for issue that may result + * in first write getting masked. + */ + if (IS_IGDNG(dev)) { + I915_WRITE(hdmi_priv->sdvox_reg, temp); + POSTING_READ(hdmi_priv->sdvox_reg); + } } static void intel_hdmi_save(struct drm_connector *connector) -- cgit v0.10.2 From 96d25e532234bec1a1989e6e1baf702d43a78b0d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Nov 2009 16:15:42 +0900 Subject: NFSv4: Fix a cache validation bug which causes getcwd() to return ENOENT Changeset a65318bf3afc93ce49227e849d213799b072c5fd (NFSv4: Simplify some cache consistency post-op GETATTRs) incorrectly changed the getattr bitmap for readdir(). This causes the readdir() function to fail to return a fileid/inode number, which again exposed a bug in the NFS readdir code that causes spurious ENOENT errors to appear in applications (see http://bugzilla.kernel.org/show_bug.cgi?id=14541). The immediate band aid is to revert the incorrect bitmap change, but more long term, we should change the NFS readdir code to cope with the fact that NFSv4 servers are not required to support fileids/inode numbers. Reported-by: Daniel J Blueman Cc: stable@kernel.org Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ff37454..741a562 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2767,7 +2767,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, .pages = &page, .pgbase = 0, .count = count, - .bitmask = NFS_SERVER(dentry->d_inode)->cache_consistency_bitmask, + .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask, }; struct nfs4_readdir_res res; struct rpc_message msg = { -- cgit v0.10.2 From c62b17a58ab5e97534ff6487241addd5fcc606de Mon Sep 17 00:00:00 2001 From: Romit Dasgupta Date: Thu, 12 Nov 2009 13:08:11 +0100 Subject: Thaw refrigerated bdi flusher threads before invoking kthread_stop on them Unfreezes the bdi flusher task when the said task needs to exit. Steps to reproduce this. 1) Mount a file system from MMC/SD card. 2) Unmount the file system. This creates a flusher task. 3) Attempt suspend to RAM. System is unresponsive. This is because the bdi flusher thread is already in the refrigerator and will remain so until it is thawed. The MMC driver suspend routine call stack will ultimately issue a 'kthread_stop' on the bdi flusher thread and will block until the flusher thread is exited. Since the bdi flusher thread is in the refrigerator it never cleans up until thawed. Signed-off-by: Romit Dasgupta Signed-off-by: Jens Axboe diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 11aee09..67a33a5 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -604,10 +604,14 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) /* * Finally, kill the kernel threads. We don't need to be RCU - * safe anymore, since the bdi is gone from visibility. + * safe anymore, since the bdi is gone from visibility. Force + * unfreeze of the thread before calling kthread_stop(), otherwise + * it would never exet if it is currently stuck in the refrigerator. */ - list_for_each_entry(wb, &bdi->wb_list, list) + list_for_each_entry(wb, &bdi->wb_list, list) { + wb->task->flags &= ~PF_FROZEN; kthread_stop(wb->task); + } } /* -- cgit v0.10.2 From 2d109a845dd3074885db726892c629ab73dd0ed8 Mon Sep 17 00:00:00 2001 From: "Zou, Nanhai" Date: Fri, 6 Nov 2009 02:13:01 +0000 Subject: drm/i915: Fix IRQ stall issue on Ironlake The master irq control in DE must be disabled before irq handling, and enable after the process. This fixes the irq stall issue on Ironlake. Cc: Stable Team Signed-off-by: Zou, Nanhai Signed-off-by: Zhenyu Wang Signed-off-by: Eric Anholt diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c3ceffa..aa7fd82 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -254,10 +254,15 @@ irqreturn_t igdng_irq_handler(struct drm_device *dev) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; int ret = IRQ_NONE; - u32 de_iir, gt_iir; + u32 de_iir, gt_iir, de_ier; u32 new_de_iir, new_gt_iir; struct drm_i915_master_private *master_priv; + /* disable master interrupt before clearing iir */ + de_ier = I915_READ(DEIER); + I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL); + (void)I915_READ(DEIER); + de_iir = I915_READ(DEIIR); gt_iir = I915_READ(GTIIR); @@ -290,6 +295,9 @@ irqreturn_t igdng_irq_handler(struct drm_device *dev) gt_iir = new_gt_iir; } + I915_WRITE(DEIER, de_ier); + (void)I915_READ(DEIER); + return ret; } -- cgit v0.10.2 From 5586c8bc93ac5fe75f5fd14e8c7add5344d1c548 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 6 Nov 2009 02:13:02 +0000 Subject: drm/i915: Add more registers save/restore for Ironlake suspend Add more display registers save/restore to fix unstable issues during S4 testing on Ironlake. And DPLL_B_MD should not be restored on Ironlake. Signed-off-by: Zhenyu Wang Signed-off-by: Eric Anholt diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 57204e2..a725f659 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -296,6 +296,7 @@ typedef struct drm_i915_private { u32 saveVBLANK_A; u32 saveVSYNC_A; u32 saveBCLRPAT_A; + u32 saveTRANSACONF; u32 saveTRANS_HTOTAL_A; u32 saveTRANS_HBLANK_A; u32 saveTRANS_HSYNC_A; @@ -326,6 +327,7 @@ typedef struct drm_i915_private { u32 saveVBLANK_B; u32 saveVSYNC_B; u32 saveBCLRPAT_B; + u32 saveTRANSBCONF; u32 saveTRANS_HTOTAL_B; u32 saveTRANS_HBLANK_B; u32 saveTRANS_HSYNC_B; @@ -414,6 +416,16 @@ typedef struct drm_i915_private { u32 savePFB_WIN_SZ; u32 savePFA_WIN_POS; u32 savePFB_WIN_POS; + u32 savePCH_DREF_CONTROL; + u32 saveDISP_ARB_CTL; + u32 savePIPEA_DATA_M1; + u32 savePIPEA_DATA_N1; + u32 savePIPEA_LINK_M1; + u32 savePIPEA_LINK_N1; + u32 savePIPEB_DATA_M1; + u32 savePIPEB_DATA_N1; + u32 savePIPEB_LINK_M1; + u32 savePIPEB_LINK_N1; struct { struct drm_mm gtt_space; diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 992d561..6eec817 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -239,6 +239,11 @@ static void i915_save_modeset_reg(struct drm_device *dev) if (drm_core_check_feature(dev, DRIVER_MODESET)) return; + if (IS_IGDNG(dev)) { + dev_priv->savePCH_DREF_CONTROL = I915_READ(PCH_DREF_CONTROL); + dev_priv->saveDISP_ARB_CTL = I915_READ(DISP_ARB_CTL); + } + /* Pipe & plane A info */ dev_priv->savePIPEACONF = I915_READ(PIPEACONF); dev_priv->savePIPEASRC = I915_READ(PIPEASRC); @@ -263,6 +268,11 @@ static void i915_save_modeset_reg(struct drm_device *dev) dev_priv->saveBCLRPAT_A = I915_READ(BCLRPAT_A); if (IS_IGDNG(dev)) { + dev_priv->savePIPEA_DATA_M1 = I915_READ(PIPEA_DATA_M1); + dev_priv->savePIPEA_DATA_N1 = I915_READ(PIPEA_DATA_N1); + dev_priv->savePIPEA_LINK_M1 = I915_READ(PIPEA_LINK_M1); + dev_priv->savePIPEA_LINK_N1 = I915_READ(PIPEA_LINK_N1); + dev_priv->saveFDI_TXA_CTL = I915_READ(FDI_TXA_CTL); dev_priv->saveFDI_RXA_CTL = I915_READ(FDI_RXA_CTL); @@ -270,6 +280,7 @@ static void i915_save_modeset_reg(struct drm_device *dev) dev_priv->savePFA_WIN_SZ = I915_READ(PFA_WIN_SZ); dev_priv->savePFA_WIN_POS = I915_READ(PFA_WIN_POS); + dev_priv->saveTRANSACONF = I915_READ(TRANSACONF); dev_priv->saveTRANS_HTOTAL_A = I915_READ(TRANS_HTOTAL_A); dev_priv->saveTRANS_HBLANK_A = I915_READ(TRANS_HBLANK_A); dev_priv->saveTRANS_HSYNC_A = I915_READ(TRANS_HSYNC_A); @@ -314,6 +325,11 @@ static void i915_save_modeset_reg(struct drm_device *dev) dev_priv->saveBCLRPAT_B = I915_READ(BCLRPAT_B); if (IS_IGDNG(dev)) { + dev_priv->savePIPEB_DATA_M1 = I915_READ(PIPEB_DATA_M1); + dev_priv->savePIPEB_DATA_N1 = I915_READ(PIPEB_DATA_N1); + dev_priv->savePIPEB_LINK_M1 = I915_READ(PIPEB_LINK_M1); + dev_priv->savePIPEB_LINK_N1 = I915_READ(PIPEB_LINK_N1); + dev_priv->saveFDI_TXB_CTL = I915_READ(FDI_TXB_CTL); dev_priv->saveFDI_RXB_CTL = I915_READ(FDI_RXB_CTL); @@ -321,6 +337,7 @@ static void i915_save_modeset_reg(struct drm_device *dev) dev_priv->savePFB_WIN_SZ = I915_READ(PFB_WIN_SZ); dev_priv->savePFB_WIN_POS = I915_READ(PFB_WIN_POS); + dev_priv->saveTRANSBCONF = I915_READ(TRANSBCONF); dev_priv->saveTRANS_HTOTAL_B = I915_READ(TRANS_HTOTAL_B); dev_priv->saveTRANS_HBLANK_B = I915_READ(TRANS_HBLANK_B); dev_priv->saveTRANS_HSYNC_B = I915_READ(TRANS_HSYNC_B); @@ -368,6 +385,11 @@ static void i915_restore_modeset_reg(struct drm_device *dev) fpb1_reg = FPB1; } + if (IS_IGDNG(dev)) { + I915_WRITE(PCH_DREF_CONTROL, dev_priv->savePCH_DREF_CONTROL); + I915_WRITE(DISP_ARB_CTL, dev_priv->saveDISP_ARB_CTL); + } + /* Pipe & plane A info */ /* Prime the clock */ if (dev_priv->saveDPLL_A & DPLL_VCO_ENABLE) { @@ -395,6 +417,11 @@ static void i915_restore_modeset_reg(struct drm_device *dev) I915_WRITE(BCLRPAT_A, dev_priv->saveBCLRPAT_A); if (IS_IGDNG(dev)) { + I915_WRITE(PIPEA_DATA_M1, dev_priv->savePIPEA_DATA_M1); + I915_WRITE(PIPEA_DATA_N1, dev_priv->savePIPEA_DATA_N1); + I915_WRITE(PIPEA_LINK_M1, dev_priv->savePIPEA_LINK_M1); + I915_WRITE(PIPEA_LINK_N1, dev_priv->savePIPEA_LINK_N1); + I915_WRITE(FDI_RXA_CTL, dev_priv->saveFDI_RXA_CTL); I915_WRITE(FDI_TXA_CTL, dev_priv->saveFDI_TXA_CTL); @@ -402,6 +429,7 @@ static void i915_restore_modeset_reg(struct drm_device *dev) I915_WRITE(PFA_WIN_SZ, dev_priv->savePFA_WIN_SZ); I915_WRITE(PFA_WIN_POS, dev_priv->savePFA_WIN_POS); + I915_WRITE(TRANSACONF, dev_priv->saveTRANSACONF); I915_WRITE(TRANS_HTOTAL_A, dev_priv->saveTRANS_HTOTAL_A); I915_WRITE(TRANS_HBLANK_A, dev_priv->saveTRANS_HBLANK_A); I915_WRITE(TRANS_HSYNC_A, dev_priv->saveTRANS_HSYNC_A); @@ -439,7 +467,7 @@ static void i915_restore_modeset_reg(struct drm_device *dev) /* Actually enable it */ I915_WRITE(dpll_b_reg, dev_priv->saveDPLL_B); DRM_UDELAY(150); - if (IS_I965G(dev)) + if (IS_I965G(dev) && !IS_IGDNG(dev)) I915_WRITE(DPLL_B_MD, dev_priv->saveDPLL_B_MD); DRM_UDELAY(150); @@ -454,6 +482,11 @@ static void i915_restore_modeset_reg(struct drm_device *dev) I915_WRITE(BCLRPAT_B, dev_priv->saveBCLRPAT_B); if (IS_IGDNG(dev)) { + I915_WRITE(PIPEB_DATA_M1, dev_priv->savePIPEB_DATA_M1); + I915_WRITE(PIPEB_DATA_N1, dev_priv->savePIPEB_DATA_N1); + I915_WRITE(PIPEB_LINK_M1, dev_priv->savePIPEB_LINK_M1); + I915_WRITE(PIPEB_LINK_N1, dev_priv->savePIPEB_LINK_N1); + I915_WRITE(FDI_RXB_CTL, dev_priv->saveFDI_RXB_CTL); I915_WRITE(FDI_TXB_CTL, dev_priv->saveFDI_TXB_CTL); @@ -461,6 +494,7 @@ static void i915_restore_modeset_reg(struct drm_device *dev) I915_WRITE(PFB_WIN_SZ, dev_priv->savePFB_WIN_SZ); I915_WRITE(PFB_WIN_POS, dev_priv->savePFB_WIN_POS); + I915_WRITE(TRANSBCONF, dev_priv->saveTRANSBCONF); I915_WRITE(TRANS_HTOTAL_B, dev_priv->saveTRANS_HTOTAL_B); I915_WRITE(TRANS_HBLANK_B, dev_priv->saveTRANS_HBLANK_B); I915_WRITE(TRANS_HSYNC_B, dev_priv->saveTRANS_HSYNC_B); -- cgit v0.10.2 From 9cf1e35cb025eaa52dde37df38e2750b6adb1620 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Tue, 10 Nov 2009 03:10:22 +0000 Subject: agp/intel: new host bridge support Add new CPU host bridge id, needed for support Ironlake graphics device with it. No change for graphics device itself, so no need to update drm/i915. Signed-off-by: Zhenyu Wang Signed-off-by: Eric Anholt diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 4068467..1d32d5c 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -62,6 +62,7 @@ #define PCI_DEVICE_ID_INTEL_IGDNG_D_IG 0x0042 #define PCI_DEVICE_ID_INTEL_IGDNG_M_HB 0x0044 #define PCI_DEVICE_ID_INTEL_IGDNG_MA_HB 0x0062 +#define PCI_DEVICE_ID_INTEL_IGDNG_MC2_HB 0x006a #define PCI_DEVICE_ID_INTEL_IGDNG_M_IG 0x0046 /* cover 915 and 945 variants */ @@ -96,7 +97,8 @@ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_B43_HB || \ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_D_HB || \ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_M_HB || \ - agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_MA_HB) + agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_MA_HB || \ + agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_MC2_HB) extern int agp_memory_reserved; @@ -1364,6 +1366,7 @@ static void intel_i965_get_gtt_range(int *gtt_offset, int *gtt_size) case PCI_DEVICE_ID_INTEL_IGDNG_D_HB: case PCI_DEVICE_ID_INTEL_IGDNG_M_HB: case PCI_DEVICE_ID_INTEL_IGDNG_MA_HB: + case PCI_DEVICE_ID_INTEL_IGDNG_MC2_HB: *gtt_offset = *gtt_size = MB(2); break; default: @@ -2365,6 +2368,8 @@ static const struct intel_driver_description { "IGDNG/M", NULL, &intel_i965_driver }, { PCI_DEVICE_ID_INTEL_IGDNG_MA_HB, PCI_DEVICE_ID_INTEL_IGDNG_M_IG, 0, "IGDNG/MA", NULL, &intel_i965_driver }, + { PCI_DEVICE_ID_INTEL_IGDNG_MC2_HB, PCI_DEVICE_ID_INTEL_IGDNG_M_IG, 0, + "IGDNG/MC2", NULL, &intel_i965_driver }, { 0, 0, 0, NULL, NULL, NULL } }; @@ -2561,6 +2566,7 @@ static struct pci_device_id agp_intel_pci_table[] = { ID(PCI_DEVICE_ID_INTEL_IGDNG_D_HB), ID(PCI_DEVICE_ID_INTEL_IGDNG_M_HB), ID(PCI_DEVICE_ID_INTEL_IGDNG_MA_HB), + ID(PCI_DEVICE_ID_INTEL_IGDNG_MC2_HB), { } }; -- cgit v0.10.2 From 69ac74822277fa999a3f469d8362e93262deb3f4 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Fri, 13 Nov 2009 08:47:53 +0100 Subject: cciss: make device attrs static No need to export those device attributes. In fact, without this patch, we can trip over a build error if cciss is a built-in and another driver also declares and exports attributes with the same name. You'll see errors like: drivers/scsi/built-in.o: multiple definition of `dev_attr_lunid' drivers/block/built-in.o: first defined here Cc: Stephen M. Cameron Signed-off-by: Alex Chiang Cc: Cc: Jens Axboe Signed-off-by: Jens Axboe diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 6399e50..92b1263 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -482,7 +482,7 @@ static ssize_t host_store_rescan(struct device *dev, return count; } -DEVICE_ATTR(rescan, S_IWUSR, NULL, host_store_rescan); +static DEVICE_ATTR(rescan, S_IWUSR, NULL, host_store_rescan); static ssize_t dev_show_unique_id(struct device *dev, struct device_attribute *attr, @@ -512,7 +512,7 @@ static ssize_t dev_show_unique_id(struct device *dev, sn[8], sn[9], sn[10], sn[11], sn[12], sn[13], sn[14], sn[15]); } -DEVICE_ATTR(unique_id, S_IRUGO, dev_show_unique_id, NULL); +static DEVICE_ATTR(unique_id, S_IRUGO, dev_show_unique_id, NULL); static ssize_t dev_show_vendor(struct device *dev, struct device_attribute *attr, @@ -536,7 +536,7 @@ static ssize_t dev_show_vendor(struct device *dev, else return snprintf(buf, sizeof(vendor) + 1, "%s\n", drv->vendor); } -DEVICE_ATTR(vendor, S_IRUGO, dev_show_vendor, NULL); +static DEVICE_ATTR(vendor, S_IRUGO, dev_show_vendor, NULL); static ssize_t dev_show_model(struct device *dev, struct device_attribute *attr, @@ -560,7 +560,7 @@ static ssize_t dev_show_model(struct device *dev, else return snprintf(buf, sizeof(model) + 1, "%s\n", drv->model); } -DEVICE_ATTR(model, S_IRUGO, dev_show_model, NULL); +static DEVICE_ATTR(model, S_IRUGO, dev_show_model, NULL); static ssize_t dev_show_rev(struct device *dev, struct device_attribute *attr, @@ -584,7 +584,7 @@ static ssize_t dev_show_rev(struct device *dev, else return snprintf(buf, sizeof(rev) + 1, "%s\n", drv->rev); } -DEVICE_ATTR(rev, S_IRUGO, dev_show_rev, NULL); +static DEVICE_ATTR(rev, S_IRUGO, dev_show_rev, NULL); static ssize_t cciss_show_lunid(struct device *dev, struct device_attribute *attr, char *buf) @@ -609,7 +609,7 @@ static ssize_t cciss_show_lunid(struct device *dev, lunid[0], lunid[1], lunid[2], lunid[3], lunid[4], lunid[5], lunid[6], lunid[7]); } -DEVICE_ATTR(lunid, S_IRUGO, cciss_show_lunid, NULL); +static DEVICE_ATTR(lunid, S_IRUGO, cciss_show_lunid, NULL); static ssize_t cciss_show_raid_level(struct device *dev, struct device_attribute *attr, char *buf) @@ -632,7 +632,7 @@ static ssize_t cciss_show_raid_level(struct device *dev, return snprintf(buf, strlen(raid_label[raid]) + 7, "RAID %s\n", raid_label[raid]); } -DEVICE_ATTR(raid_level, S_IRUGO, cciss_show_raid_level, NULL); +static DEVICE_ATTR(raid_level, S_IRUGO, cciss_show_raid_level, NULL); static ssize_t cciss_show_usage_count(struct device *dev, struct device_attribute *attr, char *buf) @@ -651,7 +651,7 @@ static ssize_t cciss_show_usage_count(struct device *dev, spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); return snprintf(buf, 20, "%d\n", count); } -DEVICE_ATTR(usage_count, S_IRUGO, cciss_show_usage_count, NULL); +static DEVICE_ATTR(usage_count, S_IRUGO, cciss_show_usage_count, NULL); static struct attribute *cciss_host_attrs[] = { &dev_attr_rescan.attr, -- cgit v0.10.2 From 1e360a60b24ad8f8685af66fa6de10ce46693a4b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Nov 2009 10:52:55 -0500 Subject: SUNRPC: Address buffer overrun in rpc_uaddr2sockaddr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The size of buf[] must account for the string termination needed for the first strict_strtoul() call. Introduced in commit a02d6926. Fábio Olivé Leite points out that strict_strtoul() requires _either_ '\n\0' _or_ '\0' termination, so use the simpler '\0' here instead. See http://bugzilla.kernel.org/show_bug.cgi?id=14546 . Reported-by: argp@census-labs.com Signed-off-by: Chuck Lever Signed-off-by: Fábio Olivé Leite Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index 22e8fd8..c7450c8 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -306,24 +306,25 @@ EXPORT_SYMBOL_GPL(rpc_sockaddr2uaddr); * @sap: buffer into which to plant socket address * @salen: size of buffer * + * @uaddr does not have to be '\0'-terminated, but strict_strtoul() and + * rpc_pton() require proper string termination to be successful. + * * Returns the size of the socket address if successful; otherwise * zero is returned. */ size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len, struct sockaddr *sap, const size_t salen) { - char *c, buf[RPCBIND_MAXUADDRLEN]; + char *c, buf[RPCBIND_MAXUADDRLEN + sizeof('\0')]; unsigned long portlo, porthi; unsigned short port; - if (uaddr_len > sizeof(buf)) + if (uaddr_len > RPCBIND_MAXUADDRLEN) return 0; memcpy(buf, uaddr, uaddr_len); - buf[uaddr_len] = '\n'; - buf[uaddr_len + 1] = '\0'; - + buf[uaddr_len] = '\0'; c = strrchr(buf, '.'); if (unlikely(c == NULL)) return 0; @@ -332,9 +333,7 @@ size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len, if (unlikely(portlo > 255)) return 0; - c[0] = '\n'; - c[1] = '\0'; - + *c = '\0'; c = strrchr(buf, '.'); if (unlikely(c == NULL)) return 0; @@ -345,8 +344,7 @@ size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len, port = (unsigned short)((porthi << 8) | portlo); - c[0] = '\0'; - + *c = '\0'; if (rpc_pton(buf, strlen(buf), sap, salen) == 0) return 0; -- cgit v0.10.2 From 7ab8f5244d6a8695688a91c8a3f6d3f40356ff97 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 2 Nov 2009 13:38:10 -0800 Subject: ocfs2: Refresh documentation Signed-off-by: Sunil Mushran Signed-off-by: Joel Becker diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index c2a0871..c58b9f5 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt @@ -20,15 +20,16 @@ Lots of code taken from ext3 and other projects. Authors in alphabetical order: Joel Becker Zach Brown -Mark Fasheh +Mark Fasheh Kurt Hackel +Tao Ma Sunil Mushran Manish Singh +Tiger Yang Caveats ======= Features which OCFS2 does not support yet: - - quotas - Directory change notification (F_NOTIFY) - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease) @@ -70,7 +71,6 @@ commit=nrsec (*) Ocfs2 can be told to sync all its data and metadata performance. localalloc=8(*) Allows custom localalloc size in MB. If the value is too large, the fs will silently revert it to the default. - Localalloc is not enabled for local mounts. localflocks This disables cluster aware flock. inode64 Indicates that Ocfs2 is allowed to create inodes at any location in the filesystem, including those which -- cgit v0.10.2 From 7aee47b0bb9f93baecdbea205e878fe0f155f7da Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Fri, 6 Nov 2009 14:50:22 -0800 Subject: ocfs2: Trivial cleanup of jbd compatibility layer removal Mainline commit 53ef99cad9878f02f27bb30bc304fc42af8bdd6e removed the JBD compatibility layer from OCFS2. This patch removes the last remaining remnants of that. Signed-off-by: Sunil Mushran Signed-off-by: Joel Becker diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index eae4046..d963d86 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -35,12 +35,7 @@ #include #include #include -#ifndef CONFIG_OCFS2_COMPAT_JBD -# include -#else -# include -# include "ocfs2_jbd_compat.h" -#endif +#include /* For union ocfs2_dlm_lksb */ #include "stackglue.h" diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index b6284f2..c613693 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c @@ -53,11 +53,6 @@ #include #include #include -#ifndef CONFIG_OCFS2_COMPAT_JBD -# include -#else -# include -#endif #define MLOG_MASK_PREFIX ML_UPTODATE -- cgit v0.10.2 From 62c5593aea4b71d61dc0f37fea96c556c158a042 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 16 Nov 2009 21:52:22 +0800 Subject: crypto: gcm - fix another complete call in complete fuction The flow of the complete function (xxx_done) in gcm.c is as follow: void complete(struct crypto_async_request *areq, int err) { struct aead_request *req = areq->data; if (!err) { err = async_next_step(); if (err == -EINPROGRESS || err == -EBUSY) return; } complete_for_next_step(areq, err); } But *areq may be destroyed in async_next_step(), this makes complete_for_next_step() can not work properly. To fix this, one of following methods is used for each complete function. - Add a __complete() for each complete(), which accept struct aead_request *req instead of areq, so avoid using areq after it is destroyed. - Expand complete_for_next_step(). The fixing method is based on the idea of Herbert Xu. Signed-off-by: Huang Ying Signed-off-by: Herbert Xu diff --git a/crypto/gcm.c b/crypto/gcm.c index 5fc3292..c654713 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -40,7 +40,7 @@ struct crypto_rfc4106_ctx { struct crypto_gcm_ghash_ctx { unsigned int cryptlen; struct scatterlist *src; - crypto_completion_t complete; + void (*complete)(struct aead_request *req, int err); }; struct crypto_gcm_req_priv_ctx { @@ -267,23 +267,26 @@ static int gcm_hash_final(struct aead_request *req, return crypto_ahash_final(ahreq); } -static void gcm_hash_final_done(struct crypto_async_request *areq, - int err) +static void __gcm_hash_final_done(struct aead_request *req, int err) { - struct aead_request *req = areq->data; struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx; if (!err) crypto_xor(pctx->auth_tag, pctx->iauth_tag, 16); - gctx->complete(areq, err); + gctx->complete(req, err); } -static void gcm_hash_len_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_final_done(struct crypto_async_request *areq, int err) { struct aead_request *req = areq->data; + + __gcm_hash_final_done(req, err); +} + +static void __gcm_hash_len_done(struct aead_request *req, int err) +{ struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); if (!err) { @@ -292,13 +295,18 @@ static void gcm_hash_len_done(struct crypto_async_request *areq, return; } - gcm_hash_final_done(areq, err); + __gcm_hash_final_done(req, err); } -static void gcm_hash_crypt_remain_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_len_done(struct crypto_async_request *areq, int err) { struct aead_request *req = areq->data; + + __gcm_hash_len_done(req, err); +} + +static void __gcm_hash_crypt_remain_done(struct aead_request *req, int err) +{ struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); if (!err) { @@ -307,13 +315,19 @@ static void gcm_hash_crypt_remain_done(struct crypto_async_request *areq, return; } - gcm_hash_len_done(areq, err); + __gcm_hash_len_done(req, err); } -static void gcm_hash_crypt_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_crypt_remain_done(struct crypto_async_request *areq, + int err) { struct aead_request *req = areq->data; + + __gcm_hash_crypt_remain_done(req, err); +} + +static void __gcm_hash_crypt_done(struct aead_request *req, int err) +{ struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx; unsigned int remain; @@ -327,13 +341,18 @@ static void gcm_hash_crypt_done(struct crypto_async_request *areq, return; } - gcm_hash_crypt_remain_done(areq, err); + __gcm_hash_crypt_remain_done(req, err); } -static void gcm_hash_assoc_remain_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_crypt_done(struct crypto_async_request *areq, int err) { struct aead_request *req = areq->data; + + __gcm_hash_crypt_done(req, err); +} + +static void __gcm_hash_assoc_remain_done(struct aead_request *req, int err) +{ struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx; crypto_completion_t complete; @@ -350,15 +369,21 @@ static void gcm_hash_assoc_remain_done(struct crypto_async_request *areq, } if (remain) - gcm_hash_crypt_done(areq, err); + __gcm_hash_crypt_done(req, err); else - gcm_hash_crypt_remain_done(areq, err); + __gcm_hash_crypt_remain_done(req, err); } -static void gcm_hash_assoc_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_assoc_remain_done(struct crypto_async_request *areq, + int err) { struct aead_request *req = areq->data; + + __gcm_hash_assoc_remain_done(req, err); +} + +static void __gcm_hash_assoc_done(struct aead_request *req, int err) +{ struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); unsigned int remain; @@ -371,13 +396,18 @@ static void gcm_hash_assoc_done(struct crypto_async_request *areq, return; } - gcm_hash_assoc_remain_done(areq, err); + __gcm_hash_assoc_remain_done(req, err); } -static void gcm_hash_init_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_assoc_done(struct crypto_async_request *areq, int err) { struct aead_request *req = areq->data; + + __gcm_hash_assoc_done(req, err); +} + +static void __gcm_hash_init_done(struct aead_request *req, int err) +{ struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); crypto_completion_t complete; unsigned int remain = 0; @@ -393,9 +423,16 @@ static void gcm_hash_init_done(struct crypto_async_request *areq, } if (remain) - gcm_hash_assoc_done(areq, err); + __gcm_hash_assoc_done(req, err); else - gcm_hash_assoc_remain_done(areq, err); + __gcm_hash_assoc_remain_done(req, err); +} + +static void gcm_hash_init_done(struct crypto_async_request *areq, int err) +{ + struct aead_request *req = areq->data; + + __gcm_hash_init_done(req, err); } static int gcm_hash(struct aead_request *req, @@ -457,10 +494,8 @@ static void gcm_enc_copy_hash(struct aead_request *req, crypto_aead_authsize(aead), 1); } -static void gcm_enc_hash_done(struct crypto_async_request *areq, - int err) +static void gcm_enc_hash_done(struct aead_request *req, int err) { - struct aead_request *req = areq->data; struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); if (!err) @@ -469,8 +504,7 @@ static void gcm_enc_hash_done(struct crypto_async_request *areq, aead_request_complete(req, err); } -static void gcm_encrypt_done(struct crypto_async_request *areq, - int err) +static void gcm_encrypt_done(struct crypto_async_request *areq, int err) { struct aead_request *req = areq->data; struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); @@ -479,9 +513,13 @@ static void gcm_encrypt_done(struct crypto_async_request *areq, err = gcm_hash(req, pctx); if (err == -EINPROGRESS || err == -EBUSY) return; + else if (!err) { + crypto_xor(pctx->auth_tag, pctx->iauth_tag, 16); + gcm_enc_copy_hash(req, pctx); + } } - gcm_enc_hash_done(areq, err); + aead_request_complete(req, err); } static int crypto_gcm_encrypt(struct aead_request *req) @@ -538,9 +576,8 @@ static void gcm_decrypt_done(struct crypto_async_request *areq, int err) aead_request_complete(req, err); } -static void gcm_dec_hash_done(struct crypto_async_request *areq, int err) +static void gcm_dec_hash_done(struct aead_request *req, int err) { - struct aead_request *req = areq->data; struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); struct ablkcipher_request *abreq = &pctx->u.abreq; struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx; @@ -552,9 +589,11 @@ static void gcm_dec_hash_done(struct crypto_async_request *areq, int err) err = crypto_ablkcipher_decrypt(abreq); if (err == -EINPROGRESS || err == -EBUSY) return; + else if (!err) + err = crypto_gcm_verify(req, pctx); } - gcm_decrypt_done(areq, err); + aead_request_complete(req, err); } static int crypto_gcm_decrypt(struct aead_request *req) -- cgit v0.10.2 From e22dde9904c2d26a522f1a2b89854a8238bf0933 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 17 Nov 2009 11:34:31 -0700 Subject: ioat: silence "dca disabled" messages Turning off dca is not an "error", and the dca-enabled state can be viewed from sysfs. Signed-off-by: Dan Williams diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c index 69d0261..14f0a78 100644 --- a/drivers/dma/ioat/dca.c +++ b/drivers/dma/ioat/dca.c @@ -98,7 +98,7 @@ static int dca_enabled_in_bios(struct pci_dev *pdev) cpuid_level_9 = cpuid_eax(9); res = test_bit(0, &cpuid_level_9); if (!res) - dev_err(&pdev->dev, "DCA is disabled in BIOS\n"); + dev_dbg(&pdev->dev, "DCA is disabled in BIOS\n"); return res; } @@ -108,7 +108,7 @@ static int system_has_dca_enabled(struct pci_dev *pdev) if (boot_cpu_has(X86_FEATURE_DCA)) return dca_enabled_in_bios(pdev); - dev_err(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n"); + dev_dbg(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n"); return 0; } -- cgit v0.10.2 From 5ed1f321a71b8549cc2eea26c94fe7943ed01d31 Mon Sep 17 00:00:00 2001 From: Jay Fenlason Date: Tue, 17 Nov 2009 12:29:17 -0500 Subject: firewire: ohci: Make cycleMatch ISO transmission work Calling the START_ISO ioctl with a nonnegative cycle paramater has never worked. Last night I got around to figuring out why. Most of this patch is a big comment explaining why we enable an interrupt source then don't actually do anything when we get one. As the comment says, we should do more, but we don't have a way to tell userspace what happened. . . Signed-off-by: Jay Fenlason Signed-off-by: Stefan Richter (edited comment) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 5d52425..c07cfad 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -275,7 +275,7 @@ static void log_irqs(u32 evt) !(evt & OHCI1394_busReset)) return; - fw_notify("IRQ %08x%s%s%s%s%s%s%s%s%s%s%s%s%s\n", evt, + fw_notify("IRQ %08x%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", evt, evt & OHCI1394_selfIDComplete ? " selfID" : "", evt & OHCI1394_RQPkt ? " AR_req" : "", evt & OHCI1394_RSPkt ? " AR_resp" : "", @@ -286,6 +286,7 @@ static void log_irqs(u32 evt) evt & OHCI1394_postedWriteErr ? " postedWriteErr" : "", evt & OHCI1394_cycleTooLong ? " cycleTooLong" : "", evt & OHCI1394_cycle64Seconds ? " cycle64Seconds" : "", + evt & OHCI1394_cycleInconsistent ? " cycleInconsistent" : "", evt & OHCI1394_regAccessFail ? " regAccessFail" : "", evt & OHCI1394_busReset ? " busReset" : "", evt & ~(OHCI1394_selfIDComplete | OHCI1394_RQPkt | @@ -293,6 +294,7 @@ static void log_irqs(u32 evt) OHCI1394_respTxComplete | OHCI1394_isochRx | OHCI1394_isochTx | OHCI1394_postedWriteErr | OHCI1394_cycleTooLong | OHCI1394_cycle64Seconds | + OHCI1394_cycleInconsistent | OHCI1394_regAccessFail | OHCI1394_busReset) ? " ?" : ""); } @@ -1439,6 +1441,17 @@ static irqreturn_t irq_handler(int irq, void *data) OHCI1394_LinkControl_cycleMaster); } + if (unlikely(event & OHCI1394_cycleInconsistent)) { + /* + * We need to clear this event bit in order to make + * cycleMatch isochronous I/O work. In theory we should + * stop active cycleMatch iso contexts now and restart + * them at least two cycles later. (FIXME?) + */ + if (printk_ratelimit()) + fw_notify("isochronous cycle inconsistent\n"); + } + if (event & OHCI1394_cycle64Seconds) { cycle_time = reg_read(ohci, OHCI1394_IsochronousCycleTimer); if ((cycle_time & 0x80000000) == 0) @@ -1528,6 +1541,7 @@ static int ohci_enable(struct fw_card *card, u32 *config_rom, size_t length) OHCI1394_reqTxComplete | OHCI1394_respTxComplete | OHCI1394_isochRx | OHCI1394_isochTx | OHCI1394_postedWriteErr | OHCI1394_cycleTooLong | + OHCI1394_cycleInconsistent | OHCI1394_cycle64Seconds | OHCI1394_regAccessFail | OHCI1394_masterIntEnable); if (param_debug & OHCI_PARAM_DEBUG_BUSRESETS) -- cgit v0.10.2 From 3d7a641e544e428191667e8b1f83f96fa46dbd65 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:10:23 +0000 Subject: SLOW_WORK: Wait for outstanding work items belonging to a module to clear Wait for outstanding slow work items belonging to a module to clear when unregistering that module as a user of the facility. This prevents the put_ref code of a work item from being taken away before it returns. Signed-off-by: David Howells diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index ebc50f8..f12fda3 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt @@ -64,9 +64,11 @@ USING SLOW WORK ITEMS Firstly, a module or subsystem wanting to make use of slow work items must register its interest: - int ret = slow_work_register_user(); + int ret = slow_work_register_user(struct module *module); -This will return 0 if successful, or a -ve error upon failure. +This will return 0 if successful, or a -ve error upon failure. The module +pointer should be the module interested in using this facility (almost +certainly THIS_MODULE). Slow work items may then be set up by: @@ -110,7 +112,12 @@ operation. When all a module's slow work items have been processed, and the module has no further interest in the facility, it should unregister its interest: - slow_work_unregister_user(); + slow_work_unregister_user(struct module *module); + +The module pointer is used to wait for all outstanding work items for that +module before completing the unregistration. This prevents the put_ref() code +from being taken away before it completes. module should almost certainly be +THIS_MODULE. =============== diff --git a/fs/fscache/main.c b/fs/fscache/main.c index 4de41b5..add6bdb 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -48,7 +48,7 @@ static int __init fscache_init(void) { int ret; - ret = slow_work_register_user(); + ret = slow_work_register_user(THIS_MODULE); if (ret < 0) goto error_slow_work; @@ -80,7 +80,7 @@ error_kobj: error_cookie_jar: fscache_proc_cleanup(); error_proc: - slow_work_unregister_user(); + slow_work_unregister_user(THIS_MODULE); error_slow_work: return ret; } @@ -97,7 +97,7 @@ static void __exit fscache_exit(void) kobject_put(fscache_root); kmem_cache_destroy(fscache_cookie_jar); fscache_proc_cleanup(); - slow_work_unregister_user(); + slow_work_unregister_user(THIS_MODULE); printk(KERN_NOTICE "FS-Cache: Unloaded\n"); } diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 392a41b..d236eb1 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -45,6 +45,7 @@ static void fscache_enqueue_dependents(struct fscache_object *); static void fscache_dequeue_object(struct fscache_object *); const struct slow_work_ops fscache_object_slow_work_ops = { + .owner = THIS_MODULE, .get_ref = fscache_object_slow_work_get_ref, .put_ref = fscache_object_slow_work_put_ref, .execute = fscache_object_slow_work_execute, diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index e7f8d53..f1a2857 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -453,6 +453,7 @@ static void fscache_op_execute(struct slow_work *work) } const struct slow_work_ops fscache_op_slow_work_ops = { + .owner = THIS_MODULE, .get_ref = fscache_op_get_ref, .put_ref = fscache_op_put_ref, .execute = fscache_op_execute, diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index eacd78a..5b31f77 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -114,7 +114,7 @@ static int __init init_gfs2_fs(void) if (error) goto fail_unregister; - error = slow_work_register_user(); + error = slow_work_register_user(THIS_MODULE); if (error) goto fail_slow; @@ -163,7 +163,7 @@ static void __exit exit_gfs2_fs(void) gfs2_unregister_debugfs(); unregister_filesystem(&gfs2_fs_type); unregister_filesystem(&gfs2meta_fs_type); - slow_work_unregister_user(); + slow_work_unregister_user(THIS_MODULE); kmem_cache_destroy(gfs2_quotad_cachep); kmem_cache_destroy(gfs2_rgrpd_cachep); diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 59d2695..b2bb779 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -593,6 +593,7 @@ fail: } struct slow_work_ops gfs2_recover_ops = { + .owner = THIS_MODULE, .get_ref = gfs2_recover_get_ref, .put_ref = gfs2_recover_put_ref, .execute = gfs2_recover_work, diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index b65c888..9adb2b3 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -24,6 +24,9 @@ struct slow_work; * The operations used to support slow work items */ struct slow_work_ops { + /* owner */ + struct module *owner; + /* get a ref on a work item * - return 0 if successful, -ve if not */ @@ -42,6 +45,7 @@ struct slow_work_ops { * queued */ struct slow_work { + struct module *owner; /* the owning module */ unsigned long flags; #define SLOW_WORK_PENDING 0 /* item pending (further) execution */ #define SLOW_WORK_EXECUTING 1 /* item currently executing */ @@ -84,8 +88,8 @@ static inline void vslow_work_init(struct slow_work *work, } extern int slow_work_enqueue(struct slow_work *work); -extern int slow_work_register_user(void); -extern void slow_work_unregister_user(void); +extern int slow_work_register_user(struct module *owner); +extern void slow_work_unregister_user(struct module *owner); #ifdef CONFIG_SYSCTL extern ctl_table slow_work_sysctls[]; diff --git a/kernel/slow-work.c b/kernel/slow-work.c index 0d31135..dd08f37 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -22,6 +22,8 @@ #define SLOW_WORK_OOM_TIMEOUT (5 * HZ) /* can't start new threads for 5s after * OOM */ +#define SLOW_WORK_THREAD_LIMIT 255 /* abs maximum number of slow-work threads */ + static void slow_work_cull_timeout(unsigned long); static void slow_work_oom_timeout(unsigned long); @@ -46,7 +48,7 @@ static unsigned vslow_work_proportion = 50; /* % of threads that may process #ifdef CONFIG_SYSCTL static const int slow_work_min_min_threads = 2; -static int slow_work_max_max_threads = 255; +static int slow_work_max_max_threads = SLOW_WORK_THREAD_LIMIT; static const int slow_work_min_vslow = 1; static const int slow_work_max_vslow = 99; @@ -98,6 +100,23 @@ static DEFINE_TIMER(slow_work_oom_timer, slow_work_oom_timeout, 0, 0); static struct slow_work slow_work_new_thread; /* new thread starter */ /* + * slow work ID allocation (use slow_work_queue_lock) + */ +static DECLARE_BITMAP(slow_work_ids, SLOW_WORK_THREAD_LIMIT); + +/* + * Unregistration tracking to prevent put_ref() from disappearing during module + * unload + */ +#ifdef CONFIG_MODULES +static struct module *slow_work_thread_processing[SLOW_WORK_THREAD_LIMIT]; +static struct module *slow_work_unreg_module; +static struct slow_work *slow_work_unreg_work_item; +static DECLARE_WAIT_QUEUE_HEAD(slow_work_unreg_wq); +static DEFINE_MUTEX(slow_work_unreg_sync_lock); +#endif + +/* * The queues of work items and the lock governing access to them. These are * shared between all the CPUs. It doesn't make sense to have per-CPU queues * as the number of threads bears no relation to the number of CPUs. @@ -149,8 +168,11 @@ static unsigned slow_work_calc_vsmax(void) * Attempt to execute stuff queued on a slow thread. Return true if we managed * it, false if there was nothing to do. */ -static bool slow_work_execute(void) +static bool slow_work_execute(int id) { +#ifdef CONFIG_MODULES + struct module *module; +#endif struct slow_work *work = NULL; unsigned vsmax; bool very_slow; @@ -186,6 +208,12 @@ static bool slow_work_execute(void) } else { very_slow = false; /* avoid the compiler warning */ } + +#ifdef CONFIG_MODULES + if (work) + slow_work_thread_processing[id] = work->owner; +#endif + spin_unlock_irq(&slow_work_queue_lock); if (!work) @@ -219,7 +247,18 @@ static bool slow_work_execute(void) spin_unlock_irq(&slow_work_queue_lock); } + /* sort out the race between module unloading and put_ref() */ work->ops->put_ref(work); + +#ifdef CONFIG_MODULES + module = slow_work_thread_processing[id]; + slow_work_thread_processing[id] = NULL; + smp_mb(); + if (slow_work_unreg_work_item == work || + slow_work_unreg_module == module) + wake_up_all(&slow_work_unreg_wq); +#endif + return true; auto_requeue: @@ -232,6 +271,7 @@ auto_requeue: else list_add_tail(&work->link, &slow_work_queue); spin_unlock_irq(&slow_work_queue_lock); + slow_work_thread_processing[id] = NULL; return true; } @@ -368,13 +408,22 @@ static inline bool slow_work_available(int vsmax) */ static int slow_work_thread(void *_data) { - int vsmax; + int vsmax, id; DEFINE_WAIT(wait); set_freezable(); set_user_nice(current, -5); + /* allocate ourselves an ID */ + spin_lock_irq(&slow_work_queue_lock); + id = find_first_zero_bit(slow_work_ids, SLOW_WORK_THREAD_LIMIT); + BUG_ON(id < 0 || id >= SLOW_WORK_THREAD_LIMIT); + __set_bit(id, slow_work_ids); + spin_unlock_irq(&slow_work_queue_lock); + + sprintf(current->comm, "kslowd%03u", id); + for (;;) { vsmax = vslow_work_proportion; vsmax *= atomic_read(&slow_work_thread_count); @@ -395,7 +444,7 @@ static int slow_work_thread(void *_data) vsmax *= atomic_read(&slow_work_thread_count); vsmax /= 100; - if (slow_work_available(vsmax) && slow_work_execute()) { + if (slow_work_available(vsmax) && slow_work_execute(id)) { cond_resched(); if (list_empty(&slow_work_queue) && list_empty(&vslow_work_queue) && @@ -412,6 +461,10 @@ static int slow_work_thread(void *_data) break; } + spin_lock_irq(&slow_work_queue_lock); + __clear_bit(id, slow_work_ids); + spin_unlock_irq(&slow_work_queue_lock); + if (atomic_dec_and_test(&slow_work_thread_count)) complete_and_exit(&slow_work_last_thread_exited, 0); return 0; @@ -475,6 +528,7 @@ static void slow_work_new_thread_execute(struct slow_work *work) } static const struct slow_work_ops slow_work_new_thread_ops = { + .owner = THIS_MODULE, .get_ref = slow_work_new_thread_get_ref, .put_ref = slow_work_new_thread_put_ref, .execute = slow_work_new_thread_execute, @@ -546,12 +600,13 @@ static int slow_work_max_threads_sysctl(struct ctl_table *table, int write, /** * slow_work_register_user - Register a user of the facility + * @module: The module about to make use of the facility * * Register a user of the facility, starting up the initial threads if there * aren't any other users at this point. This will return 0 if successful, or * an error if not. */ -int slow_work_register_user(void) +int slow_work_register_user(struct module *module) { struct task_struct *p; int loop; @@ -598,14 +653,79 @@ error: } EXPORT_SYMBOL(slow_work_register_user); +/* + * wait for all outstanding items from the calling module to complete + * - note that more items may be queued whilst we're waiting + */ +static void slow_work_wait_for_items(struct module *module) +{ + DECLARE_WAITQUEUE(myself, current); + struct slow_work *work; + int loop; + + mutex_lock(&slow_work_unreg_sync_lock); + add_wait_queue(&slow_work_unreg_wq, &myself); + + for (;;) { + spin_lock_irq(&slow_work_queue_lock); + + /* first of all, we wait for the last queued item in each list + * to be processed */ + list_for_each_entry_reverse(work, &vslow_work_queue, link) { + if (work->owner == module) { + set_current_state(TASK_UNINTERRUPTIBLE); + slow_work_unreg_work_item = work; + goto do_wait; + } + } + list_for_each_entry_reverse(work, &slow_work_queue, link) { + if (work->owner == module) { + set_current_state(TASK_UNINTERRUPTIBLE); + slow_work_unreg_work_item = work; + goto do_wait; + } + } + + /* then we wait for the items being processed to finish */ + slow_work_unreg_module = module; + smp_mb(); + for (loop = 0; loop < SLOW_WORK_THREAD_LIMIT; loop++) { + if (slow_work_thread_processing[loop] == module) + goto do_wait; + } + spin_unlock_irq(&slow_work_queue_lock); + break; /* okay, we're done */ + + do_wait: + spin_unlock_irq(&slow_work_queue_lock); + schedule(); + slow_work_unreg_work_item = NULL; + slow_work_unreg_module = NULL; + } + + remove_wait_queue(&slow_work_unreg_wq, &myself); + mutex_unlock(&slow_work_unreg_sync_lock); +} + /** * slow_work_unregister_user - Unregister a user of the facility + * @module: The module whose items should be cleared * * Unregister a user of the facility, killing all the threads if this was the * last one. + * + * This waits for all the work items belonging to the nominated module to go + * away before proceeding. */ -void slow_work_unregister_user(void) +void slow_work_unregister_user(struct module *module) { + /* first of all, wait for all outstanding items from the calling module + * to complete */ + if (module) + slow_work_wait_for_items(module); + + /* then we can actually go about shutting down the facility if need + * be */ mutex_lock(&slow_work_user_lock); BUG_ON(slow_work_user_count <= 0); -- cgit v0.10.2 From 4d8bb2cbccf6dccaada509aafeb01c6205c9d8c4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 19 Nov 2009 18:10:39 +0000 Subject: SLOW_WORK: Make slow_work_ops ->get_ref/->put_ref optional Make the ability for the slow-work facility to take references on a work item optional as not everyone requires this. Even the internal slow-work stubs them out, so those can be got rid of too. Signed-off-by: Jens Axboe Signed-off-by: David Howells diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index f12fda3..c655c51 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt @@ -125,7 +125,7 @@ ITEM OPERATIONS =============== Each work item requires a table of operations of type struct slow_work_ops. -All members are required: +Only ->execute() is required, getting and putting of a reference are optional. (*) Get a reference on an item: diff --git a/kernel/slow-work.c b/kernel/slow-work.c index dd08f37..fccf421 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -145,6 +145,20 @@ static DECLARE_COMPLETION(slow_work_last_thread_exited); static int slow_work_user_count; static DEFINE_MUTEX(slow_work_user_lock); +static inline int slow_work_get_ref(struct slow_work *work) +{ + if (work->ops->get_ref) + return work->ops->get_ref(work); + + return 0; +} + +static inline void slow_work_put_ref(struct slow_work *work) +{ + if (work->ops->put_ref) + work->ops->put_ref(work); +} + /* * Calculate the maximum number of active threads in the pool that are * permitted to process very slow work items. @@ -248,7 +262,7 @@ static bool slow_work_execute(int id) } /* sort out the race between module unloading and put_ref() */ - work->ops->put_ref(work); + slow_work_put_ref(work); #ifdef CONFIG_MODULES module = slow_work_thread_processing[id]; @@ -309,7 +323,6 @@ int slow_work_enqueue(struct slow_work *work) BUG_ON(slow_work_user_count <= 0); BUG_ON(!work); BUG_ON(!work->ops); - BUG_ON(!work->ops->get_ref); /* when honouring an enqueue request, we only promise that we will run * the work function in the future; we do not promise to run it once @@ -339,7 +352,7 @@ int slow_work_enqueue(struct slow_work *work) if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) { set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags); } else { - if (work->ops->get_ref(work) < 0) + if (slow_work_get_ref(work) < 0) goto cant_get_ref; if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) list_add_tail(&work->link, &vslow_work_queue); @@ -480,21 +493,6 @@ static void slow_work_cull_timeout(unsigned long data) } /* - * Get a reference on slow work thread starter - */ -static int slow_work_new_thread_get_ref(struct slow_work *work) -{ - return 0; -} - -/* - * Drop a reference on slow work thread starter - */ -static void slow_work_new_thread_put_ref(struct slow_work *work) -{ -} - -/* * Start a new slow work thread */ static void slow_work_new_thread_execute(struct slow_work *work) @@ -529,8 +527,6 @@ static void slow_work_new_thread_execute(struct slow_work *work) static const struct slow_work_ops slow_work_new_thread_ops = { .owner = THIS_MODULE, - .get_ref = slow_work_new_thread_get_ref, - .put_ref = slow_work_new_thread_put_ref, .execute = slow_work_new_thread_execute, }; -- cgit v0.10.2 From 0160950297c08f8233c89b9f9e7dd59cfb080809 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 19 Nov 2009 18:10:43 +0000 Subject: SLOW_WORK: Add support for cancellation of slow work Add support for cancellation of queued slow work and delayed slow work items. The cancellation functions will wait for items that are pending or undergoing execution to be discarded by the slow work facility. Attempting to enqueue work that is in the process of being cancelled will result in ECANCELED. Signed-off-by: Jens Axboe Signed-off-by: David Howells diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index c655c51..2e384bd 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt @@ -108,7 +108,17 @@ on the item, 0 otherwise. The items are reference counted, so there ought to be no need for a flush -operation. When all a module's slow work items have been processed, and the +operation. But as the reference counting is optional, means to cancel +existing work items are also included: + + cancel_slow_work(&myitem); + +can be used to cancel pending work. The above cancel function waits for +existing work to have been executed (or prevent execution of them, depending +on timing). + + +When all a module's slow work items have been processed, and the module has no further interest in the facility, it should unregister its interest: diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index 9adb2b3..eef2018 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -51,6 +51,7 @@ struct slow_work { #define SLOW_WORK_EXECUTING 1 /* item currently executing */ #define SLOW_WORK_ENQ_DEFERRED 2 /* item enqueue deferred */ #define SLOW_WORK_VERY_SLOW 3 /* item is very slow */ +#define SLOW_WORK_CANCELLING 4 /* item is being cancelled, don't enqueue */ const struct slow_work_ops *ops; /* operations table for this item */ struct list_head link; /* link in queue */ }; @@ -88,6 +89,7 @@ static inline void vslow_work_init(struct slow_work *work, } extern int slow_work_enqueue(struct slow_work *work); +extern void slow_work_cancel(struct slow_work *work); extern int slow_work_register_user(struct module *owner); extern void slow_work_unregister_user(struct module *owner); diff --git a/kernel/slow-work.c b/kernel/slow-work.c index fccf421..671cc43 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -236,12 +236,17 @@ static bool slow_work_execute(int id) if (!test_and_clear_bit(SLOW_WORK_PENDING, &work->flags)) BUG(); - work->ops->execute(work); + /* don't execute if the work is in the process of being cancelled */ + if (!test_bit(SLOW_WORK_CANCELLING, &work->flags)) + work->ops->execute(work); if (very_slow) atomic_dec(&vslow_work_executing_count); clear_bit_unlock(SLOW_WORK_EXECUTING, &work->flags); + /* wake up anyone waiting for this work to be complete */ + wake_up_bit(&work->flags, SLOW_WORK_EXECUTING); + /* if someone tried to enqueue the item whilst we were executing it, * then it'll be left unenqueued to avoid multiple threads trying to * execute it simultaneously @@ -314,11 +319,16 @@ auto_requeue: * allowed to pick items to execute. This ensures that very slow items won't * overly block ones that are just ordinarily slow. * - * Returns 0 if successful, -EAGAIN if not. + * Returns 0 if successful, -EAGAIN if not (or -ECANCELED if cancelled work is + * attempted queued) */ int slow_work_enqueue(struct slow_work *work) { unsigned long flags; + int ret; + + if (test_bit(SLOW_WORK_CANCELLING, &work->flags)) + return -ECANCELED; BUG_ON(slow_work_user_count <= 0); BUG_ON(!work); @@ -335,6 +345,9 @@ int slow_work_enqueue(struct slow_work *work) if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) { spin_lock_irqsave(&slow_work_queue_lock, flags); + if (unlikely(test_bit(SLOW_WORK_CANCELLING, &work->flags))) + goto cancelled; + /* we promise that we will not attempt to execute the work * function in more than one thread simultaneously * @@ -352,8 +365,9 @@ int slow_work_enqueue(struct slow_work *work) if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) { set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags); } else { - if (slow_work_get_ref(work) < 0) - goto cant_get_ref; + ret = slow_work_get_ref(work); + if (ret < 0) + goto failed; if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) list_add_tail(&work->link, &vslow_work_queue); else @@ -365,12 +379,67 @@ int slow_work_enqueue(struct slow_work *work) } return 0; -cant_get_ref: +cancelled: + ret = -ECANCELED; +failed: spin_unlock_irqrestore(&slow_work_queue_lock, flags); - return -EAGAIN; + return ret; } EXPORT_SYMBOL(slow_work_enqueue); +static int slow_work_wait(void *word) +{ + schedule(); + return 0; +} + +/** + * slow_work_cancel - Cancel a slow work item + * @work: The work item to cancel + * + * This function will cancel a previously enqueued work item. If we cannot + * cancel the work item, it is guarenteed to have run when this function + * returns. + */ +void slow_work_cancel(struct slow_work *work) +{ + bool wait = true, put = false; + + set_bit(SLOW_WORK_CANCELLING, &work->flags); + + spin_lock_irq(&slow_work_queue_lock); + + if (test_bit(SLOW_WORK_PENDING, &work->flags) && + !list_empty(&work->link)) { + /* the link in the pending queue holds a reference on the item + * that we will need to release */ + list_del_init(&work->link); + wait = false; + put = true; + clear_bit(SLOW_WORK_PENDING, &work->flags); + + } else if (test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags)) { + /* the executor is holding our only reference on the item, so + * we merely need to wait for it to finish executing */ + clear_bit(SLOW_WORK_PENDING, &work->flags); + } + + spin_unlock_irq(&slow_work_queue_lock); + + /* the EXECUTING flag is set by the executor whilst the spinlock is set + * and before the item is dequeued - so assuming the above doesn't + * actually dequeue it, simply waiting for the EXECUTING flag to be + * released here should be sufficient */ + if (wait) + wait_on_bit(&work->flags, SLOW_WORK_EXECUTING, slow_work_wait, + TASK_UNINTERRUPTIBLE); + + clear_bit(SLOW_WORK_CANCELLING, &work->flags); + if (put) + slow_work_put_ref(work); +} +EXPORT_SYMBOL(slow_work_cancel); + /* * Schedule a cull of the thread pool at some time in the near future */ -- cgit v0.10.2 From 6b8268b17a1ffc942bc72d7d00274e433d6b6719 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 19 Nov 2009 18:10:47 +0000 Subject: SLOW_WORK: Add delayed_slow_work support This adds support for starting slow work with a delay, similar to the functionality we have for workqueues. Signed-off-by: Jens Axboe Signed-off-by: David Howells diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index 2e384bd..a9d1b0f 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt @@ -41,6 +41,13 @@ expand files, provided the time taken to do so isn't too long. Operations of both types may sleep during execution, thus tying up the thread loaned to it. +A further class of work item is available, based on the slow work item class: + + (*) Delayed slow work items. + +These are slow work items that have a timer to defer queueing of the item for +a while. + THREAD-TO-CLASS ALLOCATION -------------------------- @@ -95,6 +102,10 @@ Slow work items may then be set up by: or: + delayed_slow_work_init(&myitem, &myitem_ops); + + or: + vslow_work_init(&myitem, &myitem_ops); depending on its class. @@ -104,7 +115,9 @@ A suitably set up work item can then be enqueued for processing: int ret = slow_work_enqueue(&myitem); This will return a -ve error if the thread pool is unable to gain a reference -on the item, 0 otherwise. +on the item, 0 otherwise, or (for delayed work): + + int ret = delayed_slow_work_enqueue(&myitem, my_jiffy_delay); The items are reference counted, so there ought to be no need for a flush @@ -112,6 +125,7 @@ operation. But as the reference counting is optional, means to cancel existing work items are also included: cancel_slow_work(&myitem); + cancel_delayed_slow_work(&myitem); can be used to cancel pending work. The above cancel function waits for existing work to have been executed (or prevent execution of them, depending diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index eef2018..b245b9a 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -17,6 +17,7 @@ #ifdef CONFIG_SLOW_WORK #include +#include struct slow_work; @@ -52,10 +53,16 @@ struct slow_work { #define SLOW_WORK_ENQ_DEFERRED 2 /* item enqueue deferred */ #define SLOW_WORK_VERY_SLOW 3 /* item is very slow */ #define SLOW_WORK_CANCELLING 4 /* item is being cancelled, don't enqueue */ +#define SLOW_WORK_DELAYED 5 /* item is struct delayed_slow_work with active timer */ const struct slow_work_ops *ops; /* operations table for this item */ struct list_head link; /* link in queue */ }; +struct delayed_slow_work { + struct slow_work work; + struct timer_list timer; +}; + /** * slow_work_init - Initialise a slow work item * @work: The work item to initialise @@ -72,6 +79,20 @@ static inline void slow_work_init(struct slow_work *work, } /** + * slow_work_init - Initialise a delayed slow work item + * @work: The work item to initialise + * @ops: The operations to use to handle the slow work item + * + * Initialise a delayed slow work item. + */ +static inline void delayed_slow_work_init(struct delayed_slow_work *dwork, + const struct slow_work_ops *ops) +{ + init_timer(&dwork->timer); + slow_work_init(&dwork->work, ops); +} + +/** * vslow_work_init - Initialise a very slow work item * @work: The work item to initialise * @ops: The operations to use to handle the slow work item @@ -93,6 +114,14 @@ extern void slow_work_cancel(struct slow_work *work); extern int slow_work_register_user(struct module *owner); extern void slow_work_unregister_user(struct module *owner); +extern int delayed_slow_work_enqueue(struct delayed_slow_work *dwork, + unsigned long delay); + +static inline void delayed_slow_work_cancel(struct delayed_slow_work *dwork) +{ + slow_work_cancel(&dwork->work); +} + #ifdef CONFIG_SYSCTL extern ctl_table slow_work_sysctls[]; #endif diff --git a/kernel/slow-work.c b/kernel/slow-work.c index 671cc43..f67e1da 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -406,11 +406,40 @@ void slow_work_cancel(struct slow_work *work) bool wait = true, put = false; set_bit(SLOW_WORK_CANCELLING, &work->flags); + smp_mb(); + + /* if the work item is a delayed work item with an active timer, we + * need to wait for the timer to finish _before_ getting the spinlock, + * lest we deadlock against the timer routine + * + * the timer routine will leave DELAYED set if it notices the + * CANCELLING flag in time + */ + if (test_bit(SLOW_WORK_DELAYED, &work->flags)) { + struct delayed_slow_work *dwork = + container_of(work, struct delayed_slow_work, work); + del_timer_sync(&dwork->timer); + } spin_lock_irq(&slow_work_queue_lock); - if (test_bit(SLOW_WORK_PENDING, &work->flags) && - !list_empty(&work->link)) { + if (test_bit(SLOW_WORK_DELAYED, &work->flags)) { + /* the timer routine aborted or never happened, so we are left + * holding the timer's reference on the item and should just + * drop the pending flag and wait for any ongoing execution to + * finish */ + struct delayed_slow_work *dwork = + container_of(work, struct delayed_slow_work, work); + + BUG_ON(timer_pending(&dwork->timer)); + BUG_ON(!list_empty(&work->link)); + + clear_bit(SLOW_WORK_DELAYED, &work->flags); + put = true; + clear_bit(SLOW_WORK_PENDING, &work->flags); + + } else if (test_bit(SLOW_WORK_PENDING, &work->flags) && + !list_empty(&work->link)) { /* the link in the pending queue holds a reference on the item * that we will need to release */ list_del_init(&work->link); @@ -441,6 +470,102 @@ void slow_work_cancel(struct slow_work *work) EXPORT_SYMBOL(slow_work_cancel); /* + * Handle expiry of the delay timer, indicating that a delayed slow work item + * should now be queued if not cancelled + */ +static void delayed_slow_work_timer(unsigned long data) +{ + struct slow_work *work = (struct slow_work *) data; + unsigned long flags; + bool queued = false, put = false; + + spin_lock_irqsave(&slow_work_queue_lock, flags); + if (likely(!test_bit(SLOW_WORK_CANCELLING, &work->flags))) { + clear_bit(SLOW_WORK_DELAYED, &work->flags); + + if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) { + /* we discard the reference the timer was holding in + * favour of the one the executor holds */ + set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags); + put = true; + } else { + if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) + list_add_tail(&work->link, &vslow_work_queue); + else + list_add_tail(&work->link, &slow_work_queue); + queued = true; + } + } + + spin_unlock_irqrestore(&slow_work_queue_lock, flags); + if (put) + slow_work_put_ref(work); + if (queued) + wake_up(&slow_work_thread_wq); +} + +/** + * delayed_slow_work_enqueue - Schedule a delayed slow work item for processing + * @dwork: The delayed work item to queue + * @delay: When to start executing the work, in jiffies from now + * + * This is similar to slow_work_enqueue(), but it adds a delay before the work + * is actually queued for processing. + * + * The item can have delayed processing requested on it whilst it is being + * executed. The delay will begin immediately, and if it expires before the + * item finishes executing, the item will be placed back on the queue when it + * has done executing. + */ +int delayed_slow_work_enqueue(struct delayed_slow_work *dwork, + unsigned long delay) +{ + struct slow_work *work = &dwork->work; + unsigned long flags; + int ret; + + if (delay == 0) + return slow_work_enqueue(&dwork->work); + + BUG_ON(slow_work_user_count <= 0); + BUG_ON(!work); + BUG_ON(!work->ops); + + if (test_bit(SLOW_WORK_CANCELLING, &work->flags)) + return -ECANCELED; + + if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) { + spin_lock_irqsave(&slow_work_queue_lock, flags); + + if (test_bit(SLOW_WORK_CANCELLING, &work->flags)) + goto cancelled; + + /* the timer holds a reference whilst it is pending */ + ret = work->ops->get_ref(work); + if (ret < 0) + goto cant_get_ref; + + if (test_and_set_bit(SLOW_WORK_DELAYED, &work->flags)) + BUG(); + dwork->timer.expires = jiffies + delay; + dwork->timer.data = (unsigned long) work; + dwork->timer.function = delayed_slow_work_timer; + add_timer(&dwork->timer); + + spin_unlock_irqrestore(&slow_work_queue_lock, flags); + } + + return 0; + +cancelled: + ret = -ECANCELED; +cant_get_ref: + spin_unlock_irqrestore(&slow_work_queue_lock, flags); + return ret; +} +EXPORT_SYMBOL(delayed_slow_work_enqueue); + +/* * Schedule a cull of the thread pool at some time in the near future */ static void slow_work_schedule_cull(void) -- cgit v0.10.2 From 8fba10a42d191de612e60e7009c8f0313f90a9b3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:10:51 +0000 Subject: SLOW_WORK: Allow the work items to be viewed through a /proc file Allow the executing and queued work items to be viewed through a /proc file for debugging purposes. The contents look something like the following: THR PID ITEM ADDR FL MARK DESC === ===== ================ == ===== ========== 0 3005 ffff880023f52348 a 952ms FSC: OBJ17d3: LOOK 1 3006 ffff880024e33668 2 160ms FSC: OBJ17e5 OP60d3b: Write1/Store fl=2 2 3165 ffff8800296dd180 a 424ms FSC: OBJ17e4: LOOK 3 4089 ffff8800262c8d78 a 212ms FSC: OBJ17ea: CRTN 4 4090 ffff88002792bed8 2 388ms FSC: OBJ17e8 OP60d36: Write1/Store fl=2 5 4092 ffff88002a0ef308 2 388ms FSC: OBJ17e7 OP60d2e: Write1/Store fl=2 6 4094 ffff88002abaf4b8 2 132ms FSC: OBJ17e2 OP60d4e: Write1/Store fl=2 7 4095 ffff88002bb188e0 a 388ms FSC: OBJ17e9: CRTN vsq - ffff880023d99668 1 308ms FSC: OBJ17e0 OP60f91: Write1/EnQ fl=2 vsq - ffff8800295d1740 1 212ms FSC: OBJ16be OP4d4b6: Write1/EnQ fl=2 vsq - ffff880025ba3308 1 160ms FSC: OBJ179a OP58dec: Write1/EnQ fl=2 vsq - ffff880024ec83e0 1 160ms FSC: OBJ17ae OP599f2: Write1/EnQ fl=2 vsq - ffff880026618e00 1 160ms FSC: OBJ17e6 OP60d33: Write1/EnQ fl=2 vsq - ffff880025a2a4b8 1 132ms FSC: OBJ16a2 OP4d583: Write1/EnQ fl=2 vsq - ffff880023cbe6d8 9 212ms FSC: OBJ17eb: LOOK vsq - ffff880024d37590 9 212ms FSC: OBJ17ec: LOOK vsq - ffff880027746cb0 9 212ms FSC: OBJ17ed: LOOK vsq - ffff880024d37ae8 9 212ms FSC: OBJ17ee: LOOK vsq - ffff880024d37cb0 9 212ms FSC: OBJ17ef: LOOK vsq - ffff880025036550 9 212ms FSC: OBJ17f0: LOOK vsq - ffff8800250368e0 9 212ms FSC: OBJ17f1: LOOK vsq - ffff880025036aa8 9 212ms FSC: OBJ17f2: LOOK In the 'THR' column, executing items show the thread they're occupying and queued threads indicate which queue they're on. 'PID' shows the process ID of a slow-work thread that's executing something. 'FL' shows the work item flags. 'MARK' indicates how long since an item was queued or began executing. Lastly, the 'DESC' column permits the owner of an item to give some information. Signed-off-by: David Howells diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index a9d1b0f..f120238 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt @@ -149,7 +149,8 @@ ITEM OPERATIONS =============== Each work item requires a table of operations of type struct slow_work_ops. -Only ->execute() is required, getting and putting of a reference are optional. +Only ->execute() is required; the getting and putting of a reference and the +describing of an item are all optional. (*) Get a reference on an item: @@ -179,6 +180,16 @@ Only ->execute() is required, getting and putting of a reference are optional. This should perform the work required of the item. It may sleep, it may perform disk I/O and it may wait for locks. + (*) View an item through /proc: + + void (*desc)(struct slow_work *work, struct seq_file *m); + + If supplied, this should print to 'm' a small string describing the work + the item is to do. This should be no more than about 40 characters, and + shouldn't include a newline character. + + See the 'Viewing executing and queued items' section below. + ================== POOL CONFIGURATION @@ -203,3 +214,50 @@ The slow-work thread pool has a number of configurables: is bounded to between 1 and one fewer than the number of active threads. This ensures there is always at least one thread that can process very slow work items, and always at least one thread that won't. + + +================================== +VIEWING EXECUTING AND QUEUED ITEMS +================================== + +If CONFIG_SLOW_WORK_PROC is enabled, a proc file is made available: + + /proc/slow_work_rq + +through which the list of work items being executed and the queues of items to +be executed may be viewed. The owner of a work item is given the chance to +add some information of its own. + +The contents look something like the following: + + THR PID ITEM ADDR FL MARK DESC + === ===== ================ == ===== ========== + 0 3005 ffff880023f52348 a 952ms FSC: OBJ17d3: LOOK + 1 3006 ffff880024e33668 2 160ms FSC: OBJ17e5 OP60d3b: Write1/Store fl=2 + 2 3165 ffff8800296dd180 a 424ms FSC: OBJ17e4: LOOK + 3 4089 ffff8800262c8d78 a 212ms FSC: OBJ17ea: CRTN + 4 4090 ffff88002792bed8 2 388ms FSC: OBJ17e8 OP60d36: Write1/Store fl=2 + 5 4092 ffff88002a0ef308 2 388ms FSC: OBJ17e7 OP60d2e: Write1/Store fl=2 + 6 4094 ffff88002abaf4b8 2 132ms FSC: OBJ17e2 OP60d4e: Write1/Store fl=2 + 7 4095 ffff88002bb188e0 a 388ms FSC: OBJ17e9: CRTN + vsq - ffff880023d99668 1 308ms FSC: OBJ17e0 OP60f91: Write1/EnQ fl=2 + vsq - ffff8800295d1740 1 212ms FSC: OBJ16be OP4d4b6: Write1/EnQ fl=2 + vsq - ffff880025ba3308 1 160ms FSC: OBJ179a OP58dec: Write1/EnQ fl=2 + vsq - ffff880024ec83e0 1 160ms FSC: OBJ17ae OP599f2: Write1/EnQ fl=2 + vsq - ffff880026618e00 1 160ms FSC: OBJ17e6 OP60d33: Write1/EnQ fl=2 + vsq - ffff880025a2a4b8 1 132ms FSC: OBJ16a2 OP4d583: Write1/EnQ fl=2 + vsq - ffff880023cbe6d8 9 212ms FSC: OBJ17eb: LOOK + vsq - ffff880024d37590 9 212ms FSC: OBJ17ec: LOOK + vsq - ffff880027746cb0 9 212ms FSC: OBJ17ed: LOOK + vsq - ffff880024d37ae8 9 212ms FSC: OBJ17ee: LOOK + vsq - ffff880024d37cb0 9 212ms FSC: OBJ17ef: LOOK + vsq - ffff880025036550 9 212ms FSC: OBJ17f0: LOOK + vsq - ffff8800250368e0 9 212ms FSC: OBJ17f1: LOOK + vsq - ffff880025036aa8 9 212ms FSC: OBJ17f2: LOOK + +In the 'THR' column, executing items show the thread they're occupying and +queued threads indicate which queue they're on. 'PID' shows the process ID of +a slow-work thread that's executing something. 'FL' shows the work item flags. +'MARK' indicates how long since an item was queued or began executing. Lastly, +the 'DESC' column permits the owner of an item to give some information. + diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index b245b9a..f414851 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -20,6 +20,9 @@ #include struct slow_work; +#ifdef CONFIG_SLOW_WORK_PROC +struct seq_file; +#endif /* * The operations used to support slow work items @@ -38,6 +41,11 @@ struct slow_work_ops { /* execute a work item */ void (*execute)(struct slow_work *work); + +#ifdef CONFIG_SLOW_WORK_PROC + /* describe a work item for /proc */ + void (*desc)(struct slow_work *work, struct seq_file *m); +#endif }; /* @@ -56,6 +64,9 @@ struct slow_work { #define SLOW_WORK_DELAYED 5 /* item is struct delayed_slow_work with active timer */ const struct slow_work_ops *ops; /* operations table for this item */ struct list_head link; /* link in queue */ +#ifdef CONFIG_SLOW_WORK_PROC + struct timespec mark; /* jiffies at which queued or exec begun */ +#endif }; struct delayed_slow_work { diff --git a/init/Kconfig b/init/Kconfig index 9e03ef8..ab5c648 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1098,6 +1098,16 @@ config SLOW_WORK See Documentation/slow-work.txt. +config SLOW_WORK_PROC + bool "Slow work debugging through /proc" + default n + depends on SLOW_WORK && PROC_FS + help + Display the contents of the slow work run queue through /proc, + including items currently executing. + + See Documentation/slow-work.txt. + endmenu # General setup config HAVE_GENERIC_DMA_COHERENT diff --git a/kernel/Makefile b/kernel/Makefile index b8d4cd8..776ffed 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -94,6 +94,7 @@ obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_SLOW_WORK) += slow-work.o +obj-$(CONFIG_SLOW_WORK_PROC) += slow-work-proc.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) diff --git a/kernel/slow-work-proc.c b/kernel/slow-work-proc.c new file mode 100644 index 0000000..3988032 --- /dev/null +++ b/kernel/slow-work-proc.c @@ -0,0 +1,227 @@ +/* Slow work debugging + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include "slow-work.h" + +#define ITERATOR_SHIFT (BITS_PER_LONG - 4) +#define ITERATOR_SELECTOR (0xfUL << ITERATOR_SHIFT) +#define ITERATOR_COUNTER (~ITERATOR_SELECTOR) + +void slow_work_new_thread_desc(struct slow_work *work, struct seq_file *m) +{ + seq_puts(m, "Slow-work: New thread"); +} + +/* + * Render the time mark field on a work item into a 5-char time with units plus + * a space + */ +static void slow_work_print_mark(struct seq_file *m, struct slow_work *work) +{ + struct timespec now, diff; + + now = CURRENT_TIME; + diff = timespec_sub(now, work->mark); + + if (diff.tv_sec < 0) + seq_puts(m, " -ve "); + else if (diff.tv_sec == 0 && diff.tv_nsec < 1000) + seq_printf(m, "%3luns ", diff.tv_nsec); + else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000) + seq_printf(m, "%3luus ", diff.tv_nsec / 1000); + else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000000) + seq_printf(m, "%3lums ", diff.tv_nsec / 1000000); + else if (diff.tv_sec <= 1) + seq_puts(m, " 1s "); + else if (diff.tv_sec < 60) + seq_printf(m, "%4lus ", diff.tv_sec); + else if (diff.tv_sec < 60 * 60) + seq_printf(m, "%4lum ", diff.tv_sec / 60); + else if (diff.tv_sec < 60 * 60 * 24) + seq_printf(m, "%4luh ", diff.tv_sec / 3600); + else + seq_puts(m, "exces "); +} + +/* + * Describe a slow work item for /proc + */ +static int slow_work_runqueue_show(struct seq_file *m, void *v) +{ + struct slow_work *work; + struct list_head *p = v; + unsigned long id; + + switch ((unsigned long) v) { + case 1: + seq_puts(m, "THR PID ITEM ADDR FL MARK DESC\n"); + return 0; + case 2: + seq_puts(m, "=== ===== ================ == ===== ==========\n"); + return 0; + + case 3 ... 3 + SLOW_WORK_THREAD_LIMIT - 1: + id = (unsigned long) v - 3; + + read_lock(&slow_work_execs_lock); + work = slow_work_execs[id]; + if (work) { + smp_read_barrier_depends(); + + seq_printf(m, "%3lu %5d %16p %2lx ", + id, slow_work_pids[id], work, work->flags); + slow_work_print_mark(m, work); + + if (work->ops->desc) + work->ops->desc(work, m); + seq_putc(m, '\n'); + } + read_unlock(&slow_work_execs_lock); + return 0; + + default: + work = list_entry(p, struct slow_work, link); + seq_printf(m, "%3s - %16p %2lx ", + work->flags & SLOW_WORK_VERY_SLOW ? "vsq" : "sq", + work, work->flags); + slow_work_print_mark(m, work); + + if (work->ops->desc) + work->ops->desc(work, m); + seq_putc(m, '\n'); + return 0; + } +} + +/* + * map the iterator to a work item + */ +static void *slow_work_runqueue_index(struct seq_file *m, loff_t *_pos) +{ + struct list_head *p; + unsigned long count, id; + + switch (*_pos >> ITERATOR_SHIFT) { + case 0x0: + if (*_pos == 0) + *_pos = 1; + if (*_pos < 3) + return (void *)(unsigned long) *_pos; + if (*_pos < 3 + SLOW_WORK_THREAD_LIMIT) + for (id = *_pos - 3; + id < SLOW_WORK_THREAD_LIMIT; + id++, (*_pos)++) + if (slow_work_execs[id]) + return (void *)(unsigned long) *_pos; + *_pos = 0x1UL << ITERATOR_SHIFT; + + case 0x1: + count = *_pos & ITERATOR_COUNTER; + list_for_each(p, &slow_work_queue) { + if (count == 0) + return p; + count--; + } + *_pos = 0x2UL << ITERATOR_SHIFT; + + case 0x2: + count = *_pos & ITERATOR_COUNTER; + list_for_each(p, &vslow_work_queue) { + if (count == 0) + return p; + count--; + } + *_pos = 0x3UL << ITERATOR_SHIFT; + + default: + return NULL; + } +} + +/* + * set up the iterator to start reading from the first line + */ +static void *slow_work_runqueue_start(struct seq_file *m, loff_t *_pos) +{ + spin_lock_irq(&slow_work_queue_lock); + return slow_work_runqueue_index(m, _pos); +} + +/* + * move to the next line + */ +static void *slow_work_runqueue_next(struct seq_file *m, void *v, loff_t *_pos) +{ + struct list_head *p = v; + unsigned long selector = *_pos >> ITERATOR_SHIFT; + + (*_pos)++; + switch (selector) { + case 0x0: + return slow_work_runqueue_index(m, _pos); + + case 0x1: + if (*_pos >> ITERATOR_SHIFT == 0x1) { + p = p->next; + if (p != &slow_work_queue) + return p; + } + *_pos = 0x2UL << ITERATOR_SHIFT; + p = &vslow_work_queue; + + case 0x2: + if (*_pos >> ITERATOR_SHIFT == 0x2) { + p = p->next; + if (p != &vslow_work_queue) + return p; + } + *_pos = 0x3UL << ITERATOR_SHIFT; + + default: + return NULL; + } +} + +/* + * clean up after reading + */ +static void slow_work_runqueue_stop(struct seq_file *m, void *v) +{ + spin_unlock_irq(&slow_work_queue_lock); +} + +static const struct seq_operations slow_work_runqueue_ops = { + .start = slow_work_runqueue_start, + .stop = slow_work_runqueue_stop, + .next = slow_work_runqueue_next, + .show = slow_work_runqueue_show, +}; + +/* + * open "/proc/slow_work_rq" to list queue contents + */ +static int slow_work_runqueue_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &slow_work_runqueue_ops); +} + +const struct file_operations slow_work_runqueue_fops = { + .owner = THIS_MODULE, + .open = slow_work_runqueue_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; diff --git a/kernel/slow-work.c b/kernel/slow-work.c index f67e1da..b763bc2 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -16,13 +16,8 @@ #include #include #include - -#define SLOW_WORK_CULL_TIMEOUT (5 * HZ) /* cull threads 5s after running out of - * things to do */ -#define SLOW_WORK_OOM_TIMEOUT (5 * HZ) /* can't start new threads for 5s after - * OOM */ - -#define SLOW_WORK_THREAD_LIMIT 255 /* abs maximum number of slow-work threads */ +#include +#include "slow-work.h" static void slow_work_cull_timeout(unsigned long); static void slow_work_oom_timeout(unsigned long); @@ -117,6 +112,15 @@ static DEFINE_MUTEX(slow_work_unreg_sync_lock); #endif /* + * Data for tracking currently executing items for indication through /proc + */ +#ifdef CONFIG_SLOW_WORK_PROC +struct slow_work *slow_work_execs[SLOW_WORK_THREAD_LIMIT]; +pid_t slow_work_pids[SLOW_WORK_THREAD_LIMIT]; +DEFINE_RWLOCK(slow_work_execs_lock); +#endif + +/* * The queues of work items and the lock governing access to them. These are * shared between all the CPUs. It doesn't make sense to have per-CPU queues * as the number of threads bears no relation to the number of CPUs. @@ -124,9 +128,9 @@ static DEFINE_MUTEX(slow_work_unreg_sync_lock); * There are two queues of work items: one for slow work items, and one for * very slow work items. */ -static LIST_HEAD(slow_work_queue); -static LIST_HEAD(vslow_work_queue); -static DEFINE_SPINLOCK(slow_work_queue_lock); +LIST_HEAD(slow_work_queue); +LIST_HEAD(vslow_work_queue); +DEFINE_SPINLOCK(slow_work_queue_lock); /* * The thread controls. A variable used to signal to the threads that they @@ -182,7 +186,7 @@ static unsigned slow_work_calc_vsmax(void) * Attempt to execute stuff queued on a slow thread. Return true if we managed * it, false if there was nothing to do. */ -static bool slow_work_execute(int id) +static noinline bool slow_work_execute(int id) { #ifdef CONFIG_MODULES struct module *module; @@ -227,6 +231,10 @@ static bool slow_work_execute(int id) if (work) slow_work_thread_processing[id] = work->owner; #endif + if (work) { + slow_work_mark_time(work); + slow_work_begin_exec(id, work); + } spin_unlock_irq(&slow_work_queue_lock); @@ -247,6 +255,8 @@ static bool slow_work_execute(int id) /* wake up anyone waiting for this work to be complete */ wake_up_bit(&work->flags, SLOW_WORK_EXECUTING); + slow_work_end_exec(id, work); + /* if someone tried to enqueue the item whilst we were executing it, * then it'll be left unenqueued to avoid multiple threads trying to * execute it simultaneously @@ -285,6 +295,7 @@ auto_requeue: * - we transfer our ref on the item back to the appropriate queue * - don't wake another thread up as we're awake already */ + slow_work_mark_time(work); if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) list_add_tail(&work->link, &vslow_work_queue); else @@ -368,6 +379,7 @@ int slow_work_enqueue(struct slow_work *work) ret = slow_work_get_ref(work); if (ret < 0) goto failed; + slow_work_mark_time(work); if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) list_add_tail(&work->link, &vslow_work_queue); else @@ -489,6 +501,7 @@ static void delayed_slow_work_timer(unsigned long data) set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags); put = true; } else { + slow_work_mark_time(work); if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) list_add_tail(&work->link, &vslow_work_queue); else @@ -627,6 +640,7 @@ static int slow_work_thread(void *_data) id = find_first_zero_bit(slow_work_ids, SLOW_WORK_THREAD_LIMIT); BUG_ON(id < 0 || id >= SLOW_WORK_THREAD_LIMIT); __set_bit(id, slow_work_ids); + slow_work_set_thread_pid(id, current->pid); spin_unlock_irq(&slow_work_queue_lock); sprintf(current->comm, "kslowd%03u", id); @@ -669,6 +683,7 @@ static int slow_work_thread(void *_data) } spin_lock_irq(&slow_work_queue_lock); + slow_work_set_thread_pid(id, 0); __clear_bit(id, slow_work_ids); spin_unlock_irq(&slow_work_queue_lock); @@ -722,6 +737,9 @@ static void slow_work_new_thread_execute(struct slow_work *work) static const struct slow_work_ops slow_work_new_thread_ops = { .owner = THIS_MODULE, .execute = slow_work_new_thread_execute, +#ifdef CONFIG_SLOW_WORK_PROC + .desc = slow_work_new_thread_desc, +#endif }; /* @@ -949,6 +967,10 @@ static int __init init_slow_work(void) if (slow_work_max_max_threads < nr_cpus * 2) slow_work_max_max_threads = nr_cpus * 2; #endif +#ifdef CONFIG_SLOW_WORK_PROC + proc_create("slow_work_rq", S_IFREG | 0400, NULL, + &slow_work_runqueue_fops); +#endif return 0; } diff --git a/kernel/slow-work.h b/kernel/slow-work.h new file mode 100644 index 0000000..3c2f007 --- /dev/null +++ b/kernel/slow-work.h @@ -0,0 +1,72 @@ +/* Slow work private definitions + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define SLOW_WORK_CULL_TIMEOUT (5 * HZ) /* cull threads 5s after running out of + * things to do */ +#define SLOW_WORK_OOM_TIMEOUT (5 * HZ) /* can't start new threads for 5s after + * OOM */ + +#define SLOW_WORK_THREAD_LIMIT 255 /* abs maximum number of slow-work threads */ + +/* + * slow-work.c + */ +#ifdef CONFIG_SLOW_WORK_PROC +extern struct slow_work *slow_work_execs[]; +extern pid_t slow_work_pids[]; +extern rwlock_t slow_work_execs_lock; +#endif + +extern struct list_head slow_work_queue; +extern struct list_head vslow_work_queue; +extern spinlock_t slow_work_queue_lock; + +/* + * slow-work-proc.c + */ +#ifdef CONFIG_SLOW_WORK_PROC +extern const struct file_operations slow_work_runqueue_fops; + +extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *); +#endif + +/* + * Helper functions + */ +static inline void slow_work_set_thread_pid(int id, pid_t pid) +{ +#ifdef CONFIG_SLOW_WORK_PROC + slow_work_pids[id] = pid; +#endif +} + +static inline void slow_work_mark_time(struct slow_work *work) +{ +#ifdef CONFIG_SLOW_WORK_PROC + work->mark = CURRENT_TIME; +#endif +} + +static inline void slow_work_begin_exec(int id, struct slow_work *work) +{ +#ifdef CONFIG_SLOW_WORK_PROC + slow_work_execs[id] = work; +#endif +} + +static inline void slow_work_end_exec(int id, struct slow_work *work) +{ +#ifdef CONFIG_SLOW_WORK_PROC + write_lock(&slow_work_execs_lock); + slow_work_execs[id] = NULL; + write_unlock(&slow_work_execs_lock); +#endif +} -- cgit v0.10.2 From 31ba99d304494cb28fa8671ccc769c5543e1165d Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:10:53 +0000 Subject: SLOW_WORK: Allow the owner of a work item to determine if it is queued or not Add a function (slow_work_is_queued()) to permit the owner of a work item to determine if the item is queued or not. The work item is counted as being queued if it is actually on the queue, not just if it is pending. If it is executing and pending, then it is not on the queue, but will rather be put back on the queue when execution finishes. This permits a caller to quickly work out if it may be able to put another, dependent work item on the queue behind it, or whether it will have to wait till that is finished. This can be used by CacheFiles to work out whether the creation a new object can be immediately deferred when it has to wait for an old object to be deleted, or whether a wait must take place. If a wait is necessary, then the slow-work thread can otherwise get blocked, preventing the deletion from taking place. Signed-off-by: David Howells diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index f120238..0169c9d 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt @@ -144,6 +144,21 @@ from being taken away before it completes. module should almost certainly be THIS_MODULE. +================ +HELPER FUNCTIONS +================ + +The slow-work facility provides a function by which it can be determined +whether or not an item is queued for later execution: + + bool queued = slow_work_is_queued(struct slow_work *work); + +If it returns false, then the item is not on the queue (it may be executing +with a requeue pending). This can be used to work out whether an item on which +another depends is on the queue, thus allowing a dependent item to be queued +after it. + + =============== ITEM OPERATIONS =============== diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index f414851..bfd3ab4 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -120,6 +120,25 @@ static inline void vslow_work_init(struct slow_work *work, INIT_LIST_HEAD(&work->link); } +/** + * slow_work_is_queued - Determine if a slow work item is on the work queue + * work: The work item to test + * + * Determine if the specified slow-work item is on the work queue. This + * returns true if it is actually on the queue. + * + * If the item is executing and has been marked for requeue when execution + * finishes, then false will be returned. + * + * Anyone wishing to wait for completion of execution can wait on the + * SLOW_WORK_EXECUTING bit. + */ +static inline bool slow_work_is_queued(struct slow_work *work) +{ + unsigned long flags = work->flags; + return flags & SLOW_WORK_PENDING && !(flags & SLOW_WORK_EXECUTING); +} + extern int slow_work_enqueue(struct slow_work *work); extern void slow_work_cancel(struct slow_work *work); extern int slow_work_register_user(struct module *owner); -- cgit v0.10.2 From 3bde31a4ac225cb5805be02eff6eaaf7e0766ccd Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:10:57 +0000 Subject: SLOW_WORK: Allow a requeueable work item to sleep till the thread is needed Add a function to allow a requeueable work item to sleep till the thread processing it is needed by the slow-work facility to perform other work. Sometimes a work item can't progress immediately, but must wait for the completion of another work item that's currently being processed by another slow-work thread. In some circumstances, the waiting item could instead - theoretically - put itself back on the queue and yield its thread back to the slow-work facility, thus waiting till it gets processing time again before attempting to progress. This would allow other work items processing time on that thread. However, this only works if there is something on the queue for it to queue behind - otherwise it will just get a thread again immediately, and will end up cycling between the queue and the thread, eating up valuable CPU time. So, slow_work_sleep_till_thread_needed() is provided such that an item can put itself on a wait queue that will wake it up when the event it is actually interested in occurs, then call this function in lieu of calling schedule(). This function will then sleep until either the item's event occurs or another work item appears on the queue. If another work item is queued, but the item's event hasn't occurred, then the work item should requeue itself and yield the thread back to the slow-work facility by returning. This can be used by CacheFiles for an object that is being created on one thread to wait for an object being deleted on another thread where there is nothing on the queue for the creation to go and wait behind. As soon as an item appears on the queue that could be given thread time instead, CacheFiles can stick the creating object back on the queue and return to the slow-work facility - assuming the object deletion didn't also complete. Signed-off-by: David Howells diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index 0169c9d..52bc314 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt @@ -158,6 +158,50 @@ with a requeue pending). This can be used to work out whether an item on which another depends is on the queue, thus allowing a dependent item to be queued after it. +If the above shows an item on which another depends not to be queued, then the +owner of the dependent item might need to wait. However, to avoid locking up +the threads unnecessarily be sleeping in them, it can make sense under some +circumstances to return the work item to the queue, thus deferring it until +some other items have had a chance to make use of the yielded thread. + +To yield a thread and defer an item, the work function should simply enqueue +the work item again and return. However, this doesn't work if there's nothing +actually on the queue, as the thread just vacated will jump straight back into +the item's work function, thus busy waiting on a CPU. + +Instead, the item should use the thread to wait for the dependency to go away, +but rather than using schedule() or schedule_timeout() to sleep, it should use +the following function: + + bool requeue = slow_work_sleep_till_thread_needed( + struct slow_work *work, + signed long *_timeout); + +This will add a second wait and then sleep, such that it will be woken up if +either something appears on the queue that could usefully make use of the +thread - and behind which this item can be queued, or if the event the caller +set up to wait for happens. True will be returned if something else appeared +on the queue and this work function should perhaps return, of false if +something else woke it up. The timeout is as for schedule_timeout(). + +For example: + + wq = bit_waitqueue(&my_flags, MY_BIT); + init_wait(&wait); + requeue = false; + do { + prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); + if (!test_bit(MY_BIT, &my_flags)) + break; + requeue = slow_work_sleep_till_thread_needed(&my_work, + &timeout); + } while (timeout > 0 && !requeue); + finish_wait(wq, &wait); + if (!test_bit(MY_BIT, &my_flags) + goto do_my_thing; + if (requeue) + return; // to slow_work + =============== ITEM OPERATIONS diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index bfd3ab4..5035a26 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -152,6 +152,9 @@ static inline void delayed_slow_work_cancel(struct delayed_slow_work *dwork) slow_work_cancel(&dwork->work); } +extern bool slow_work_sleep_till_thread_needed(struct slow_work *work, + signed long *_timeout); + #ifdef CONFIG_SYSCTL extern ctl_table slow_work_sysctls[]; #endif diff --git a/kernel/slow-work.c b/kernel/slow-work.c index b763bc2..da94f3c 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -133,6 +133,15 @@ LIST_HEAD(vslow_work_queue); DEFINE_SPINLOCK(slow_work_queue_lock); /* + * The following are two wait queues that get pinged when a work item is placed + * on an empty queue. These allow work items that are hogging a thread by + * sleeping in a way that could be deferred to yield their thread and enqueue + * themselves. + */ +static DECLARE_WAIT_QUEUE_HEAD(slow_work_queue_waits_for_occupation); +static DECLARE_WAIT_QUEUE_HEAD(vslow_work_queue_waits_for_occupation); + +/* * The thread controls. A variable used to signal to the threads that they * should exit when the queue is empty, a waitqueue used by the threads to wait * for signals, and a completion set by the last thread to exit. @@ -306,6 +315,50 @@ auto_requeue: } /** + * slow_work_sleep_till_thread_needed - Sleep till thread needed by other work + * work: The work item under execution that wants to sleep + * _timeout: Scheduler sleep timeout + * + * Allow a requeueable work item to sleep on a slow-work processor thread until + * that thread is needed to do some other work or the sleep is interrupted by + * some other event. + * + * The caller must set up a wake up event before calling this and must have set + * the appropriate sleep mode (such as TASK_UNINTERRUPTIBLE) and tested its own + * condition before calling this function as no test is made here. + * + * False is returned if there is nothing on the queue; true is returned if the + * work item should be requeued + */ +bool slow_work_sleep_till_thread_needed(struct slow_work *work, + signed long *_timeout) +{ + wait_queue_head_t *wfo_wq; + struct list_head *queue; + + DEFINE_WAIT(wait); + + if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) { + wfo_wq = &vslow_work_queue_waits_for_occupation; + queue = &vslow_work_queue; + } else { + wfo_wq = &slow_work_queue_waits_for_occupation; + queue = &slow_work_queue; + } + + if (!list_empty(queue)) + return true; + + add_wait_queue_exclusive(wfo_wq, &wait); + if (list_empty(queue)) + *_timeout = schedule_timeout(*_timeout); + finish_wait(wfo_wq, &wait); + + return !list_empty(queue); +} +EXPORT_SYMBOL(slow_work_sleep_till_thread_needed); + +/** * slow_work_enqueue - Schedule a slow work item for processing * @work: The work item to queue * @@ -335,6 +388,8 @@ auto_requeue: */ int slow_work_enqueue(struct slow_work *work) { + wait_queue_head_t *wfo_wq; + struct list_head *queue; unsigned long flags; int ret; @@ -354,6 +409,14 @@ int slow_work_enqueue(struct slow_work *work) * maintaining our promise */ if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) { + if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) { + wfo_wq = &vslow_work_queue_waits_for_occupation; + queue = &vslow_work_queue; + } else { + wfo_wq = &slow_work_queue_waits_for_occupation; + queue = &slow_work_queue; + } + spin_lock_irqsave(&slow_work_queue_lock, flags); if (unlikely(test_bit(SLOW_WORK_CANCELLING, &work->flags))) @@ -380,11 +443,13 @@ int slow_work_enqueue(struct slow_work *work) if (ret < 0) goto failed; slow_work_mark_time(work); - if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) - list_add_tail(&work->link, &vslow_work_queue); - else - list_add_tail(&work->link, &slow_work_queue); + list_add_tail(&work->link, queue); wake_up(&slow_work_thread_wq); + + /* if someone who could be requeued is sleeping on a + * thread, then ask them to yield their thread */ + if (work->link.prev == queue) + wake_up(wfo_wq); } spin_unlock_irqrestore(&slow_work_queue_lock, flags); @@ -487,9 +552,19 @@ EXPORT_SYMBOL(slow_work_cancel); */ static void delayed_slow_work_timer(unsigned long data) { + wait_queue_head_t *wfo_wq; + struct list_head *queue; struct slow_work *work = (struct slow_work *) data; unsigned long flags; - bool queued = false, put = false; + bool queued = false, put = false, first = false; + + if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) { + wfo_wq = &vslow_work_queue_waits_for_occupation; + queue = &vslow_work_queue; + } else { + wfo_wq = &slow_work_queue_waits_for_occupation; + queue = &slow_work_queue; + } spin_lock_irqsave(&slow_work_queue_lock, flags); if (likely(!test_bit(SLOW_WORK_CANCELLING, &work->flags))) { @@ -502,17 +577,18 @@ static void delayed_slow_work_timer(unsigned long data) put = true; } else { slow_work_mark_time(work); - if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) - list_add_tail(&work->link, &vslow_work_queue); - else - list_add_tail(&work->link, &slow_work_queue); + list_add_tail(&work->link, queue); queued = true; + if (work->link.prev == queue) + first = true; } } spin_unlock_irqrestore(&slow_work_queue_lock, flags); if (put) slow_work_put_ref(work); + if (first) + wake_up(wfo_wq); if (queued) wake_up(&slow_work_thread_wq); } -- cgit v0.10.2 From 440f0affe247e9990c8f8778f1861da4fd7d5e50 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:01 +0000 Subject: FS-Cache: Annotate slow-work runqueue proc lines for FS-Cache work items Annotate slow-work runqueue proc lines for FS-Cache work items. Objects include the object ID and the state. Operations include the object ID, the operation ID and the operation type and state. Signed-off-by: David Howells diff --git a/fs/fscache/object.c b/fs/fscache/object.c index d236eb1..615b63d 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -14,9 +14,10 @@ #define FSCACHE_DEBUG_LEVEL COOKIE #include +#include #include "internal.h" -const char *fscache_object_states[] = { +const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = { [FSCACHE_OBJECT_INIT] = "OBJECT_INIT", [FSCACHE_OBJECT_LOOKING_UP] = "OBJECT_LOOKING_UP", [FSCACHE_OBJECT_CREATING] = "OBJECT_CREATING", @@ -33,9 +34,28 @@ const char *fscache_object_states[] = { }; EXPORT_SYMBOL(fscache_object_states); +static const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = { + [FSCACHE_OBJECT_INIT] = "INIT", + [FSCACHE_OBJECT_LOOKING_UP] = "LOOK", + [FSCACHE_OBJECT_CREATING] = "CRTN", + [FSCACHE_OBJECT_AVAILABLE] = "AVBL", + [FSCACHE_OBJECT_ACTIVE] = "ACTV", + [FSCACHE_OBJECT_UPDATING] = "UPDT", + [FSCACHE_OBJECT_DYING] = "DYNG", + [FSCACHE_OBJECT_LC_DYING] = "LCDY", + [FSCACHE_OBJECT_ABORT_INIT] = "ABTI", + [FSCACHE_OBJECT_RELEASING] = "RELS", + [FSCACHE_OBJECT_RECYCLING] = "RCYC", + [FSCACHE_OBJECT_WITHDRAWING] = "WTHD", + [FSCACHE_OBJECT_DEAD] = "DEAD", +}; + static void fscache_object_slow_work_put_ref(struct slow_work *); static int fscache_object_slow_work_get_ref(struct slow_work *); static void fscache_object_slow_work_execute(struct slow_work *); +#ifdef CONFIG_SLOW_WORK_PROC +static void fscache_object_slow_work_desc(struct slow_work *, struct seq_file *); +#endif static void fscache_initialise_object(struct fscache_object *); static void fscache_lookup_object(struct fscache_object *); static void fscache_object_available(struct fscache_object *); @@ -49,6 +69,9 @@ const struct slow_work_ops fscache_object_slow_work_ops = { .get_ref = fscache_object_slow_work_get_ref, .put_ref = fscache_object_slow_work_put_ref, .execute = fscache_object_slow_work_execute, +#ifdef CONFIG_SLOW_WORK_PROC + .desc = fscache_object_slow_work_desc, +#endif }; EXPORT_SYMBOL(fscache_object_slow_work_ops); @@ -327,6 +350,22 @@ static void fscache_object_slow_work_execute(struct slow_work *work) } /* + * describe an object for slow-work debugging + */ +#ifdef CONFIG_SLOW_WORK_PROC +static void fscache_object_slow_work_desc(struct slow_work *work, + struct seq_file *m) +{ + struct fscache_object *object = + container_of(work, struct fscache_object, work); + + seq_printf(m, "FSC: OBJ%x: %s", + object->debug_id, + fscache_object_states_short[object->state]); +} +#endif + +/* * initialise an object * - check the specified object's parent to see if we can make use of it * immediately to do a creation diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index f1a2857..91bbe6f 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -13,6 +13,7 @@ #define FSCACHE_DEBUG_LEVEL OPERATION #include +#include #include "internal.h" atomic_t fscache_op_debug_id; @@ -31,6 +32,8 @@ void fscache_enqueue_operation(struct fscache_operation *op) _enter("{OBJ%x OP%x,%u}", op->object->debug_id, op->debug_id, atomic_read(&op->usage)); + fscache_set_op_state(op, "EnQ"); + ASSERT(op->processor != NULL); ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); ASSERTCMP(atomic_read(&op->usage), >, 0); @@ -67,6 +70,8 @@ EXPORT_SYMBOL(fscache_enqueue_operation); static void fscache_run_op(struct fscache_object *object, struct fscache_operation *op) { + fscache_set_op_state(op, "Run"); + object->n_in_progress++; if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) wake_up_bit(&op->flags, FSCACHE_OP_WAITING); @@ -87,6 +92,8 @@ int fscache_submit_exclusive_op(struct fscache_object *object, _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); + fscache_set_op_state(op, "SubmitX"); + spin_lock(&object->lock); ASSERTCMP(object->n_ops, >=, object->n_in_progress); ASSERTCMP(object->n_ops, >=, object->n_exclusive); @@ -190,6 +197,8 @@ int fscache_submit_op(struct fscache_object *object, ASSERTCMP(atomic_read(&op->usage), >, 0); + fscache_set_op_state(op, "Submit"); + spin_lock(&object->lock); ASSERTCMP(object->n_ops, >=, object->n_in_progress); ASSERTCMP(object->n_ops, >=, object->n_exclusive); @@ -298,6 +307,8 @@ void fscache_put_operation(struct fscache_operation *op) if (!atomic_dec_and_test(&op->usage)) return; + fscache_set_op_state(op, "Put"); + _debug("PUT OP"); if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) BUG(); @@ -452,9 +463,27 @@ static void fscache_op_execute(struct slow_work *work) _leave(""); } +/* + * describe an operation for slow-work debugging + */ +#ifdef CONFIG_SLOW_WORK_PROC +static void fscache_op_desc(struct slow_work *work, struct seq_file *m) +{ + struct fscache_operation *op = + container_of(work, struct fscache_operation, slow_work); + + seq_printf(m, "FSC: OBJ%x OP%x: %s/%s fl=%lx", + op->object->debug_id, op->debug_id, + op->name, op->state, op->flags); +} +#endif + const struct slow_work_ops fscache_op_slow_work_ops = { .owner = THIS_MODULE, .get_ref = fscache_op_get_ref, .put_ref = fscache_op_put_ref, .execute = fscache_op_execute, +#ifdef CONFIG_SLOW_WORK_PROC + .desc = fscache_op_desc, +#endif }; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 2568e0e..e8bbc39 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -63,14 +63,19 @@ static void fscache_end_page_write(struct fscache_cookie *cookie, struct page *p static void fscache_attr_changed_op(struct fscache_operation *op) { struct fscache_object *object = op->object; + int ret; _enter("{OBJ%x OP%x}", object->debug_id, op->debug_id); fscache_stat(&fscache_n_attr_changed_calls); - if (fscache_object_is_active(object) && - object->cache->ops->attr_changed(object) < 0) - fscache_abort_object(object); + if (fscache_object_is_active(object)) { + fscache_set_op_state(op, "CallFS"); + ret = object->cache->ops->attr_changed(object); + fscache_set_op_state(op, "Done"); + if (ret < 0) + fscache_abort_object(object); + } _leave(""); } @@ -99,6 +104,7 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) fscache_operation_init(op, NULL); fscache_operation_init_slow(op, fscache_attr_changed_op); op->flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_EXCLUSIVE); + fscache_set_op_name(op, "Attr"); spin_lock(&cookie->lock); @@ -184,6 +190,7 @@ static struct fscache_retrieval *fscache_alloc_retrieval( op->start_time = jiffies; INIT_WORK(&op->op.fast_work, fscache_retrieval_work); INIT_LIST_HEAD(&op->to_do); + fscache_set_op_name(&op->op, "Retr"); return op; } @@ -257,6 +264,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, _leave(" = -ENOMEM"); return -ENOMEM; } + fscache_set_op_name(&op->op, "RetrRA1"); spin_lock(&cookie->lock); @@ -369,6 +377,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, op = fscache_alloc_retrieval(mapping, end_io_func, context); if (!op) return -ENOMEM; + fscache_set_op_name(&op->op, "RetrRAN"); spin_lock(&cookie->lock); @@ -461,6 +470,7 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, op = fscache_alloc_retrieval(page->mapping, NULL, NULL); if (!op) return -ENOMEM; + fscache_set_op_name(&op->op, "RetrAL1"); spin_lock(&cookie->lock); @@ -529,6 +539,8 @@ static void fscache_write_op(struct fscache_operation *_op) _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage)); + fscache_set_op_state(&op->op, "GetPage"); + spin_lock(&cookie->lock); spin_lock(&object->lock); @@ -559,13 +571,17 @@ static void fscache_write_op(struct fscache_operation *_op) spin_unlock(&cookie->lock); if (page) { + fscache_set_op_state(&op->op, "Store"); ret = object->cache->ops->write_page(op, page); + fscache_set_op_state(&op->op, "EndWrite"); fscache_end_page_write(cookie, page); page_cache_release(page); - if (ret < 0) + if (ret < 0) { + fscache_set_op_state(&op->op, "Abort"); fscache_abort_object(object); - else + } else { fscache_enqueue_operation(&op->op); + } } _leave(""); @@ -634,6 +650,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, fscache_operation_init(&op->op, fscache_release_write_op); fscache_operation_init_slow(&op->op, fscache_write_op); op->op.flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_WAITING); + fscache_set_op_name(&op->op, "Write1"); ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); if (ret < 0) diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 84d3532..7a9847c 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -102,6 +102,16 @@ struct fscache_operation { /* operation releaser */ fscache_operation_release_t release; + +#ifdef CONFIG_SLOW_WORK_PROC + const char *name; /* operation name */ + const char *state; /* operation state */ +#define fscache_set_op_name(OP, N) do { (OP)->name = (N); } while(0) +#define fscache_set_op_state(OP, S) do { (OP)->state = (S); } while(0) +#else +#define fscache_set_op_name(OP, N) do { } while(0) +#define fscache_set_op_state(OP, S) do { } while(0) +#endif }; extern atomic_t fscache_op_debug_id; @@ -125,6 +135,7 @@ static inline void fscache_operation_init(struct fscache_operation *op, op->debug_id = atomic_inc_return(&fscache_op_debug_id); op->release = release; INIT_LIST_HEAD(&op->pend_link); + fscache_set_op_state(op, "Init"); } /** @@ -337,6 +348,7 @@ struct fscache_object { FSCACHE_OBJECT_RECYCLING, /* retiring object */ FSCACHE_OBJECT_WITHDRAWING, /* withdrawing object */ FSCACHE_OBJECT_DEAD, /* object is now dead */ + FSCACHE_OBJECT__NSTATES } state; int debug_id; /* debugging ID */ -- cgit v0.10.2 From 4fbf4291aa15926cd4fdca0ffe9122e89d0459db Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:04 +0000 Subject: FS-Cache: Allow the current state of all objects to be dumped Allow the current state of all fscache objects to be dumped by doing: cat /proc/fs/fscache/objects By default, all objects and all fields will be shown. This can be restricted by adding a suitable key to one of the caller's keyrings (such as the session keyring): keyctl add user fscache:objlist "" @s The are: K Show hexdump of object key (don't show if not given) A Show hexdump of object aux data (don't show if not given) And paired restrictions: C Show objects that have a cookie c Show objects that don't have a cookie B Show objects that are busy b Show objects that aren't busy W Show objects that have pending writes w Show objects that don't have pending writes R Show objects that have outstanding reads r Show objects that don't have outstanding reads S Show objects that have slow work queued s Show objects that don't have slow work queued If neither side of a restriction pair is given, then both are implied. For example: keyctl add user fscache:objlist KB @s shows objects that are busy, and lists their object keys, but does not dump their auxiliary data. It also implies "CcWwRrSs", but as 'B' is given, 'b' is not implied. Signed-off-by: David Howells diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 9e94b94..cac09e1 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -299,6 +299,87 @@ proc files. jiffy range covered, and the SECS field the equivalent number of seconds. +=========== +OBJECT LIST +=========== + +If CONFIG_FSCACHE_OBJECT_LIST is enabled, the FS-Cache facility will maintain a +list of all the objects currently allocated and allow them to be viewed +through: + + /proc/fs/fscache/objects + +This will look something like: + + [root@andromeda ~]# head /proc/fs/fscache/objects + OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS EM EV F S | NETFS_COOKIE_DEF TY FL NETFS_DATA OBJECT_KEY, AUX_DATA + ======== ======== ==== ===== === === === == ===== == == = = | ================ == == ================ ================ + 17e4b 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a + 1693a 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a + +where the first set of columns before the '|' describe the object: + + COLUMN DESCRIPTION + ======= =============================================================== + OBJECT Object debugging ID (appears as OBJ%x in some debug messages) + PARENT Debugging ID of parent object + STAT Object state + CHLDN Number of child objects of this object + OPS Number of outstanding operations on this object + OOP Number of outstanding child object management operations + IPR + EX Number of outstanding exclusive operations + READS Number of outstanding read operations + EM Object's event mask + EV Events raised on this object + F Object flags + S Object slow-work work item flags + +and the second set of columns describe the object's cookie, if present: + + COLUMN DESCRIPTION + =============== ======================================================= + NETFS_COOKIE_DEF Name of netfs cookie definition + TY Cookie type (IX - index, DT - data, hex - special) + FL Cookie flags + NETFS_DATA Netfs private data stored in the cookie + OBJECT_KEY Object key } 1 column, with separating comma + AUX_DATA Object aux data } presence may be configured + +The data shown may be filtered by attaching the a key to an appropriate keyring +before viewing the file. Something like: + + keyctl add user fscache:objlist @s + +where are a selection of the following letters: + + K Show hexdump of object key (don't show if not given) + A Show hexdump of object aux data (don't show if not given) + +and the following paired letters: + + C Show objects that have a cookie + c Show objects that don't have a cookie + B Show objects that are busy + b Show objects that aren't busy + W Show objects that have pending writes + w Show objects that don't have pending writes + R Show objects that have outstanding reads + r Show objects that don't have outstanding reads + S Show objects that have slow work queued + s Show objects that don't have slow work queued + +If neither side of a letter pair is given, then both are implied. For example: + + keyctl add user fscache:objlist KB @s + +shows objects that are busy, and lists their object keys, but does not dump +their auxiliary data. It also implies "CcWwRrSs", but as 'B' is given, 'b' is +not implied. + +By default all objects and all fields will be shown. + + ========= DEBUGGING ========= diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 431accd..dd7f852 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -331,6 +331,7 @@ static void cachefiles_put_object(struct fscache_object *_object) } cache = object->fscache.cache; + fscache_object_destroy(&object->fscache); kmem_cache_free(cachefiles_object_jar, object); fscache_object_destroyed(cache); } diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index a69787e..3304646 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -333,7 +333,8 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; - op->op.flags = FSCACHE_OP_FAST; + op->op.flags &= FSCACHE_OP_KEEP_FLAGS; + op->op.flags |= FSCACHE_OP_FAST; op->op.processor = cachefiles_read_copier; pagevec_init(&pagevec, 0); @@ -639,7 +640,8 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, pagevec_init(&pagevec, 0); - op->op.flags = FSCACHE_OP_FAST; + op->op.flags &= FSCACHE_OP_KEEP_FLAGS; + op->op.flags |= FSCACHE_OP_FAST; op->op.processor = cachefiles_read_copier; INIT_LIST_HEAD(&backpages); diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig index 9bbb8ce..864dac2 100644 --- a/fs/fscache/Kconfig +++ b/fs/fscache/Kconfig @@ -54,3 +54,10 @@ config FSCACHE_DEBUG enabled by setting bits in /sys/modules/fscache/parameter/debug. See Documentation/filesystems/caching/fscache.txt for more information. + +config FSCACHE_OBJECT_LIST + bool "Maintain global object list for debugging purposes" + depends on FSCACHE && PROC_FS + help + Maintain a global list of active fscache objects that can be + retrieved through /proc/fs/fscache/objects for debugging purposes diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile index 91571b9..6d56153 100644 --- a/fs/fscache/Makefile +++ b/fs/fscache/Makefile @@ -15,5 +15,6 @@ fscache-y := \ fscache-$(CONFIG_PROC_FS) += proc.o fscache-$(CONFIG_FSCACHE_STATS) += stats.o fscache-$(CONFIG_FSCACHE_HISTOGRAM) += histogram.o +fscache-$(CONFIG_FSCACHE_OBJECT_LIST) += object-list.o obj-$(CONFIG_FSCACHE) := fscache.o diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index e21985b..724384e 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -263,6 +263,7 @@ int fscache_add_cache(struct fscache_cache *cache, spin_lock(&cache->object_list_lock); list_add_tail(&ifsdef->cache_link, &cache->object_list); spin_unlock(&cache->object_list_lock); + fscache_objlist_add(ifsdef); /* add the cache's netfs definition index object to the top level index * cookie as a known backing object */ diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 72fd18f..9b51873 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -349,6 +349,8 @@ static int fscache_attach_object(struct fscache_cookie *cookie, object->cookie = cookie; atomic_inc(&cookie->usage); hlist_add_head(&object->cookie_link, &cookie->backing_objects); + + fscache_objlist_add(object); ret = 0; cant_attach_object: diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 1c34130..fe02973 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -88,11 +88,24 @@ extern int fscache_wait_bit_interruptible(void *); /* * object.c */ +extern const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5]; + extern void fscache_withdrawing_object(struct fscache_cache *, struct fscache_object *); extern void fscache_enqueue_object(struct fscache_object *); /* + * object-list.c + */ +#ifdef CONFIG_FSCACHE_OBJECT_LIST +extern const struct file_operations fscache_objlist_fops; + +extern void fscache_objlist_add(struct fscache_object *); +#else +#define fscache_objlist_add(object) do {} while(0) +#endif + +/* * operation.c */ extern int fscache_submit_exclusive_op(struct fscache_object *, diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c new file mode 100644 index 0000000..e590242 --- /dev/null +++ b/fs/fscache/object-list.c @@ -0,0 +1,432 @@ +/* Global fscache object list maintainer and viewer + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define FSCACHE_DEBUG_LEVEL COOKIE +#include +#include +#include +#include +#include "internal.h" + +static struct rb_root fscache_object_list; +static DEFINE_RWLOCK(fscache_object_list_lock); + +struct fscache_objlist_data { + unsigned long config; /* display configuration */ +#define FSCACHE_OBJLIST_CONFIG_KEY 0x00000001 /* show object keys */ +#define FSCACHE_OBJLIST_CONFIG_AUX 0x00000002 /* show object auxdata */ +#define FSCACHE_OBJLIST_CONFIG_COOKIE 0x00000004 /* show objects with cookies */ +#define FSCACHE_OBJLIST_CONFIG_NOCOOKIE 0x00000008 /* show objects without cookies */ +#define FSCACHE_OBJLIST_CONFIG_BUSY 0x00000010 /* show busy objects */ +#define FSCACHE_OBJLIST_CONFIG_IDLE 0x00000020 /* show idle objects */ +#define FSCACHE_OBJLIST_CONFIG_PENDWR 0x00000040 /* show objects with pending writes */ +#define FSCACHE_OBJLIST_CONFIG_NOPENDWR 0x00000080 /* show objects without pending writes */ +#define FSCACHE_OBJLIST_CONFIG_READS 0x00000100 /* show objects with active reads */ +#define FSCACHE_OBJLIST_CONFIG_NOREADS 0x00000200 /* show objects without active reads */ +#define FSCACHE_OBJLIST_CONFIG_EVENTS 0x00000400 /* show objects with events */ +#define FSCACHE_OBJLIST_CONFIG_NOEVENTS 0x00000800 /* show objects without no events */ +#define FSCACHE_OBJLIST_CONFIG_WORK 0x00001000 /* show objects with slow work */ +#define FSCACHE_OBJLIST_CONFIG_NOWORK 0x00002000 /* show objects without slow work */ + + u8 buf[512]; /* key and aux data buffer */ +}; + +/* + * Add an object to the object list + * - we use the address of the fscache_object structure as the key into the + * tree + */ +void fscache_objlist_add(struct fscache_object *obj) +{ + struct fscache_object *xobj; + struct rb_node **p = &fscache_object_list.rb_node, *parent = NULL; + + write_lock(&fscache_object_list_lock); + + while (*p) { + parent = *p; + xobj = rb_entry(parent, struct fscache_object, objlist_link); + + if (obj < xobj) + p = &(*p)->rb_left; + else if (obj > xobj) + p = &(*p)->rb_right; + else + BUG(); + } + + rb_link_node(&obj->objlist_link, parent, p); + rb_insert_color(&obj->objlist_link, &fscache_object_list); + + write_unlock(&fscache_object_list_lock); +} + +/** + * fscache_object_destroy - Note that a cache object is about to be destroyed + * @object: The object to be destroyed + * + * Note the imminent destruction and deallocation of a cache object record. + */ +void fscache_object_destroy(struct fscache_object *obj) +{ + write_lock(&fscache_object_list_lock); + + BUG_ON(RB_EMPTY_ROOT(&fscache_object_list)); + rb_erase(&obj->objlist_link, &fscache_object_list); + + write_unlock(&fscache_object_list_lock); +} +EXPORT_SYMBOL(fscache_object_destroy); + +/* + * find the object in the tree on or after the specified index + */ +static struct fscache_object *fscache_objlist_lookup(loff_t *_pos) +{ + struct fscache_object *pobj, *obj, *minobj = NULL; + struct rb_node *p; + unsigned long pos; + + if (*_pos >= (unsigned long) ERR_PTR(-ENOENT)) + return NULL; + pos = *_pos; + + /* banners (can't represent line 0 by pos 0 as that would involve + * returning a NULL pointer) */ + if (pos == 0) + return (struct fscache_object *) ++(*_pos); + if (pos < 3) + return (struct fscache_object *)pos; + + pobj = (struct fscache_object *)pos; + p = fscache_object_list.rb_node; + while (p) { + obj = rb_entry(p, struct fscache_object, objlist_link); + if (pobj < obj) { + if (!minobj || minobj > obj) + minobj = obj; + p = p->rb_left; + } else if (pobj > obj) { + p = p->rb_right; + } else { + minobj = obj; + break; + } + obj = NULL; + } + + if (!minobj) + *_pos = (unsigned long) ERR_PTR(-ENOENT); + else if (minobj != obj) + *_pos = (unsigned long) minobj; + return minobj; +} + +/* + * set up the iterator to start reading from the first line + */ +static void *fscache_objlist_start(struct seq_file *m, loff_t *_pos) + __acquires(&fscache_object_list_lock) +{ + read_lock(&fscache_object_list_lock); + return fscache_objlist_lookup(_pos); +} + +/* + * move to the next line + */ +static void *fscache_objlist_next(struct seq_file *m, void *v, loff_t *_pos) +{ + (*_pos)++; + return fscache_objlist_lookup(_pos); +} + +/* + * clean up after reading + */ +static void fscache_objlist_stop(struct seq_file *m, void *v) + __releases(&fscache_object_list_lock) +{ + read_unlock(&fscache_object_list_lock); +} + +/* + * display an object + */ +static int fscache_objlist_show(struct seq_file *m, void *v) +{ + struct fscache_objlist_data *data = m->private; + struct fscache_object *obj = v; + unsigned long config = data->config; + uint16_t keylen, auxlen; + char _type[3], *type; + bool no_cookie; + u8 *buf = data->buf, *p; + + if ((unsigned long) v == 1) { + seq_puts(m, "OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS" + " EM EV F S" + " | NETFS_COOKIE_DEF TY FL NETFS_DATA"); + if (config & (FSCACHE_OBJLIST_CONFIG_KEY | + FSCACHE_OBJLIST_CONFIG_AUX)) + seq_puts(m, " "); + if (config & FSCACHE_OBJLIST_CONFIG_KEY) + seq_puts(m, "OBJECT_KEY"); + if ((config & (FSCACHE_OBJLIST_CONFIG_KEY | + FSCACHE_OBJLIST_CONFIG_AUX)) == + (FSCACHE_OBJLIST_CONFIG_KEY | FSCACHE_OBJLIST_CONFIG_AUX)) + seq_puts(m, ", "); + if (config & FSCACHE_OBJLIST_CONFIG_AUX) + seq_puts(m, "AUX_DATA"); + seq_puts(m, "\n"); + return 0; + } + + if ((unsigned long) v == 2) { + seq_puts(m, "======== ======== ==== ===== === === === == =====" + " == == = =" + " | ================ == == ================"); + if (config & (FSCACHE_OBJLIST_CONFIG_KEY | + FSCACHE_OBJLIST_CONFIG_AUX)) + seq_puts(m, " ================"); + seq_puts(m, "\n"); + return 0; + } + + /* filter out any unwanted objects */ +#define FILTER(criterion, _yes, _no) \ + do { \ + unsigned long yes = FSCACHE_OBJLIST_CONFIG_##_yes; \ + unsigned long no = FSCACHE_OBJLIST_CONFIG_##_no; \ + if (criterion) { \ + if (!(config & yes)) \ + return 0; \ + } else { \ + if (!(config & no)) \ + return 0; \ + } \ + } while(0) + + if (~config) { + FILTER(obj->cookie, + COOKIE, NOCOOKIE); + FILTER(obj->state != FSCACHE_OBJECT_ACTIVE || + obj->n_ops != 0 || + obj->n_obj_ops != 0 || + obj->flags || + !list_empty(&obj->dependents), + BUSY, IDLE); + FILTER(test_bit(FSCACHE_OBJECT_PENDING_WRITE, &obj->flags), + PENDWR, NOPENDWR); + FILTER(atomic_read(&obj->n_reads), + READS, NOREADS); + FILTER(obj->events & obj->event_mask, + EVENTS, NOEVENTS); + FILTER(obj->work.flags & ~(1UL << SLOW_WORK_VERY_SLOW), + WORK, NOWORK); + } + + seq_printf(m, + "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %1lx %1lx | ", + obj->debug_id, + obj->parent ? obj->parent->debug_id : -1, + fscache_object_states_short[obj->state], + obj->n_children, + obj->n_ops, + obj->n_obj_ops, + obj->n_in_progress, + obj->n_exclusive, + atomic_read(&obj->n_reads), + obj->event_mask & FSCACHE_OBJECT_EVENTS_MASK, + obj->events, + obj->flags, + obj->work.flags); + + no_cookie = true; + keylen = auxlen = 0; + if (obj->cookie) { + spin_lock(&obj->lock); + if (obj->cookie) { + switch (obj->cookie->def->type) { + case 0: + type = "IX"; + break; + case 1: + type = "DT"; + break; + default: + sprintf(_type, "%02u", + obj->cookie->def->type); + type = _type; + break; + } + + seq_printf(m, "%-16s %s %2lx %16p", + obj->cookie->def->name, + type, + obj->cookie->flags, + obj->cookie->netfs_data); + + if (obj->cookie->def->get_key && + config & FSCACHE_OBJLIST_CONFIG_KEY) + keylen = obj->cookie->def->get_key( + obj->cookie->netfs_data, + buf, 400); + + if (obj->cookie->def->get_aux && + config & FSCACHE_OBJLIST_CONFIG_AUX) + auxlen = obj->cookie->def->get_aux( + obj->cookie->netfs_data, + buf + keylen, 512 - keylen); + + no_cookie = false; + } + spin_unlock(&obj->lock); + + if (!no_cookie && (keylen > 0 || auxlen > 0)) { + seq_printf(m, " "); + for (p = buf; keylen > 0; keylen--) + seq_printf(m, "%02x", *p++); + if (auxlen > 0) { + if (config & FSCACHE_OBJLIST_CONFIG_KEY) + seq_printf(m, ", "); + for (; auxlen > 0; auxlen--) + seq_printf(m, "%02x", *p++); + } + } + } + + if (no_cookie) + seq_printf(m, "\n"); + else + seq_printf(m, "\n"); + return 0; +} + +static const struct seq_operations fscache_objlist_ops = { + .start = fscache_objlist_start, + .stop = fscache_objlist_stop, + .next = fscache_objlist_next, + .show = fscache_objlist_show, +}; + +/* + * get the configuration for filtering the list + */ +static void fscache_objlist_config(struct fscache_objlist_data *data) +{ +#ifdef CONFIG_KEYS + struct user_key_payload *confkey; + unsigned long config; + struct key *key; + const char *buf; + int len; + + key = request_key(&key_type_user, "fscache:objlist", NULL); + if (IS_ERR(key)) + goto no_config; + + config = 0; + rcu_read_lock(); + + confkey = key->payload.data; + buf = confkey->data; + + for (len = confkey->datalen - 1; len >= 0; len--) { + switch (buf[len]) { + case 'K': config |= FSCACHE_OBJLIST_CONFIG_KEY; break; + case 'A': config |= FSCACHE_OBJLIST_CONFIG_AUX; break; + case 'C': config |= FSCACHE_OBJLIST_CONFIG_COOKIE; break; + case 'c': config |= FSCACHE_OBJLIST_CONFIG_NOCOOKIE; break; + case 'B': config |= FSCACHE_OBJLIST_CONFIG_BUSY; break; + case 'b': config |= FSCACHE_OBJLIST_CONFIG_IDLE; break; + case 'W': config |= FSCACHE_OBJLIST_CONFIG_PENDWR; break; + case 'w': config |= FSCACHE_OBJLIST_CONFIG_NOPENDWR; break; + case 'R': config |= FSCACHE_OBJLIST_CONFIG_READS; break; + case 'r': config |= FSCACHE_OBJLIST_CONFIG_NOREADS; break; + case 'S': config |= FSCACHE_OBJLIST_CONFIG_WORK; break; + case 's': config |= FSCACHE_OBJLIST_CONFIG_NOWORK; break; + } + } + + rcu_read_unlock(); + key_put(key); + + if (!(config & (FSCACHE_OBJLIST_CONFIG_COOKIE | FSCACHE_OBJLIST_CONFIG_NOCOOKIE))) + config |= FSCACHE_OBJLIST_CONFIG_COOKIE | FSCACHE_OBJLIST_CONFIG_NOCOOKIE; + if (!(config & (FSCACHE_OBJLIST_CONFIG_BUSY | FSCACHE_OBJLIST_CONFIG_IDLE))) + config |= FSCACHE_OBJLIST_CONFIG_BUSY | FSCACHE_OBJLIST_CONFIG_IDLE; + if (!(config & (FSCACHE_OBJLIST_CONFIG_PENDWR | FSCACHE_OBJLIST_CONFIG_NOPENDWR))) + config |= FSCACHE_OBJLIST_CONFIG_PENDWR | FSCACHE_OBJLIST_CONFIG_NOPENDWR; + if (!(config & (FSCACHE_OBJLIST_CONFIG_READS | FSCACHE_OBJLIST_CONFIG_NOREADS))) + config |= FSCACHE_OBJLIST_CONFIG_READS | FSCACHE_OBJLIST_CONFIG_NOREADS; + if (!(config & (FSCACHE_OBJLIST_CONFIG_EVENTS | FSCACHE_OBJLIST_CONFIG_NOEVENTS))) + config |= FSCACHE_OBJLIST_CONFIG_EVENTS | FSCACHE_OBJLIST_CONFIG_NOEVENTS; + if (!(config & (FSCACHE_OBJLIST_CONFIG_WORK | FSCACHE_OBJLIST_CONFIG_NOWORK))) + config |= FSCACHE_OBJLIST_CONFIG_WORK | FSCACHE_OBJLIST_CONFIG_NOWORK; + + data->config = config; + return; + +no_config: +#endif + data->config = ULONG_MAX; +} + +/* + * open "/proc/fs/fscache/objects" to provide a list of active objects + * - can be configured by a user-defined key added to the caller's keyrings + */ +static int fscache_objlist_open(struct inode *inode, struct file *file) +{ + struct fscache_objlist_data *data; + struct seq_file *m; + int ret; + + ret = seq_open(file, &fscache_objlist_ops); + if (ret < 0) + return ret; + + m = file->private_data; + + /* buffer for key extraction */ + data = kmalloc(sizeof(struct fscache_objlist_data), GFP_KERNEL); + if (!data) { + seq_release(inode, file); + return -ENOMEM; + } + + /* get the configuration key */ + fscache_objlist_config(data); + + m->private = data; + return 0; +} + +/* + * clean up on close + */ +static int fscache_objlist_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = file->private_data; + + kfree(m->private); + m->private = NULL; + return seq_release(inode, file); +} + +const struct file_operations fscache_objlist_fops = { + .owner = THIS_MODULE, + .open = fscache_objlist_open, + .read = seq_read, + .llseek = seq_lseek, + .release = fscache_objlist_release, +}; diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 615b63d..ad1644f 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -34,7 +34,7 @@ const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = { }; EXPORT_SYMBOL(fscache_object_states); -static const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = { +const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = { [FSCACHE_OBJECT_INIT] = "INIT", [FSCACHE_OBJECT_LOOKING_UP] = "LOOK", [FSCACHE_OBJECT_CREATING] = "CRTN", diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 91bbe6f..09e43b6 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -322,6 +322,9 @@ void fscache_put_operation(struct fscache_operation *op) object = op->object; + if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) + atomic_dec(&object->n_reads); + /* now... we may get called with the object spinlock held, so we * complete the cleanup here only if we can immediately acquire the * lock, and defer it otherwise */ diff --git a/fs/fscache/page.c b/fs/fscache/page.c index e8bbc39..c5973e3 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -275,6 +275,9 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP); + atomic_inc(&object->n_reads); + set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); + if (fscache_submit_op(object, &op->op) < 0) goto nobufs_unlock; spin_unlock(&cookie->lock); @@ -386,6 +389,9 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, object = hlist_entry(cookie->backing_objects.first, struct fscache_object, cookie_link); + atomic_inc(&object->n_reads); + set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); + if (fscache_submit_op(object, &op->op) < 0) goto nobufs_unlock; spin_unlock(&cookie->lock); diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c index beeab44..1d9e495 100644 --- a/fs/fscache/proc.c +++ b/fs/fscache/proc.c @@ -37,10 +37,20 @@ int __init fscache_proc_init(void) goto error_histogram; #endif +#ifdef CONFIG_FSCACHE_OBJECT_LIST + if (!proc_create("fs/fscache/objects", S_IFREG | 0444, NULL, + &fscache_objlist_fops)) + goto error_objects; +#endif + _leave(" = 0"); return 0; +#ifdef CONFIG_FSCACHE_OBJECT_LIST +error_objects: +#endif #ifdef CONFIG_FSCACHE_HISTOGRAM + remove_proc_entry("fs/fscache/histogram", NULL); error_histogram: #endif #ifdef CONFIG_FSCACHE_STATS @@ -58,6 +68,9 @@ error_dir: */ void fscache_proc_cleanup(void) { +#ifdef CONFIG_FSCACHE_OBJECT_LIST + remove_proc_entry("fs/fscache/objects", NULL); +#endif #ifdef CONFIG_FSCACHE_HISTOGRAM remove_proc_entry("fs/fscache/histogram", NULL); #endif diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 7a9847c..184cbdf 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -91,6 +91,8 @@ struct fscache_operation { #define FSCACHE_OP_WAITING 4 /* cleared when op is woken */ #define FSCACHE_OP_EXCLUSIVE 5 /* exclusive op, other ops must wait */ #define FSCACHE_OP_DEAD 6 /* op is now dead */ +#define FSCACHE_OP_DEC_READ_CNT 7 /* decrement object->n_reads on destruction */ +#define FSCACHE_OP_KEEP_FLAGS 0xc0 /* flags to keep when repurposing an op */ atomic_t usage; unsigned debug_id; /* debugging ID */ @@ -357,6 +359,7 @@ struct fscache_object { int n_obj_ops; /* number of object ops outstanding on object */ int n_in_progress; /* number of ops in progress */ int n_exclusive; /* number of exclusive ops queued */ + atomic_t n_reads; /* number of read ops in progress */ spinlock_t lock; /* state and operations lock */ unsigned long lookup_jif; /* time at which lookup started */ @@ -370,6 +373,7 @@ struct fscache_object { #define FSCACHE_OBJECT_EV_RELEASE 4 /* T if netfs requested object release */ #define FSCACHE_OBJECT_EV_RETIRE 5 /* T if netfs requested object retirement */ #define FSCACHE_OBJECT_EV_WITHDRAW 6 /* T if cache requested object withdrawal */ +#define FSCACHE_OBJECT_EVENTS_MASK 0x7f /* mask of all events*/ unsigned long flags; #define FSCACHE_OBJECT_LOCK 0 /* T if object is busy being processed */ @@ -385,6 +389,9 @@ struct fscache_object { struct list_head dependents; /* FIFO of dependent objects */ struct list_head dep_link; /* link in parent's dependents list */ struct list_head pending_ops; /* unstarted operations on this object */ +#ifdef CONFIG_FSCACHE_OBJECT_LIST + struct rb_node objlist_link; /* link in global object list */ +#endif pgoff_t store_limit; /* current storage limit */ }; @@ -434,6 +441,12 @@ void fscache_object_init(struct fscache_object *object, extern void fscache_object_lookup_negative(struct fscache_object *object); extern void fscache_obtained_object(struct fscache_object *object); +#ifdef CONFIG_FSCACHE_OBJECT_LIST +extern void fscache_object_destroy(struct fscache_object *object); +#else +#define fscache_object_destroy(object) do {} while(0) +#endif + /** * fscache_object_destroyed - Note destruction of an object in a cache * @cache: The cache from which the object came -- cgit v0.10.2 From 52bd75fdb135d6133d878ae60c6e7e3f4ebc1cfc Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:08 +0000 Subject: FS-Cache: Add counters for entry/exit to/from cache operation functions Count entries to and exits from cache operation table functions. Maintain these as a single counter that's added to or removed from as appropriate. Signed-off-by: David Howells diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index cac09e1..b6c32c0 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -274,6 +274,22 @@ proc files. dfr=N Number of async ops queued for deferred release rel=N Number of async ops released gc=N Number of deferred-release async ops garbage collected + CacheOp alo=N Number of in-progress alloc_object() cache ops + luo=N Number of in-progress lookup_object() cache ops + luc=N Number of in-progress lookup_complete() cache ops + gro=N Number of in-progress grab_object() cache ops + upo=N Number of in-progress update_object() cache ops + dro=N Number of in-progress drop_object() cache ops + pto=N Number of in-progress put_object() cache ops + syn=N Number of in-progress sync_cache() cache ops + atc=N Number of in-progress attr_changed() cache ops + rap=N Number of in-progress read_or_alloc_page() cache ops + ras=N Number of in-progress read_or_alloc_pages() cache ops + alp=N Number of in-progress allocate_page() cache ops + als=N Number of in-progress allocate_pages() cache ops + wrp=N Number of in-progress write_page() cache ops + ucp=N Number of in-progress uncache_page() cache ops + dsp=N Number of in-progress dissociate_pages() cache ops (*) /proc/fs/fscache/histogram diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index 724384e..6a3c48a 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -381,11 +381,15 @@ void fscache_withdraw_cache(struct fscache_cache *cache) /* make sure all pages pinned by operations on behalf of the netfs are * written to disk */ + fscache_stat(&fscache_n_cop_sync_cache); cache->ops->sync_cache(cache); + fscache_stat_d(&fscache_n_cop_sync_cache); /* dissociate all the netfs pages backed by this cache from the block * mappings in the cache */ + fscache_stat(&fscache_n_cop_dissociate_pages); cache->ops->dissociate_pages(cache); + fscache_stat_d(&fscache_n_cop_dissociate_pages); /* we now have to destroy all the active objects pertaining to this * cache - which we do by passing them off to thread pool to be diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 9b51873..432482e 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -249,7 +249,9 @@ static int fscache_alloc_object(struct fscache_cache *cache, /* ask the cache to allocate an object (we may end up with duplicate * objects at this stage, but we sort that out later) */ + fscache_stat(&fscache_n_cop_alloc_object); object = cache->ops->alloc_object(cache, cookie); + fscache_stat_d(&fscache_n_cop_alloc_object); if (IS_ERR(object)) { fscache_stat(&fscache_n_object_no_alloc); ret = PTR_ERR(object); @@ -270,8 +272,11 @@ static int fscache_alloc_object(struct fscache_cache *cache, /* only attach if we managed to allocate all we needed, otherwise * discard the object we just allocated and instead use the one * attached to the cookie */ - if (fscache_attach_object(cookie, object) < 0) + if (fscache_attach_object(cookie, object) < 0) { + fscache_stat(&fscache_n_cop_put_object); cache->ops->put_object(object); + fscache_stat_d(&fscache_n_cop_put_object); + } _leave(" = 0"); return 0; @@ -287,7 +292,9 @@ object_already_extant: return 0; error_put: + fscache_stat(&fscache_n_cop_put_object); cache->ops->put_object(object); + fscache_stat_d(&fscache_n_cop_put_object); error: _leave(" = %d", ret); return ret; diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index fe02973..b85cc89 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -208,11 +208,33 @@ extern atomic_t fscache_n_checkaux_okay; extern atomic_t fscache_n_checkaux_update; extern atomic_t fscache_n_checkaux_obsolete; +extern atomic_t fscache_n_cop_alloc_object; +extern atomic_t fscache_n_cop_lookup_object; +extern atomic_t fscache_n_cop_lookup_complete; +extern atomic_t fscache_n_cop_grab_object; +extern atomic_t fscache_n_cop_update_object; +extern atomic_t fscache_n_cop_drop_object; +extern atomic_t fscache_n_cop_put_object; +extern atomic_t fscache_n_cop_sync_cache; +extern atomic_t fscache_n_cop_attr_changed; +extern atomic_t fscache_n_cop_read_or_alloc_page; +extern atomic_t fscache_n_cop_read_or_alloc_pages; +extern atomic_t fscache_n_cop_allocate_page; +extern atomic_t fscache_n_cop_allocate_pages; +extern atomic_t fscache_n_cop_write_page; +extern atomic_t fscache_n_cop_uncache_page; +extern atomic_t fscache_n_cop_dissociate_pages; + static inline void fscache_stat(atomic_t *stat) { atomic_inc(stat); } +static inline void fscache_stat_d(atomic_t *stat) +{ + atomic_dec(stat); +} + extern const struct file_operations fscache_stats_fops; #else diff --git a/fs/fscache/object.c b/fs/fscache/object.c index ad1644f..0d65c0c 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -144,13 +144,17 @@ static void fscache_object_state_machine(struct fscache_object *object) case FSCACHE_OBJECT_UPDATING: clear_bit(FSCACHE_OBJECT_EV_UPDATE, &object->events); fscache_stat(&fscache_n_updates_run); + fscache_stat(&fscache_n_cop_update_object); object->cache->ops->update_object(object); + fscache_stat_d(&fscache_n_cop_update_object); goto active_transit; /* handle an object dying during lookup or creation */ case FSCACHE_OBJECT_LC_DYING: object->event_mask &= ~(1 << FSCACHE_OBJECT_EV_UPDATE); + fscache_stat(&fscache_n_cop_lookup_complete); object->cache->ops->lookup_complete(object); + fscache_stat_d(&fscache_n_cop_lookup_complete); spin_lock(&object->lock); object->state = FSCACHE_OBJECT_DYING; @@ -416,7 +420,9 @@ static void fscache_initialise_object(struct fscache_object *object) * binding on to us, so we need to make sure we don't * add ourself to the list multiple times */ if (list_empty(&object->dep_link)) { + fscache_stat(&fscache_n_cop_grab_object); object->cache->ops->grab_object(object); + fscache_stat_d(&fscache_n_cop_grab_object); list_add(&object->dep_link, &parent->dependents); @@ -478,7 +484,9 @@ static void fscache_lookup_object(struct fscache_object *object) object->cache->tag->name); fscache_stat(&fscache_n_object_lookups); + fscache_stat(&fscache_n_cop_lookup_object); object->cache->ops->lookup_object(object); + fscache_stat_d(&fscache_n_cop_lookup_object); if (test_bit(FSCACHE_OBJECT_EV_ERROR, &object->events)) set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); @@ -602,7 +610,9 @@ static void fscache_object_available(struct fscache_object *object) } spin_unlock(&object->lock); + fscache_stat(&fscache_n_cop_lookup_complete); object->cache->ops->lookup_complete(object); + fscache_stat_d(&fscache_n_cop_lookup_complete); fscache_enqueue_dependents(object); fscache_hist(fscache_obj_instantiate_histogram, object->lookup_jif); @@ -625,7 +635,9 @@ static void fscache_drop_object(struct fscache_object *object) list_del_init(&object->cache_link); spin_unlock(&cache->object_list_lock); + fscache_stat(&fscache_n_cop_drop_object); cache->ops->drop_object(object); + fscache_stat_d(&fscache_n_cop_drop_object); if (parent) { _debug("release parent OBJ%x {%d}", @@ -640,7 +652,9 @@ static void fscache_drop_object(struct fscache_object *object) } /* this just shifts the object release to the slow work processor */ + fscache_stat(&fscache_n_cop_put_object); object->cache->ops->put_object(object); + fscache_stat_d(&fscache_n_cop_put_object); _leave(""); } @@ -730,8 +744,12 @@ static int fscache_object_slow_work_get_ref(struct slow_work *work) { struct fscache_object *object = container_of(work, struct fscache_object, work); + int ret; - return object->cache->ops->grab_object(object) ? 0 : -EAGAIN; + fscache_stat(&fscache_n_cop_grab_object); + ret = object->cache->ops->grab_object(object) ? 0 : -EAGAIN; + fscache_stat_d(&fscache_n_cop_grab_object); + return ret; } /* @@ -742,7 +760,9 @@ static void fscache_object_slow_work_put_ref(struct slow_work *work) struct fscache_object *object = container_of(work, struct fscache_object, work); - return object->cache->ops->put_object(object); + fscache_stat(&fscache_n_cop_put_object); + object->cache->ops->put_object(object); + fscache_stat_d(&fscache_n_cop_put_object); } /* @@ -779,7 +799,9 @@ static void fscache_enqueue_dependents(struct fscache_object *object) /* sort onto appropriate lists */ fscache_enqueue_object(dep); + fscache_stat(&fscache_n_cop_put_object); dep->cache->ops->put_object(dep); + fscache_stat_d(&fscache_n_cop_put_object); if (!list_empty(&object->dependents)) cond_resched_lock(&object->lock); diff --git a/fs/fscache/page.c b/fs/fscache/page.c index c5973e3..250dfd3 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -71,7 +71,9 @@ static void fscache_attr_changed_op(struct fscache_operation *op) if (fscache_object_is_active(object)) { fscache_set_op_state(op, "CallFS"); + fscache_stat(&fscache_n_cop_attr_changed); ret = object->cache->ops->attr_changed(object); + fscache_stat_d(&fscache_n_cop_attr_changed); fscache_set_op_state(op, "Done"); if (ret < 0) fscache_abort_object(object); @@ -300,11 +302,15 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, /* ask the cache to honour the operation */ if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) { + fscache_stat(&fscache_n_cop_allocate_page); ret = object->cache->ops->allocate_page(op, page, gfp); + fscache_stat_d(&fscache_n_cop_allocate_page); if (ret == 0) ret = -ENODATA; } else { + fscache_stat(&fscache_n_cop_read_or_alloc_page); ret = object->cache->ops->read_or_alloc_page(op, page, gfp); + fscache_stat_d(&fscache_n_cop_read_or_alloc_page); } if (ret == -ENOMEM) @@ -358,7 +364,6 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, void *context, gfp_t gfp) { - fscache_pages_retrieval_func_t func; struct fscache_retrieval *op; struct fscache_object *object; int ret; @@ -413,11 +418,17 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, } /* ask the cache to honour the operation */ - if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) - func = object->cache->ops->allocate_pages; - else - func = object->cache->ops->read_or_alloc_pages; - ret = func(op, pages, nr_pages, gfp); + if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) { + fscache_stat(&fscache_n_cop_allocate_pages); + ret = object->cache->ops->allocate_pages( + op, pages, nr_pages, gfp); + fscache_stat_d(&fscache_n_cop_allocate_pages); + } else { + fscache_stat(&fscache_n_cop_read_or_alloc_pages); + ret = object->cache->ops->read_or_alloc_pages( + op, pages, nr_pages, gfp); + fscache_stat_d(&fscache_n_cop_read_or_alloc_pages); + } if (ret == -ENOMEM) fscache_stat(&fscache_n_retrievals_nomem); @@ -500,7 +511,9 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, } /* ask the cache to honour the operation */ + fscache_stat(&fscache_n_cop_allocate_page); ret = object->cache->ops->allocate_page(op, page, gfp); + fscache_stat_d(&fscache_n_cop_allocate_page); if (ret < 0) fscache_stat(&fscache_n_allocs_nobufs); @@ -578,7 +591,9 @@ static void fscache_write_op(struct fscache_operation *_op) if (page) { fscache_set_op_state(&op->op, "Store"); + fscache_stat(&fscache_n_cop_write_page); ret = object->cache->ops->write_page(op, page); + fscache_stat_d(&fscache_n_cop_write_page); fscache_set_op_state(&op->op, "EndWrite"); fscache_end_page_write(cookie, page); page_cache_release(page); @@ -786,7 +801,9 @@ void __fscache_uncache_page(struct fscache_cookie *cookie, struct page *page) if (TestClearPageFsCache(page) && object->cache->ops->uncache_page) { /* the cache backend releases the cookie lock */ + fscache_stat(&fscache_n_cop_uncache_page); object->cache->ops->uncache_page(object, page); + fscache_stat_d(&fscache_n_cop_uncache_page); goto done; } diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 65deb99..20233fb 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -93,6 +93,23 @@ atomic_t fscache_n_checkaux_okay; atomic_t fscache_n_checkaux_update; atomic_t fscache_n_checkaux_obsolete; +atomic_t fscache_n_cop_alloc_object; +atomic_t fscache_n_cop_lookup_object; +atomic_t fscache_n_cop_lookup_complete; +atomic_t fscache_n_cop_grab_object; +atomic_t fscache_n_cop_update_object; +atomic_t fscache_n_cop_drop_object; +atomic_t fscache_n_cop_put_object; +atomic_t fscache_n_cop_sync_cache; +atomic_t fscache_n_cop_attr_changed; +atomic_t fscache_n_cop_read_or_alloc_page; +atomic_t fscache_n_cop_read_or_alloc_pages; +atomic_t fscache_n_cop_allocate_page; +atomic_t fscache_n_cop_allocate_pages; +atomic_t fscache_n_cop_write_page; +atomic_t fscache_n_cop_uncache_page; +atomic_t fscache_n_cop_dissociate_pages; + /* * display the general statistics */ @@ -192,6 +209,26 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_op_deferred_release), atomic_read(&fscache_n_op_release), atomic_read(&fscache_n_op_gc)); + + seq_printf(m, "CacheOp: alo=%d luo=%d luc=%d gro=%d\n", + atomic_read(&fscache_n_cop_alloc_object), + atomic_read(&fscache_n_cop_lookup_object), + atomic_read(&fscache_n_cop_lookup_complete), + atomic_read(&fscache_n_cop_grab_object)); + seq_printf(m, "CacheOp: upo=%d dro=%d pto=%d atc=%d syn=%d\n", + atomic_read(&fscache_n_cop_update_object), + atomic_read(&fscache_n_cop_drop_object), + atomic_read(&fscache_n_cop_put_object), + atomic_read(&fscache_n_cop_attr_changed), + atomic_read(&fscache_n_cop_sync_cache)); + seq_printf(m, "CacheOp: rap=%d ras=%d alp=%d als=%d wrp=%d ucp=%d dsp=%d\n", + atomic_read(&fscache_n_cop_read_or_alloc_page), + atomic_read(&fscache_n_cop_read_or_alloc_pages), + atomic_read(&fscache_n_cop_allocate_page), + atomic_read(&fscache_n_cop_allocate_pages), + atomic_read(&fscache_n_cop_write_page), + atomic_read(&fscache_n_cop_uncache_page), + atomic_read(&fscache_n_cop_dissociate_pages)); return 0; } -- cgit v0.10.2 From 7e311a207d596b9273d811149d6e3e14f05ac4c0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:11 +0000 Subject: FS-Cache: Clear netfs pointers in cookie after detaching object, not before Clear the pointers from the fscache_cookie struct to netfs private data after clearing the pointer to the cookie from the fscache_object struct and releasing the object lock, rather than before. This allows the netfs private data pointers to be relied on simply by holding the object lock, rather than having to hold the cookie lock. This is makes things simpler as the cookie lock has to be taken before the object lock, but sometimes the object pointer is all that the code has. Signed-off-by: David Howells diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 432482e..b1870a6 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -437,12 +437,8 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) event = retire ? FSCACHE_OBJECT_EV_RETIRE : FSCACHE_OBJECT_EV_RELEASE; - /* detach pointers back to the netfs */ spin_lock(&cookie->lock); - cookie->netfs_data = NULL; - cookie->def = NULL; - /* break links with all the active objects */ while (!hlist_empty(&cookie->backing_objects)) { object = hlist_entry(cookie->backing_objects.first, @@ -465,6 +461,10 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) BUG(); } + /* detach pointers back to the netfs */ + cookie->netfs_data = NULL; + cookie->def = NULL; + spin_unlock(&cookie->lock); if (cookie->parent) { -- cgit v0.10.2 From b34df792b4e9e311db47fad27949095d0629c197 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:14 +0000 Subject: FS-Cache: Use radix tree preload correctly in tracking of pages to be stored __fscache_write_page() attempts to load the radix tree preallocation pool for the CPU it is on before calling radix_tree_insert(), as the insertion must be done inside a pair of spinlocks. Use of the preallocation pool, however, is contingent on the radix tree being initialised without __GFP_WAIT specified. __fscache_acquire_cookie() was passing GFP_NOFS to INIT_RADIX_TREE() - but that includes __GFP_WAIT. The solution is to AND out __GFP_WAIT. Additionally, the banner comment to radix_tree_preload() is altered to make note of this prerequisite. Possibly there should be a WARN_ON() too. Without this fix, I have seen the following recursive deadlock caused by radix_tree_insert() attempting to allocate memory inside the spinlocked region, which resulted in FS-Cache being called back into to release memory - which required the spinlock already held. ============================================= [ INFO: possible recursive locking detected ] 2.6.32-rc6-cachefs #24 --------------------------------------------- nfsiod/7916 is trying to acquire lock: (&cookie->lock){+.+.-.}, at: [] __fscache_uncache_page+0xdb/0x160 [fscache] but task is already holding lock: (&cookie->lock){+.+.-.}, at: [] __fscache_write_page+0x15c/0x3f3 [fscache] other info that might help us debug this: 5 locks held by nfsiod/7916: #0: (nfsiod){+.+.+.}, at: [] worker_thread+0x19a/0x2e2 #1: (&task->u.tk_work#2){+.+.+.}, at: [] worker_thread+0x19a/0x2e2 #2: (&cookie->lock){+.+.-.}, at: [] __fscache_write_page+0x15c/0x3f3 [fscache] #3: (&object->lock#2){+.+.-.}, at: [] __fscache_write_page+0x197/0x3f3 [fscache] #4: (&cookie->stores_lock){+.+...}, at: [] __fscache_write_page+0x19f/0x3f3 [fscache] stack backtrace: Pid: 7916, comm: nfsiod Not tainted 2.6.32-rc6-cachefs #24 Call Trace: [] __lock_acquire+0x1649/0x16e3 [] ? __lock_acquire+0x7b7/0x16e3 [] ? dump_trace+0x248/0x257 [] lock_acquire+0x57/0x6d [] ? __fscache_uncache_page+0xdb/0x160 [fscache] [] _spin_lock+0x2c/0x3b [] ? __fscache_uncache_page+0xdb/0x160 [fscache] [] __fscache_uncache_page+0xdb/0x160 [fscache] [] ? __fscache_check_page_write+0x0/0x71 [fscache] [] nfs_fscache_release_page+0x86/0xc4 [nfs] [] nfs_release_page+0x3c/0x41 [nfs] [] try_to_release_page+0x32/0x3b [] shrink_page_list+0x316/0x4ac [] ? mark_held_locks+0x52/0x70 [] ? _spin_unlock_irq+0x2b/0x31 [] shrink_inactive_list+0x392/0x67c [] ? mark_held_locks+0x52/0x70 [] shrink_list+0x8d/0x8f [] shrink_zone+0x278/0x33c [] ? ktime_get_ts+0xad/0xba [] try_to_free_pages+0x22e/0x392 [] ? isolate_pages_global+0x0/0x212 [] __alloc_pages_nodemask+0x3dc/0x5cf [] cache_alloc_refill+0x34d/0x6c1 [] ? radix_tree_node_alloc+0x52/0x5c [] kmem_cache_alloc+0xb2/0x118 [] radix_tree_node_alloc+0x52/0x5c [] radix_tree_insert+0x57/0x19c [] __fscache_write_page+0x1e3/0x3f3 [fscache] [] __nfs_readpage_to_fscache+0x58/0x11e [nfs] [] nfs_readpage_release+0x34/0x9b [nfs] [] nfs_readpage_release_full+0x32/0x4b [nfs] [] rpc_release_calldata+0x12/0x14 [sunrpc] [] rpc_free_task+0x59/0x61 [sunrpc] [] rpc_async_release+0x10/0x12 [sunrpc] [] worker_thread+0x1ef/0x2e2 [] ? worker_thread+0x19a/0x2e2 [] ? thread_return+0x3e/0x101 [] ? rpc_async_release+0x0/0x12 [sunrpc] [] ? autoremove_wake_function+0x0/0x34 [] ? trace_hardirqs_on+0xd/0xf [] ? worker_thread+0x0/0x2e2 [] kthread+0x7a/0x82 [] child_rip+0xa/0x20 [] ? restore_args+0x0/0x30 [] ? add_wait_queue+0x15/0x44 [] ? kthread+0x0/0x82 [] ? child_rip+0x0/0x20 Signed-off-by: David Howells diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index b1870a6..e6854f5 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -102,7 +102,9 @@ struct fscache_cookie *__fscache_acquire_cookie( cookie->netfs_data = netfs_data; cookie->flags = 0; - INIT_RADIX_TREE(&cookie->stores, GFP_NOFS); + /* radix tree insertion won't use the preallocation pool unless it's + * told it may not wait */ + INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_WAIT); switch (cookie->def->type) { case FSCACHE_COOKIE_TYPE_INDEX: diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 23abbd9..ae61068 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -200,6 +200,9 @@ radix_tree_node_free(struct radix_tree_node *node) * ensure that the addition of a single element in the tree cannot fail. On * success, return zero, with preemption disabled. On error, return -ENOMEM * with preemption not disabled. + * + * To make use of this facility, the radix tree must be initialised without + * __GFP_WAIT being passed to INIT_RADIX_TREE(). */ int radix_tree_preload(gfp_t gfp_mask) { -- cgit v0.10.2 From 5753c441889253e4323eee85f791a1d64cf08196 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:19 +0000 Subject: FS-Cache: Permit cache retrieval ops to be interrupted in the initial wait phase Permit the operations to retrieve data from the cache or to allocate space in the cache for future writes to be interrupted whilst they're waiting for permission for the operation to proceed. Typically this wait occurs whilst the cache object is being looked up on disk in the background. If an interruption occurs, and the operation has not yet been given the go-ahead to run, the operation is dequeued and cancelled, and control returns to the read operation of the netfs routine with none of the requested pages having been read or in any way marked as known by the cache. This means that the initial wait is done interruptibly rather than uninterruptibly. In addition, extra stats values are made available to show the number of ops cancelled and the number of cache space allocations interrupted. Signed-off-by: David Howells diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index b6c32c0..0a77868 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -250,6 +250,7 @@ proc files. ok=N Number of successful alloc reqs wt=N Number of alloc reqs that waited on lookup completion nbf=N Number of alloc reqs rejected -ENOBUFS + int=N Number of alloc reqs aborted -ERESTARTSYS ops=N Number of alloc reqs submitted owt=N Number of alloc reqs waited for CPU time Retrvls n=N Number of retrieval (read) requests seen @@ -271,6 +272,7 @@ proc files. Ops pend=N Number of times async ops added to pending queues run=N Number of times async ops given CPU time enq=N Number of times async ops queued for processing + can=N Number of async ops cancelled dfr=N Number of async ops queued for deferred release rel=N Number of async ops released gc=N Number of deferred-release async ops garbage collected diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index b85cc89..50324ad 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -112,6 +112,7 @@ extern int fscache_submit_exclusive_op(struct fscache_object *, struct fscache_operation *); extern int fscache_submit_op(struct fscache_object *, struct fscache_operation *); +extern int fscache_cancel_op(struct fscache_operation *); extern void fscache_abort_object(struct fscache_object *); extern void fscache_start_operations(struct fscache_object *); extern void fscache_operation_gc(struct work_struct *); @@ -140,6 +141,7 @@ extern atomic_t fscache_n_op_enqueue; extern atomic_t fscache_n_op_deferred_release; extern atomic_t fscache_n_op_release; extern atomic_t fscache_n_op_gc; +extern atomic_t fscache_n_op_cancelled; extern atomic_t fscache_n_attr_changed; extern atomic_t fscache_n_attr_changed_ok; @@ -151,6 +153,7 @@ extern atomic_t fscache_n_allocs; extern atomic_t fscache_n_allocs_ok; extern atomic_t fscache_n_allocs_wait; extern atomic_t fscache_n_allocs_nobufs; +extern atomic_t fscache_n_allocs_intr; extern atomic_t fscache_n_alloc_ops; extern atomic_t fscache_n_alloc_op_waits; diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 09e43b6..296492e 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -34,32 +34,31 @@ void fscache_enqueue_operation(struct fscache_operation *op) fscache_set_op_state(op, "EnQ"); + ASSERT(list_empty(&op->pend_link)); ASSERT(op->processor != NULL); ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); ASSERTCMP(atomic_read(&op->usage), >, 0); - if (list_empty(&op->pend_link)) { - switch (op->flags & FSCACHE_OP_TYPE) { - case FSCACHE_OP_FAST: - _debug("queue fast"); - atomic_inc(&op->usage); - if (!schedule_work(&op->fast_work)) - fscache_put_operation(op); - break; - case FSCACHE_OP_SLOW: - _debug("queue slow"); - slow_work_enqueue(&op->slow_work); - break; - case FSCACHE_OP_MYTHREAD: - _debug("queue for caller's attention"); - break; - default: - printk(KERN_ERR "FS-Cache: Unexpected op type %lx", - op->flags); - BUG(); - break; - } - fscache_stat(&fscache_n_op_enqueue); + fscache_stat(&fscache_n_op_enqueue); + switch (op->flags & FSCACHE_OP_TYPE) { + case FSCACHE_OP_FAST: + _debug("queue fast"); + atomic_inc(&op->usage); + if (!schedule_work(&op->fast_work)) + fscache_put_operation(op); + break; + case FSCACHE_OP_SLOW: + _debug("queue slow"); + slow_work_enqueue(&op->slow_work); + break; + case FSCACHE_OP_MYTHREAD: + _debug("queue for caller's attention"); + break; + default: + printk(KERN_ERR "FS-Cache: Unexpected op type %lx", + op->flags); + BUG(); + break; } } EXPORT_SYMBOL(fscache_enqueue_operation); @@ -97,6 +96,7 @@ int fscache_submit_exclusive_op(struct fscache_object *object, spin_lock(&object->lock); ASSERTCMP(object->n_ops, >=, object->n_in_progress); ASSERTCMP(object->n_ops, >=, object->n_exclusive); + ASSERT(list_empty(&op->pend_link)); ret = -ENOBUFS; if (fscache_object_is_active(object)) { @@ -202,6 +202,7 @@ int fscache_submit_op(struct fscache_object *object, spin_lock(&object->lock); ASSERTCMP(object->n_ops, >=, object->n_in_progress); ASSERTCMP(object->n_ops, >=, object->n_exclusive); + ASSERT(list_empty(&op->pend_link)); ostate = object->state; smp_rmb(); @@ -273,12 +274,7 @@ void fscache_start_operations(struct fscache_object *object) stop = true; } list_del_init(&op->pend_link); - object->n_in_progress++; - - if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) - wake_up_bit(&op->flags, FSCACHE_OP_WAITING); - if (op->processor) - fscache_enqueue_operation(op); + fscache_run_op(object, op); /* the pending queue was holding a ref on the object */ fscache_put_operation(op); @@ -291,6 +287,36 @@ void fscache_start_operations(struct fscache_object *object) } /* + * cancel an operation that's pending on an object + */ +int fscache_cancel_op(struct fscache_operation *op) +{ + struct fscache_object *object = op->object; + int ret; + + _enter("OBJ%x OP%x}", op->object->debug_id, op->debug_id); + + spin_lock(&object->lock); + + ret = -EBUSY; + if (!list_empty(&op->pend_link)) { + fscache_stat(&fscache_n_op_cancelled); + list_del_init(&op->pend_link); + object->n_ops--; + if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) + object->n_exclusive--; + if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) + wake_up_bit(&op->flags, FSCACHE_OP_WAITING); + fscache_put_operation(op); + ret = 0; + } + + spin_unlock(&object->lock); + _leave(" = %d", ret); + return ret; +} + +/* * release an operation * - queues pending ops if this is the last in-progress op */ diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 250dfd3..e6f2e61 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -295,8 +295,20 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { _debug(">>> WT"); fscache_stat(&fscache_n_retrieval_op_waits); - wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, - fscache_wait_bit, TASK_UNINTERRUPTIBLE); + if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, + fscache_wait_bit_interruptible, + TASK_INTERRUPTIBLE) < 0) { + ret = fscache_cancel_op(&op->op); + if (ret == 0) { + ret = -ERESTARTSYS; + goto error; + } + + /* it's been removed from the pending queue by another + * party, so we should get to run shortly */ + wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + } _debug("<<< GO"); } @@ -313,6 +325,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, fscache_stat_d(&fscache_n_cop_read_or_alloc_page); } +error: if (ret == -ENOMEM) fscache_stat(&fscache_n_retrievals_nomem); else if (ret == -ERESTARTSYS) @@ -412,8 +425,20 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { _debug(">>> WT"); fscache_stat(&fscache_n_retrieval_op_waits); - wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, - fscache_wait_bit, TASK_UNINTERRUPTIBLE); + if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, + fscache_wait_bit_interruptible, + TASK_INTERRUPTIBLE) < 0) { + ret = fscache_cancel_op(&op->op); + if (ret == 0) { + ret = -ERESTARTSYS; + goto error; + } + + /* it's been removed from the pending queue by another + * party, so we should get to run shortly */ + wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + } _debug("<<< GO"); } @@ -430,6 +455,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, fscache_stat_d(&fscache_n_cop_read_or_alloc_pages); } +error: if (ret == -ENOMEM) fscache_stat(&fscache_n_retrievals_nomem); else if (ret == -ERESTARTSYS) @@ -505,8 +531,20 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { _debug(">>> WT"); fscache_stat(&fscache_n_alloc_op_waits); - wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, - fscache_wait_bit, TASK_UNINTERRUPTIBLE); + if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, + fscache_wait_bit_interruptible, + TASK_INTERRUPTIBLE) < 0) { + ret = fscache_cancel_op(&op->op); + if (ret == 0) { + ret = -ERESTARTSYS; + goto error; + } + + /* it's been removed from the pending queue by another + * party, so we should get to run shortly */ + wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + } _debug("<<< GO"); } @@ -515,7 +553,10 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, ret = object->cache->ops->allocate_page(op, page, gfp); fscache_stat_d(&fscache_n_cop_allocate_page); - if (ret < 0) +error: + if (ret == -ERESTARTSYS) + fscache_stat(&fscache_n_allocs_intr); + else if (ret < 0) fscache_stat(&fscache_n_allocs_nobufs); else fscache_stat(&fscache_n_allocs_ok); diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 20233fb..4c07439 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -25,6 +25,7 @@ atomic_t fscache_n_op_requeue; atomic_t fscache_n_op_deferred_release; atomic_t fscache_n_op_release; atomic_t fscache_n_op_gc; +atomic_t fscache_n_op_cancelled; atomic_t fscache_n_attr_changed; atomic_t fscache_n_attr_changed_ok; @@ -36,6 +37,7 @@ atomic_t fscache_n_allocs; atomic_t fscache_n_allocs_ok; atomic_t fscache_n_allocs_wait; atomic_t fscache_n_allocs_nobufs; +atomic_t fscache_n_allocs_intr; atomic_t fscache_n_alloc_ops; atomic_t fscache_n_alloc_op_waits; @@ -169,11 +171,12 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_attr_changed_nomem), atomic_read(&fscache_n_attr_changed_calls)); - seq_printf(m, "Allocs : n=%u ok=%u wt=%u nbf=%u\n", + seq_printf(m, "Allocs : n=%u ok=%u wt=%u nbf=%u int=%u\n", atomic_read(&fscache_n_allocs), atomic_read(&fscache_n_allocs_ok), atomic_read(&fscache_n_allocs_wait), - atomic_read(&fscache_n_allocs_nobufs)); + atomic_read(&fscache_n_allocs_nobufs), + atomic_read(&fscache_n_allocs_intr)); seq_printf(m, "Allocs : ops=%u owt=%u\n", atomic_read(&fscache_n_alloc_ops), atomic_read(&fscache_n_alloc_op_waits)); @@ -201,10 +204,11 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_store_ops), atomic_read(&fscache_n_store_calls)); - seq_printf(m, "Ops : pend=%u run=%u enq=%u\n", + seq_printf(m, "Ops : pend=%u run=%u enq=%u can=%u\n", atomic_read(&fscache_n_op_pend), atomic_read(&fscache_n_op_run), - atomic_read(&fscache_n_op_enqueue)); + atomic_read(&fscache_n_op_enqueue), + atomic_read(&fscache_n_op_cancelled)); seq_printf(m, "Ops : dfr=%u rel=%u gc=%u\n", atomic_read(&fscache_n_op_deferred_release), atomic_read(&fscache_n_op_release), -- cgit v0.10.2 From 6897e3df8fc37bd4a58bbcdef8306da7fc175584 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:22 +0000 Subject: FS-Cache: The object-available state can't rely on the cookie to be available The object-available state in the object processing state machine (as processed by fscache_object_available()) can't rely on the cookie to be available because the FSCACHE_COOKIE_CREATING bit may have been cleared by fscache_obtained_object() prior to the object being put into the FSCACHE_OBJECT_AVAILABLE state. Clearing the FSCACHE_COOKIE_CREATING bit on a cookie permits __fscache_relinquish_cookie() to proceed and detach the cookie from the object. To deal with this, we don't dereference object->cookie in fscache_object_available() if the object has already been detached. In addition, a couple of assertions are added into fscache_drop_object() to make sure the object is unbound from the cookie before it gets there. Signed-off-by: David Howells diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 0d65c0c..1a1afa8 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -158,7 +158,8 @@ static void fscache_object_state_machine(struct fscache_object *object) spin_lock(&object->lock); object->state = FSCACHE_OBJECT_DYING; - if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, + if (object->cookie && + test_and_clear_bit(FSCACHE_COOKIE_CREATING, &object->cookie->flags)) wake_up_bit(&object->cookie->flags, FSCACHE_COOKIE_CREATING); @@ -594,7 +595,8 @@ static void fscache_object_available(struct fscache_object *object) spin_lock(&object->lock); - if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, &object->cookie->flags)) + if (object->cookie && + test_and_clear_bit(FSCACHE_COOKIE_CREATING, &object->cookie->flags)) wake_up_bit(&object->cookie->flags, FSCACHE_COOKIE_CREATING); fscache_done_parent_op(object); @@ -631,6 +633,9 @@ static void fscache_drop_object(struct fscache_object *object) _enter("{OBJ%x,%d}", object->debug_id, object->n_children); + ASSERTCMP(object->cookie, ==, NULL); + ASSERT(hlist_unhashed(&object->cookie_link)); + spin_lock(&cache->object_list_lock); list_del_init(&object->cache_link); spin_unlock(&cache->object_list_lock); -- cgit v0.10.2 From 1bccf513ac49d44604ba1cddcc29f5886e70f1b6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:25 +0000 Subject: FS-Cache: Fix lock misorder in fscache_write_op() FS-Cache has two structs internally for keeping track of the internal state of a cached file: the fscache_cookie struct, which represents the netfs's state, and fscache_object struct, which represents the cache's state. Each has a pointer that points to the other (when both are in existence), and each has a spinlock for pointer maintenance. Since netfs operations approach these structures from the cookie side, they get the cookie lock first, then the object lock. Cache operations, on the other hand, approach from the object side, and get the object lock first. It is not then permitted for a cache operation to get the cookie lock whilst it is holding the object lock lest deadlock occur; instead, it must do one of two things: (1) increment the cookie usage counter, drop the object lock and then get both locks in order, or (2) simply hold the object lock as certain parts of the cookie may not be altered whilst the object lock is held. It is also not permitted to follow either pointer without holding the lock at the end you start with. To break the pointers between the cookie and the object, both locks must be held. fscache_write_op(), however, violates the locking rules: It attempts to get the cookie lock without (a) checking that the cookie pointer is a valid pointer, and (b) holding the object lock to protect the cookie pointer whilst it follows it. This is so that it can access the pending page store tree without interference from __fscache_write_page(). This is fixed by splitting the cookie lock, such that the page store tracking tree is protected by its own lock, and checking that the cookie pointer is non-NULL before we attempt to follow it whilst holding the object lock. The new lock is subordinate to both the cookie lock and the object lock, and so should be taken after those. Signed-off-by: David Howells diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 0a77868..9cf2cfb 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -269,6 +269,9 @@ proc files. oom=N Number of store reqs failed -ENOMEM ops=N Number of store reqs submitted run=N Number of store reqs granted CPU time + pgs=N Number of pages given store req processing time + rxd=N Number of store reqs deleted from tracking tree + olm=N Number of store reqs over store limit Ops pend=N Number of times async ops added to pending queues run=N Number of times async ops given CPU time enq=N Number of times async ops queued for processing diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index e6854f5..f979659 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -36,6 +36,7 @@ void fscache_cookie_init_once(void *_cookie) memset(cookie, 0, sizeof(*cookie)); spin_lock_init(&cookie->lock); + spin_lock_init(&cookie->stores_lock); INIT_HLIST_HEAD(&cookie->backing_objects); } diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 50324ad..ba1853f 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -17,6 +17,7 @@ * - cache->object_list_lock * - object->lock * - object->parent->lock + * - cookie->stores_lock * - fscache_thread_lock * */ @@ -174,6 +175,9 @@ extern atomic_t fscache_n_stores_nobufs; extern atomic_t fscache_n_stores_oom; extern atomic_t fscache_n_store_ops; extern atomic_t fscache_n_store_calls; +extern atomic_t fscache_n_store_pages; +extern atomic_t fscache_n_store_radix_deletes; +extern atomic_t fscache_n_store_pages_over_limit; extern atomic_t fscache_n_marks; extern atomic_t fscache_n_uncaches; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index e6f2e61..3ea8897 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -45,16 +45,26 @@ EXPORT_SYMBOL(__fscache_wait_on_page_write); /* * note that a page has finished being written to the cache */ -static void fscache_end_page_write(struct fscache_cookie *cookie, struct page *page) +static void fscache_end_page_write(struct fscache_object *object, + struct page *page) { - struct page *xpage; + struct fscache_cookie *cookie; + struct page *xpage = NULL; - spin_lock(&cookie->lock); - xpage = radix_tree_delete(&cookie->stores, page->index); - spin_unlock(&cookie->lock); - ASSERT(xpage != NULL); - - wake_up_bit(&cookie->flags, 0); + spin_lock(&object->lock); + cookie = object->cookie; + if (cookie) { + /* delete the page from the tree if it is now no longer + * pending */ + spin_lock(&cookie->stores_lock); + fscache_stat(&fscache_n_store_radix_deletes); + xpage = radix_tree_delete(&cookie->stores, page->index); + spin_unlock(&cookie->stores_lock); + wake_up_bit(&cookie->flags, 0); + } + spin_unlock(&object->lock); + if (xpage) + page_cache_release(xpage); } /* @@ -591,7 +601,7 @@ static void fscache_write_op(struct fscache_operation *_op) struct fscache_storage *op = container_of(_op, struct fscache_storage, op); struct fscache_object *object = op->op.object; - struct fscache_cookie *cookie = object->cookie; + struct fscache_cookie *cookie; struct page *page; unsigned n; void *results[1]; @@ -601,16 +611,17 @@ static void fscache_write_op(struct fscache_operation *_op) fscache_set_op_state(&op->op, "GetPage"); - spin_lock(&cookie->lock); spin_lock(&object->lock); + cookie = object->cookie; - if (!fscache_object_is_active(object)) { + if (!fscache_object_is_active(object) || !cookie) { spin_unlock(&object->lock); - spin_unlock(&cookie->lock); _leave(""); return; } + spin_lock(&cookie->stores_lock); + fscache_stat(&fscache_n_store_calls); /* find a page to store */ @@ -621,23 +632,25 @@ static void fscache_write_op(struct fscache_operation *_op) goto superseded; page = results[0]; _debug("gang %d [%lx]", n, page->index); - if (page->index > op->store_limit) + if (page->index > op->store_limit) { + fscache_stat(&fscache_n_store_pages_over_limit); goto superseded; + } radix_tree_tag_clear(&cookie->stores, page->index, FSCACHE_COOKIE_PENDING_TAG); + spin_unlock(&cookie->stores_lock); spin_unlock(&object->lock); - spin_unlock(&cookie->lock); if (page) { fscache_set_op_state(&op->op, "Store"); + fscache_stat(&fscache_n_store_pages); fscache_stat(&fscache_n_cop_write_page); ret = object->cache->ops->write_page(op, page); fscache_stat_d(&fscache_n_cop_write_page); fscache_set_op_state(&op->op, "EndWrite"); - fscache_end_page_write(cookie, page); - page_cache_release(page); + fscache_end_page_write(object, page); if (ret < 0) { fscache_set_op_state(&op->op, "Abort"); fscache_abort_object(object); @@ -653,9 +666,9 @@ superseded: /* this writer is going away and there aren't any more things to * write */ _debug("cease"); + spin_unlock(&cookie->stores_lock); clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); spin_unlock(&object->lock); - spin_unlock(&cookie->lock); _leave(""); } @@ -731,6 +744,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, /* add the page to the pending-storage radix tree on the backing * object */ spin_lock(&object->lock); + spin_lock(&cookie->stores_lock); _debug("store limit %llx", (unsigned long long) object->store_limit); @@ -751,6 +765,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, if (test_and_set_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags)) goto already_pending; + spin_unlock(&cookie->stores_lock); spin_unlock(&object->lock); op->op.debug_id = atomic_inc_return(&fscache_op_debug_id); @@ -772,6 +787,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, already_queued: fscache_stat(&fscache_n_stores_again); already_pending: + spin_unlock(&cookie->stores_lock); spin_unlock(&object->lock); spin_unlock(&cookie->lock); radix_tree_preload_end(); @@ -781,7 +797,9 @@ already_pending: return 0; submit_failed: + spin_lock(&cookie->stores_lock); radix_tree_delete(&cookie->stores, page->index); + spin_unlock(&cookie->stores_lock); page_cache_release(page); ret = -ENOBUFS; goto nobufs; diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 4c07439..1d53ea6 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -58,6 +58,9 @@ atomic_t fscache_n_stores_nobufs; atomic_t fscache_n_stores_oom; atomic_t fscache_n_store_ops; atomic_t fscache_n_store_calls; +atomic_t fscache_n_store_pages; +atomic_t fscache_n_store_radix_deletes; +atomic_t fscache_n_store_pages_over_limit; atomic_t fscache_n_marks; atomic_t fscache_n_uncaches; @@ -200,9 +203,12 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_stores_again), atomic_read(&fscache_n_stores_nobufs), atomic_read(&fscache_n_stores_oom)); - seq_printf(m, "Stores : ops=%u run=%u\n", + seq_printf(m, "Stores : ops=%u run=%u pgs=%u rxd=%u olm=%u\n", atomic_read(&fscache_n_store_ops), - atomic_read(&fscache_n_store_calls)); + atomic_read(&fscache_n_store_calls), + atomic_read(&fscache_n_store_pages), + atomic_read(&fscache_n_store_radix_deletes), + atomic_read(&fscache_n_store_pages_over_limit)); seq_printf(m, "Ops : pend=%u run=%u enq=%u can=%u\n", atomic_read(&fscache_n_op_pend), diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 184cbdf..f3aa4bd 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -310,6 +310,7 @@ struct fscache_cookie { atomic_t usage; /* number of users of this cookie */ atomic_t n_children; /* number of children of this cookie */ spinlock_t lock; + spinlock_t stores_lock; /* lock on page store tree */ struct hlist_head backing_objects; /* object(s) backing this file/index */ const struct fscache_cookie_def *def; /* definition */ struct fscache_cookie *parent; /* parent of this entry */ -- cgit v0.10.2 From 285e728b0ac55b53a673114096168d6f74930167 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:29 +0000 Subject: FS-Cache: Don't delete pending pages from the page-store tracking tree Don't delete pending pages from the page-store tracking tree, but rather send them for another write as they've presumably been updated. Signed-off-by: David Howells diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 3ea8897..022a5da 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -57,8 +57,11 @@ static void fscache_end_page_write(struct fscache_object *object, /* delete the page from the tree if it is now no longer * pending */ spin_lock(&cookie->stores_lock); - fscache_stat(&fscache_n_store_radix_deletes); - xpage = radix_tree_delete(&cookie->stores, page->index); + if (!radix_tree_tag_get(&cookie->stores, page->index, + FSCACHE_COOKIE_PENDING_TAG)) { + fscache_stat(&fscache_n_store_radix_deletes); + xpage = radix_tree_delete(&cookie->stores, page->index); + } spin_unlock(&cookie->stores_lock); wake_up_bit(&cookie->flags, 0); } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index ae61068..92cdd99 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -546,7 +546,6 @@ out: } EXPORT_SYMBOL(radix_tree_tag_clear); -#ifndef __KERNEL__ /* Only the test harness uses this at present */ /** * radix_tree_tag_get - get a tag on a radix tree node * @root: radix tree root @@ -609,7 +608,6 @@ int radix_tree_tag_get(struct radix_tree_root *root, } } EXPORT_SYMBOL(radix_tree_tag_get); -#endif /** * radix_tree_next_hole - find the next hole (not-present entry) -- cgit v0.10.2 From e3d4d28b1c8cc7c26536a50b43d86ccd39878550 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:32 +0000 Subject: FS-Cache: Handle read request vs lookup, creation or other cache failure FS-Cache doesn't correctly handle the netfs requesting a read from the cache on an object that failed or was withdrawn by the cache. A trace similar to the following might be seen: CacheFiles: Lookup failed error -105 [exe ] unexpected submission OP165afe [OBJ6cac OBJECT_LC_DYING] [exe ] objstate=OBJECT_LC_DYING [OBJECT_LC_DYING] [exe ] objflags=0 [exe ] objevent=9 [fffffffffffffffb] [exe ] ops=0 inp=0 exc=0 Pid: 6970, comm: exe Not tainted 2.6.32-rc6-cachefs #50 Call Trace: [] fscache_submit_op+0x3ff/0x45a [fscache] [] __fscache_read_or_alloc_pages+0x187/0x3c4 [fscache] [] ? nfs_readpage_from_fscache_complete+0x0/0x66 [nfs] [] __nfs_readpages_from_fscache+0x7e/0x176 [nfs] [] ? __alloc_pages_nodemask+0x11c/0x5cf [] nfs_readpages+0x114/0x1d7 [nfs] [] __do_page_cache_readahead+0x15f/0x1ec [] ? __do_page_cache_readahead+0x73/0x1ec [] ra_submit+0x1c/0x20 [] ondemand_readahead+0x227/0x23a [] page_cache_sync_readahead+0x17/0x19 [] generic_file_aio_read+0x236/0x5a0 [] nfs_file_read+0xe4/0xf3 [nfs] [] do_sync_read+0xe3/0x120 [] ? _spin_unlock_irq+0x2b/0x31 [] ? autoremove_wake_function+0x0/0x34 [] ? selinux_file_permission+0x5d/0x10f [] ? thread_return+0x3e/0x101 [] ? security_file_permission+0x11/0x13 [] vfs_read+0xaa/0x16f [] ? trace_hardirqs_on_caller+0x10c/0x130 [] sys_read+0x45/0x6c [] system_call_fastpath+0x16/0x1b The object state might also be OBJECT_DYING or OBJECT_WITHDRAWING. This should be handled by simply rejecting the new operation with ENOBUFS. There's no need to log an error for it. Events of this type now appear in the stats file under Ops:rej. Signed-off-by: David Howells diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 9cf2cfb..057a3c7 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -276,6 +276,7 @@ proc files. run=N Number of times async ops given CPU time enq=N Number of times async ops queued for processing can=N Number of async ops cancelled + rej=N Number of async ops rejected due to object lookup/create failure dfr=N Number of async ops queued for deferred release rel=N Number of async ops released gc=N Number of deferred-release async ops garbage collected diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index ba1853f..a076987 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -143,6 +143,7 @@ extern atomic_t fscache_n_op_deferred_release; extern atomic_t fscache_n_op_release; extern atomic_t fscache_n_op_gc; extern atomic_t fscache_n_op_cancelled; +extern atomic_t fscache_n_op_rejected; extern atomic_t fscache_n_attr_changed; extern atomic_t fscache_n_attr_changed_ok; diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 296492e..313e79a 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -232,6 +232,11 @@ int fscache_submit_op(struct fscache_object *object, list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); ret = 0; + } else if (object->state == FSCACHE_OBJECT_DYING || + object->state == FSCACHE_OBJECT_LC_DYING || + object->state == FSCACHE_OBJECT_WITHDRAWING) { + fscache_stat(&fscache_n_op_rejected); + ret = -ENOBUFS; } else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) { fscache_report_unexpected_submission(object, op, ostate); ASSERT(!fscache_object_is_active(object)); diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 1d53ea6..045ba39 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -26,6 +26,7 @@ atomic_t fscache_n_op_deferred_release; atomic_t fscache_n_op_release; atomic_t fscache_n_op_gc; atomic_t fscache_n_op_cancelled; +atomic_t fscache_n_op_rejected; atomic_t fscache_n_attr_changed; atomic_t fscache_n_attr_changed_ok; @@ -210,11 +211,12 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_store_radix_deletes), atomic_read(&fscache_n_store_pages_over_limit)); - seq_printf(m, "Ops : pend=%u run=%u enq=%u can=%u\n", + seq_printf(m, "Ops : pend=%u run=%u enq=%u can=%u rej=%u\n", atomic_read(&fscache_n_op_pend), atomic_read(&fscache_n_op_run), atomic_read(&fscache_n_op_enqueue), - atomic_read(&fscache_n_op_cancelled)); + atomic_read(&fscache_n_op_cancelled), + atomic_read(&fscache_n_op_rejected)); seq_printf(m, "Ops : dfr=%u rel=%u gc=%u\n", atomic_read(&fscache_n_op_deferred_release), atomic_read(&fscache_n_op_release), -- cgit v0.10.2 From 201a15428bd54f83eccec8b7c64a04b8f9431204 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:35 +0000 Subject: FS-Cache: Handle pages pending storage that get evicted under OOM conditions Handle netfs pages that the vmscan algorithm wants to evict from the pagecache under OOM conditions, but that are waiting for write to the cache. Under these conditions, vmscan calls the releasepage() function of the netfs, asking if a page can be discarded. The problem is typified by the following trace of a stuck process: kslowd005 D 0000000000000000 0 4253 2 0x00000080 ffff88001b14f370 0000000000000046 ffff880020d0d000 0000000000000007 0000000000000006 0000000000000001 ffff88001b14ffd8 ffff880020d0d2a8 000000000000ddf0 00000000000118c0 00000000000118c0 ffff880020d0d2a8 Call Trace: [] __fscache_wait_on_page_write+0x8b/0xa7 [fscache] [] ? autoremove_wake_function+0x0/0x34 [] ? __fscache_check_page_write+0x63/0x70 [fscache] [] nfs_fscache_release_page+0x4e/0xc4 [nfs] [] nfs_release_page+0x3c/0x41 [nfs] [] try_to_release_page+0x32/0x3b [] shrink_page_list+0x316/0x4ac [] shrink_inactive_list+0x392/0x67c [] ? __mutex_unlock_slowpath+0x100/0x10b [] ? trace_hardirqs_on_caller+0x10c/0x130 [] ? mutex_unlock+0x9/0xb [] shrink_list+0x8d/0x8f [] shrink_zone+0x278/0x33c [] ? ktime_get_ts+0xad/0xba [] try_to_free_pages+0x22e/0x392 [] ? isolate_pages_global+0x0/0x212 [] __alloc_pages_nodemask+0x3dc/0x5cf [] grab_cache_page_write_begin+0x65/0xaa [] ext3_write_begin+0x78/0x1eb [] generic_file_buffered_write+0x109/0x28c [] ? current_fs_time+0x22/0x29 [] __generic_file_aio_write+0x350/0x385 [] ? generic_file_aio_write+0x4a/0xae [] generic_file_aio_write+0x60/0xae [] do_sync_write+0xe3/0x120 [] ? autoremove_wake_function+0x0/0x34 [] ? __dentry_open+0x1a5/0x2b8 [] ? dentry_open+0x82/0x89 [] cachefiles_write_page+0x298/0x335 [cachefiles] [] fscache_write_op+0x178/0x2c2 [fscache] [] fscache_op_execute+0x7a/0xd1 [fscache] [] slow_work_execute+0x18f/0x2d1 [] slow_work_thread+0x1c5/0x308 [] ? autoremove_wake_function+0x0/0x34 [] ? slow_work_thread+0x0/0x308 [] kthread+0x7a/0x82 [] child_rip+0xa/0x20 [] ? restore_args+0x0/0x30 [] ? tg_shares_up+0x171/0x227 [] ? kthread+0x0/0x82 [] ? child_rip+0x0/0x20 In the above backtrace, the following is happening: (1) A page storage operation is being executed by a slow-work thread (fscache_write_op()). (2) FS-Cache farms the operation out to the cache to perform (cachefiles_write_page()). (3) CacheFiles is then calling Ext3 to perform the actual write, using Ext3's standard write (do_sync_write()) under KERNEL_DS directly from the netfs page. (4) However, for Ext3 to perform the write, it must allocate some memory, in particular, it must allocate at least one page cache page into which it can copy the data from the netfs page. (5) Under OOM conditions, the memory allocator can't immediately come up with a page, so it uses vmscan to find something to discard (try_to_free_pages()). (6) vmscan finds a clean netfs page it might be able to discard (possibly the one it's trying to write out). (7) The netfs is called to throw the page away (nfs_release_page()) - but it's called with __GFP_WAIT, so the netfs decides to wait for the store to complete (__fscache_wait_on_page_write()). (8) This blocks a slow-work processing thread - possibly against itself. The system ends up stuck because it can't write out any netfs pages to the cache without allocating more memory. To avoid this, we make FS-Cache cancel some writes that aren't in the middle of actually being performed. This means that some data won't make it into the cache this time. To support this, a new FS-Cache function is added fscache_maybe_release_page() that replaces what the netfs releasepage() functions used to do with respect to the cache. The decisions fscache_maybe_release_page() makes are counted and displayed through /proc/fs/fscache/stats on a line labelled "VmScan". There are four counters provided: "nos=N" - pages that weren't pending storage; "gon=N" - pages that were pending storage when we first looked, but weren't by the time we got the object lock; "bsy=N" - pages that we ignored as they were actively being written when we looked; and "can=N" - pages that we cancelled the storage of. What I'd really like to do is alter the behaviour of the cancellation heuristics, depending on how necessary it is to expel pages. If there are plenty of other pages that aren't waiting to be written to the cache that could be ejected first, then it would be nice to hold up on immediate cancellation of cache writes - but I don't see a way of doing that. Signed-off-by: David Howells diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 057a3c7..7097fd2 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -272,6 +272,10 @@ proc files. pgs=N Number of pages given store req processing time rxd=N Number of store reqs deleted from tracking tree olm=N Number of store reqs over store limit + VmScan nos=N Number of release reqs against pages with no pending store + gon=N Number of release reqs against pages stored by time lock granted + bsy=N Number of release reqs ignored due to in-progress store + can=N Number of page stores cancelled due to release req Ops pend=N Number of times async ops added to pending queues run=N Number of times async ops given CPU time enq=N Number of times async ops queued for processing diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt index 2666b1e..1902c57 100644 --- a/Documentation/filesystems/caching/netfs-api.txt +++ b/Documentation/filesystems/caching/netfs-api.txt @@ -641,7 +641,7 @@ data file must be retired (see the relinquish cookie function below). Furthermore, note that this does not cancel the asynchronous read or write operation started by the read/alloc and write functions, so the page -invalidation and release functions must use: +invalidation functions must use: bool fscache_check_page_write(struct fscache_cookie *cookie, struct page *page); @@ -654,6 +654,25 @@ to see if a page is being written to the cache, and: to wait for it to finish if it is. +When releasepage() is being implemented, a special FS-Cache function exists to +manage the heuristics of coping with vmscan trying to eject pages, which may +conflict with the cache trying to write pages to the cache (which may itself +need to allocate memory): + + bool fscache_maybe_release_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp); + +This takes the netfs cookie, and the page and gfp arguments as supplied to +releasepage(). It will return false if the page cannot be released yet for +some reason and if it returns true, the page has been uncached and can now be +released. + +To make a page available for release, this function may wait for an outstanding +storage request to complete, or it may attempt to cancel the storage request - +in which case the page will not be stored in the cache this time. + + ========================== INDEX AND DATA FILE UPDATE ========================== diff --git a/fs/9p/cache.c b/fs/9p/cache.c index 51c94e2..bcc5357 100644 --- a/fs/9p/cache.c +++ b/fs/9p/cache.c @@ -343,18 +343,7 @@ int __v9fs_fscache_release_page(struct page *page, gfp_t gfp) BUG_ON(!vcookie->fscache); - if (PageFsCache(page)) { - if (fscache_check_page_write(vcookie->fscache, page)) { - if (!(gfp & __GFP_WAIT)) - return 0; - fscache_wait_on_page_write(vcookie->fscache, page); - } - - fscache_uncache_page(vcookie->fscache, page); - ClearPageFsCache(page); - } - - return 1; + return fscache_maybe_release_page(vnode->cache, page, gfp); } void __v9fs_fscache_invalidate_page(struct page *page) @@ -368,7 +357,6 @@ void __v9fs_fscache_invalidate_page(struct page *page) fscache_wait_on_page_write(vcookie->fscache, page); BUG_ON(!PageLocked(page)); fscache_uncache_page(vcookie->fscache, page); - ClearPageFsCache(page); } } diff --git a/fs/afs/file.c b/fs/afs/file.c index 681c2a7..39b3016 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -315,7 +315,6 @@ static void afs_invalidatepage(struct page *page, unsigned long offset) struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); fscache_wait_on_page_write(vnode->cache, page); fscache_uncache_page(vnode->cache, page); - ClearPageFsCache(page); } #endif @@ -349,17 +348,9 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags) /* deny if page is being written to the cache and the caller hasn't * elected to wait */ #ifdef CONFIG_AFS_FSCACHE - if (PageFsCache(page)) { - if (fscache_check_page_write(vnode->cache, page)) { - if (!(gfp_flags & __GFP_WAIT)) { - _leave(" = F [cache busy]"); - return 0; - } - fscache_wait_on_page_write(vnode->cache, page); - } - - fscache_uncache_page(vnode->cache, page); - ClearPageFsCache(page); + if (!fscache_maybe_release_page(vnode->cache, page, gfp_flags)) { + _leave(" = F [cache busy]"); + return 0; } #endif diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index a076987..e504651 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -180,6 +180,11 @@ extern atomic_t fscache_n_store_pages; extern atomic_t fscache_n_store_radix_deletes; extern atomic_t fscache_n_store_pages_over_limit; +extern atomic_t fscache_n_store_vmscan_not_storing; +extern atomic_t fscache_n_store_vmscan_gone; +extern atomic_t fscache_n_store_vmscan_busy; +extern atomic_t fscache_n_store_vmscan_cancelled; + extern atomic_t fscache_n_marks; extern atomic_t fscache_n_uncaches; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 022a5da..fc76798 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -43,6 +43,75 @@ void __fscache_wait_on_page_write(struct fscache_cookie *cookie, struct page *pa EXPORT_SYMBOL(__fscache_wait_on_page_write); /* + * decide whether a page can be released, possibly by cancelling a store to it + * - we're allowed to sleep if __GFP_WAIT is flagged + */ +bool __fscache_maybe_release_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp) +{ + struct page *xpage; + void *val; + + _enter("%p,%p,%x", cookie, page, gfp); + + rcu_read_lock(); + val = radix_tree_lookup(&cookie->stores, page->index); + if (!val) { + rcu_read_unlock(); + fscache_stat(&fscache_n_store_vmscan_not_storing); + __fscache_uncache_page(cookie, page); + return true; + } + + /* see if the page is actually undergoing storage - if so we can't get + * rid of it till the cache has finished with it */ + if (radix_tree_tag_get(&cookie->stores, page->index, + FSCACHE_COOKIE_STORING_TAG)) { + rcu_read_unlock(); + goto page_busy; + } + + /* the page is pending storage, so we attempt to cancel the store and + * discard the store request so that the page can be reclaimed */ + spin_lock(&cookie->stores_lock); + rcu_read_unlock(); + + if (radix_tree_tag_get(&cookie->stores, page->index, + FSCACHE_COOKIE_STORING_TAG)) { + /* the page started to undergo storage whilst we were looking, + * so now we can only wait or return */ + spin_unlock(&cookie->stores_lock); + goto page_busy; + } + + xpage = radix_tree_delete(&cookie->stores, page->index); + spin_unlock(&cookie->stores_lock); + + if (xpage) { + fscache_stat(&fscache_n_store_vmscan_cancelled); + fscache_stat(&fscache_n_store_radix_deletes); + ASSERTCMP(xpage, ==, page); + } else { + fscache_stat(&fscache_n_store_vmscan_gone); + } + + wake_up_bit(&cookie->flags, 0); + if (xpage) + page_cache_release(xpage); + __fscache_uncache_page(cookie, page); + return true; + +page_busy: + /* we might want to wait here, but that could deadlock the allocator as + * the slow-work threads writing to the cache may all end up sleeping + * on memory allocation */ + fscache_stat(&fscache_n_store_vmscan_busy); + return false; +} +EXPORT_SYMBOL(__fscache_maybe_release_page); + +/* * note that a page has finished being written to the cache */ static void fscache_end_page_write(struct fscache_object *object, @@ -57,6 +126,8 @@ static void fscache_end_page_write(struct fscache_object *object, /* delete the page from the tree if it is now no longer * pending */ spin_lock(&cookie->stores_lock); + radix_tree_tag_clear(&cookie->stores, page->index, + FSCACHE_COOKIE_STORING_TAG); if (!radix_tree_tag_get(&cookie->stores, page->index, FSCACHE_COOKIE_PENDING_TAG)) { fscache_stat(&fscache_n_store_radix_deletes); @@ -640,8 +711,12 @@ static void fscache_write_op(struct fscache_operation *_op) goto superseded; } - radix_tree_tag_clear(&cookie->stores, page->index, - FSCACHE_COOKIE_PENDING_TAG); + if (page) { + radix_tree_tag_set(&cookie->stores, page->index, + FSCACHE_COOKIE_STORING_TAG); + radix_tree_tag_clear(&cookie->stores, page->index, + FSCACHE_COOKIE_PENDING_TAG); + } spin_unlock(&cookie->stores_lock); spin_unlock(&object->lock); diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 045ba39..cda6999 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -63,6 +63,11 @@ atomic_t fscache_n_store_pages; atomic_t fscache_n_store_radix_deletes; atomic_t fscache_n_store_pages_over_limit; +atomic_t fscache_n_store_vmscan_not_storing; +atomic_t fscache_n_store_vmscan_gone; +atomic_t fscache_n_store_vmscan_busy; +atomic_t fscache_n_store_vmscan_cancelled; + atomic_t fscache_n_marks; atomic_t fscache_n_uncaches; @@ -211,6 +216,12 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_store_radix_deletes), atomic_read(&fscache_n_store_pages_over_limit)); + seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u\n", + atomic_read(&fscache_n_store_vmscan_not_storing), + atomic_read(&fscache_n_store_vmscan_gone), + atomic_read(&fscache_n_store_vmscan_busy), + atomic_read(&fscache_n_store_vmscan_cancelled)); + seq_printf(m, "Ops : pend=%u run=%u enq=%u can=%u rej=%u\n", atomic_read(&fscache_n_op_pend), atomic_read(&fscache_n_op_run), diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 70fad69..fa58800 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -359,17 +359,13 @@ int nfs_fscache_release_page(struct page *page, gfp_t gfp) BUG_ON(!cookie); - if (fscache_check_page_write(cookie, page)) { - if (!(gfp & __GFP_WAIT)) - return 0; - fscache_wait_on_page_write(cookie, page); - } - if (PageFsCache(page)) { dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", cookie, page, nfsi); - fscache_uncache_page(cookie, page); + if (!fscache_maybe_release_page(cookie, page, gfp)) + return 0; + nfs_add_fscache_stats(page->mapping->host, NFSIOS_FSCACHE_PAGES_UNCACHED, 1); } diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index f3aa4bd..4750d5f 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -317,6 +317,7 @@ struct fscache_cookie { void *netfs_data; /* back pointer to netfs */ struct radix_tree_root stores; /* pages to be stored on this cookie */ #define FSCACHE_COOKIE_PENDING_TAG 0 /* pages tag: pending write to cache */ +#define FSCACHE_COOKIE_STORING_TAG 1 /* pages tag: writing to cache */ unsigned long flags; #define FSCACHE_COOKIE_LOOKING_UP 0 /* T if non-index cookie being looked up still */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 6d8ee46..595ce49 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -202,6 +202,8 @@ extern int __fscache_write_page(struct fscache_cookie *, struct page *, gfp_t); extern void __fscache_uncache_page(struct fscache_cookie *, struct page *); extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *); extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *); +extern bool __fscache_maybe_release_page(struct fscache_cookie *, struct page *, + gfp_t); /** * fscache_register_netfs - Register a filesystem as desiring caching services @@ -615,4 +617,29 @@ void fscache_wait_on_page_write(struct fscache_cookie *cookie, __fscache_wait_on_page_write(cookie, page); } +/** + * fscache_maybe_release_page - Consider releasing a page, cancelling a store + * @cookie: The cookie representing the cache object + * @page: The netfs page that is being cached. + * @gfp: The gfp flags passed to releasepage() + * + * Consider releasing a page for the vmscan algorithm, on behalf of the netfs's + * releasepage() call. A storage request on the page may cancelled if it is + * not currently being processed. + * + * The function returns true if the page no longer has a storage request on it, + * and false if a storage request is left in place. If true is returned, the + * page will have been passed to fscache_uncache_page(). If false is returned + * the page cannot be freed yet. + */ +static inline +bool fscache_maybe_release_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp) +{ + if (fscache_cookie_valid(cookie) && PageFsCache(page)) + return __fscache_maybe_release_page(cookie, page, gfp); + return false; +} + #endif /* _LINUX_FSCACHE_H */ -- cgit v0.10.2 From 2175bb06dc6cf2af9c098a1770561f9e63edae4e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:38 +0000 Subject: FS-Cache: Add a retirement stat counter Add a stat counter to count retirement events rather than ordinary release events (the retire argument to fscache_relinquish_cookie()). Signed-off-by: David Howells diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index f979659..9905350 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -415,6 +415,8 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) unsigned long event; fscache_stat(&fscache_n_relinquishes); + if (retire) + fscache_stat(&fscache_n_relinquishes_retire); if (!cookie) { fscache_stat(&fscache_n_relinquishes_null); diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index e504651..2bf463d 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -202,6 +202,7 @@ extern atomic_t fscache_n_updates_run; extern atomic_t fscache_n_relinquishes; extern atomic_t fscache_n_relinquishes_null; extern atomic_t fscache_n_relinquishes_waitcrt; +extern atomic_t fscache_n_relinquishes_retire; extern atomic_t fscache_n_cookie_index; extern atomic_t fscache_n_cookie_data; diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index cda6999..9e15289 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -85,6 +85,7 @@ atomic_t fscache_n_updates_run; atomic_t fscache_n_relinquishes; atomic_t fscache_n_relinquishes_null; atomic_t fscache_n_relinquishes_waitcrt; +atomic_t fscache_n_relinquishes_retire; atomic_t fscache_n_cookie_index; atomic_t fscache_n_cookie_data; @@ -168,10 +169,11 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_updates_null), atomic_read(&fscache_n_updates_run)); - seq_printf(m, "Relinqs: n=%u nul=%u wcr=%u\n", + seq_printf(m, "Relinqs: n=%u nul=%u wcr=%u rtr=%u\n", atomic_read(&fscache_n_relinquishes), atomic_read(&fscache_n_relinquishes_null), - atomic_read(&fscache_n_relinquishes_waitcrt)); + atomic_read(&fscache_n_relinquishes_waitcrt), + atomic_read(&fscache_n_relinquishes_retire)); seq_printf(m, "AttrChg: n=%u ok=%u nbf=%u oom=%u run=%u\n", atomic_read(&fscache_n_attr_changed), -- cgit v0.10.2 From d461d26dde901b0523c46b0317e7fccf574a3933 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:41 +0000 Subject: FS-Cache: Make sure FSCACHE_COOKIE_LOOKING_UP cleared on lookup failure We must make sure that FSCACHE_COOKIE_LOOKING_UP is cleared on lookup failure (if an object reaches the LC_DYING state), and we should clear it before clearing FSCACHE_COOKIE_CREATING. If this doesn't happen then fscache_wait_for_deferred_lookup() may hold allocation and retrieval operations indefinitely until they're interrupted by signals - which in turn pins the dying object until they go away. Signed-off-by: David Howells diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 1a1afa8..74bc562 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -105,6 +105,7 @@ static inline void fscache_done_parent_op(struct fscache_object *object) static void fscache_object_state_machine(struct fscache_object *object) { enum fscache_object_state new_state; + struct fscache_cookie *cookie; ASSERT(object != NULL); @@ -158,11 +159,17 @@ static void fscache_object_state_machine(struct fscache_object *object) spin_lock(&object->lock); object->state = FSCACHE_OBJECT_DYING; - if (object->cookie && - test_and_clear_bit(FSCACHE_COOKIE_CREATING, - &object->cookie->flags)) - wake_up_bit(&object->cookie->flags, - FSCACHE_COOKIE_CREATING); + cookie = object->cookie; + if (cookie) { + if (test_and_clear_bit(FSCACHE_COOKIE_LOOKING_UP, + &cookie->flags)) + wake_up_bit(&cookie->flags, + FSCACHE_COOKIE_LOOKING_UP); + if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, + &cookie->flags)) + wake_up_bit(&cookie->flags, + FSCACHE_COOKIE_CREATING); + } spin_unlock(&object->lock); fscache_done_parent_op(object); -- cgit v0.10.2 From 60d543ca724be155c2b6166e36a00c80b21bd810 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:45 +0000 Subject: FS-Cache: Start processing an object's operations on that object's death Start processing an object's operations when that object moves into the DYING state as the object cannot be destroyed until all its outstanding operations have completed. Furthermore, make sure that read and allocation operations handle being woken up on a dead object. Such events are recorded in the Allocs.abt and Retrvls.abt statistics as viewable through /proc/fs/fscache/stats. The code for waiting for object activation for the read and allocation operations is also extracted into its own function as it is much the same in all cases, differing only in the stats incremented. Signed-off-by: David Howells diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 7097fd2..3c23411 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -253,6 +253,7 @@ proc files. int=N Number of alloc reqs aborted -ERESTARTSYS ops=N Number of alloc reqs submitted owt=N Number of alloc reqs waited for CPU time + abt=N Number of alloc reqs aborted due to object death Retrvls n=N Number of retrieval (read) requests seen ok=N Number of successful retr reqs wt=N Number of retr reqs that waited on lookup completion @@ -262,6 +263,7 @@ proc files. oom=N Number of retr reqs failed -ENOMEM ops=N Number of retr reqs submitted owt=N Number of retr reqs waited for CPU time + abt=N Number of retr reqs aborted due to object death Stores n=N Number of storage (write) requests seen ok=N Number of successful store reqs agn=N Number of store reqs on a page already pending storage diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 2bf463d..5b49a37 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -156,6 +156,7 @@ extern atomic_t fscache_n_allocs_ok; extern atomic_t fscache_n_allocs_wait; extern atomic_t fscache_n_allocs_nobufs; extern atomic_t fscache_n_allocs_intr; +extern atomic_t fscache_n_allocs_object_dead; extern atomic_t fscache_n_alloc_ops; extern atomic_t fscache_n_alloc_op_waits; @@ -166,6 +167,7 @@ extern atomic_t fscache_n_retrievals_nodata; extern atomic_t fscache_n_retrievals_nobufs; extern atomic_t fscache_n_retrievals_intr; extern atomic_t fscache_n_retrievals_nomem; +extern atomic_t fscache_n_retrievals_object_dead; extern atomic_t fscache_n_retrieval_ops; extern atomic_t fscache_n_retrieval_op_waits; @@ -249,9 +251,12 @@ static inline void fscache_stat_d(atomic_t *stat) atomic_dec(stat); } +#define __fscache_stat(stat) (stat) + extern const struct file_operations fscache_stats_fops; #else +#define __fscache_stat(stat) (NULL) #define fscache_stat(stat) do {} while (0) #endif diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 74bc562..c85c9f5 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -201,6 +201,7 @@ static void fscache_object_state_machine(struct fscache_object *object) } spin_unlock(&object->lock); fscache_enqueue_dependents(object); + fscache_start_operations(object); goto terminal_transit; /* handle an abort during initialisation */ diff --git a/fs/fscache/page.c b/fs/fscache/page.c index fc76798..c598ea4 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -314,6 +314,43 @@ static int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie) } /* + * wait for an object to become active (or dead) + */ +static int fscache_wait_for_retrieval_activation(struct fscache_object *object, + struct fscache_retrieval *op, + atomic_t *stat_op_waits, + atomic_t *stat_object_dead) +{ + int ret; + + if (!test_bit(FSCACHE_OP_WAITING, &op->op.flags)) + goto check_if_dead; + + _debug(">>> WT"); + fscache_stat(stat_op_waits); + if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, + fscache_wait_bit_interruptible, + TASK_INTERRUPTIBLE) < 0) { + ret = fscache_cancel_op(&op->op); + if (ret == 0) + return -ERESTARTSYS; + + /* it's been removed from the pending queue by another party, + * so we should get to run shortly */ + wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + } + _debug("<<< GO"); + +check_if_dead: + if (unlikely(fscache_object_is_dead(object))) { + fscache_stat(stat_object_dead); + return -ENOBUFS; + } + return 0; +} + +/* * read a page from the cache or allocate a block in which to store it * - we return: * -ENOMEM - out of memory, nothing done @@ -376,25 +413,12 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, /* we wait for the operation to become active, and then process it * *here*, in this thread, and not in the thread pool */ - if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { - _debug(">>> WT"); - fscache_stat(&fscache_n_retrieval_op_waits); - if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, - fscache_wait_bit_interruptible, - TASK_INTERRUPTIBLE) < 0) { - ret = fscache_cancel_op(&op->op); - if (ret == 0) { - ret = -ERESTARTSYS; - goto error; - } - - /* it's been removed from the pending queue by another - * party, so we should get to run shortly */ - wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, - fscache_wait_bit, TASK_UNINTERRUPTIBLE); - } - _debug("<<< GO"); - } + ret = fscache_wait_for_retrieval_activation( + object, op, + __fscache_stat(&fscache_n_retrieval_op_waits), + __fscache_stat(&fscache_n_retrievals_object_dead)); + if (ret < 0) + goto error; /* ask the cache to honour the operation */ if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) { @@ -506,25 +530,12 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, /* we wait for the operation to become active, and then process it * *here*, in this thread, and not in the thread pool */ - if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { - _debug(">>> WT"); - fscache_stat(&fscache_n_retrieval_op_waits); - if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, - fscache_wait_bit_interruptible, - TASK_INTERRUPTIBLE) < 0) { - ret = fscache_cancel_op(&op->op); - if (ret == 0) { - ret = -ERESTARTSYS; - goto error; - } - - /* it's been removed from the pending queue by another - * party, so we should get to run shortly */ - wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, - fscache_wait_bit, TASK_UNINTERRUPTIBLE); - } - _debug("<<< GO"); - } + ret = fscache_wait_for_retrieval_activation( + object, op, + __fscache_stat(&fscache_n_retrieval_op_waits), + __fscache_stat(&fscache_n_retrievals_object_dead)); + if (ret < 0) + goto error; /* ask the cache to honour the operation */ if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) { @@ -612,25 +623,12 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, fscache_stat(&fscache_n_alloc_ops); - if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { - _debug(">>> WT"); - fscache_stat(&fscache_n_alloc_op_waits); - if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, - fscache_wait_bit_interruptible, - TASK_INTERRUPTIBLE) < 0) { - ret = fscache_cancel_op(&op->op); - if (ret == 0) { - ret = -ERESTARTSYS; - goto error; - } - - /* it's been removed from the pending queue by another - * party, so we should get to run shortly */ - wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, - fscache_wait_bit, TASK_UNINTERRUPTIBLE); - } - _debug("<<< GO"); - } + ret = fscache_wait_for_retrieval_activation( + object, op, + __fscache_stat(&fscache_n_alloc_op_waits), + __fscache_stat(&fscache_n_allocs_object_dead)); + if (ret < 0) + goto error; /* ask the cache to honour the operation */ fscache_stat(&fscache_n_cop_allocate_page); diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 9e15289..05f77ca 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -39,6 +39,7 @@ atomic_t fscache_n_allocs_ok; atomic_t fscache_n_allocs_wait; atomic_t fscache_n_allocs_nobufs; atomic_t fscache_n_allocs_intr; +atomic_t fscache_n_allocs_object_dead; atomic_t fscache_n_alloc_ops; atomic_t fscache_n_alloc_op_waits; @@ -49,6 +50,7 @@ atomic_t fscache_n_retrievals_nodata; atomic_t fscache_n_retrievals_nobufs; atomic_t fscache_n_retrievals_intr; atomic_t fscache_n_retrievals_nomem; +atomic_t fscache_n_retrievals_object_dead; atomic_t fscache_n_retrieval_ops; atomic_t fscache_n_retrieval_op_waits; @@ -188,9 +190,10 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_allocs_wait), atomic_read(&fscache_n_allocs_nobufs), atomic_read(&fscache_n_allocs_intr)); - seq_printf(m, "Allocs : ops=%u owt=%u\n", + seq_printf(m, "Allocs : ops=%u owt=%u abt=%u\n", atomic_read(&fscache_n_alloc_ops), - atomic_read(&fscache_n_alloc_op_waits)); + atomic_read(&fscache_n_alloc_op_waits), + atomic_read(&fscache_n_allocs_object_dead)); seq_printf(m, "Retrvls: n=%u ok=%u wt=%u nod=%u nbf=%u" " int=%u oom=%u\n", @@ -201,9 +204,10 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_retrievals_nobufs), atomic_read(&fscache_n_retrievals_intr), atomic_read(&fscache_n_retrievals_nomem)); - seq_printf(m, "Retrvls: ops=%u owt=%u\n", + seq_printf(m, "Retrvls: ops=%u owt=%u abt=%u\n", atomic_read(&fscache_n_retrieval_ops), - atomic_read(&fscache_n_retrieval_op_waits)); + atomic_read(&fscache_n_retrieval_op_waits), + atomic_read(&fscache_n_retrievals_object_dead)); seq_printf(m, "Stores : n=%u ok=%u agn=%u nbf=%u oom=%u\n", atomic_read(&fscache_n_stores), diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 4750d5f..907bb56 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -404,6 +404,10 @@ extern const char *fscache_object_states[]; (obj)->state >= FSCACHE_OBJECT_AVAILABLE && \ (obj)->state < FSCACHE_OBJECT_DYING) +#define fscache_object_is_dead(obj) \ + (test_bit(FSCACHE_IOERROR, &(obj)->cache->flags) && \ + (obj)->state >= FSCACHE_OBJECT_DYING) + extern const struct slow_work_ops fscache_object_slow_work_ops; /** -- cgit v0.10.2 From 868411be3f445a83fafbd734f3e426400138add5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:48 +0000 Subject: FS-Cache: Actually requeue an object when requested FS-Cache objects have an FSCACHE_OBJECT_EV_REQUEUE event that can theoretically be raised to ask the state machine to requeue the object for further processing before the work function returns to the slow-work facility. However, fscache_object_work_execute() was clearing that bit before checking the event mask to see whether the object has any pending events that require it to be requeued immediately. Instead, the bit should be cleared after the check and enqueue. Signed-off-by: David Howells diff --git a/fs/fscache/object.c b/fs/fscache/object.c index c85c9f5..f3f952c 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -353,13 +353,12 @@ static void fscache_object_slow_work_execute(struct slow_work *work) _enter("{OBJ%x}", object->debug_id); - clear_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); - start = jiffies; fscache_object_state_machine(object); fscache_hist(fscache_objs_histogram, start); if (object->events & object->event_mask) fscache_enqueue_object(object); + clear_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); } /* -- cgit v0.10.2 From a17754fb8c28af19cd70dcbec6d5b0773b94e0c1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:52 +0000 Subject: CacheFiles: Don't write a full page if there's only a partial page to cache cachefiles_write_page() writes a full page to the backing file for the last page of the netfs file, even if the netfs file's last page is only a partial page. This causes the EOF on the backing file to be extended beyond the EOF of the netfs, and thus the backing file will be truncated by cachefiles_attr_changed() called from cachefiles_lookup_object(). So we need to limit the write we make to the backing file on that last page such that it doesn't push the EOF too far. Also, if a backing file that has a partial page at the end is expanded, we discard the partial page and refetch it on the basis that we then have a hole in the file with invalid data, and should the power go out... A better way to deal with this could be to record a note that the partial page contains invalid data until the correct data is written into it. This isn't a problem for netfs's that discard the whole backing file if the file size changes (such as NFS). Signed-off-by: David Howells diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index dd7f852..8e67abf 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -404,12 +404,26 @@ static int cachefiles_attr_changed(struct fscache_object *_object) if (oi_size == ni_size) return 0; - newattrs.ia_size = ni_size; - newattrs.ia_valid = ATTR_SIZE; - cachefiles_begin_secure(cache, &saved_cred); mutex_lock(&object->backer->d_inode->i_mutex); + + /* if there's an extension to a partial page at the end of the backing + * file, we need to discard the partial page so that we pick up new + * data after it */ + if (oi_size & ~PAGE_MASK && ni_size > oi_size) { + _debug("discard tail %llx", oi_size); + newattrs.ia_valid = ATTR_SIZE; + newattrs.ia_size = oi_size & PAGE_MASK; + ret = notify_change(object->backer, &newattrs); + if (ret < 0) + goto truncate_failed; + } + + newattrs.ia_valid = ATTR_SIZE; + newattrs.ia_size = ni_size; ret = notify_change(object->backer, &newattrs); + +truncate_failed: mutex_unlock(&object->backer->d_inode->i_mutex); cachefiles_end_secure(cache, saved_cred); diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 3304646..5a84fd7 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -803,7 +803,8 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) struct cachefiles_cache *cache; mm_segment_t old_fs; struct file *file; - loff_t pos; + loff_t pos, eof; + size_t len; void *data; int ret; @@ -837,15 +838,29 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) ret = -EIO; if (file->f_op->write) { pos = (loff_t) page->index << PAGE_SHIFT; + + /* we mustn't write more data than we have, so we have + * to beware of a partial page at EOF */ + eof = object->fscache.store_limit_l; + len = PAGE_SIZE; + if (eof & ~PAGE_MASK) { + ASSERTCMP(pos, <, eof); + if (eof - pos < PAGE_SIZE) { + _debug("cut short %llx to %llx", + pos, eof); + len = eof - pos; + ASSERTCMP(pos + len, ==, eof); + } + } + data = kmap(page); old_fs = get_fs(); set_fs(KERNEL_DS); ret = file->f_op->write( - file, (const void __user *) data, PAGE_SIZE, - &pos); + file, (const void __user *) data, len, &pos); set_fs(old_fs); kunmap(page); - if (ret != PAGE_SIZE) + if (ret != len) ret = -EIO; } fput(file); diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 907bb56..5db5000 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -395,6 +395,7 @@ struct fscache_object { struct rb_node objlist_link; /* link in global object list */ #endif pgoff_t store_limit; /* current storage limit */ + loff_t store_limit_l; /* current storage limit */ }; extern const char *fscache_object_states[]; @@ -439,6 +440,7 @@ void fscache_object_init(struct fscache_object *object, object->events = object->event_mask = 0; object->flags = 0; object->store_limit = 0; + object->store_limit_l = 0; object->cache = cache; object->cookie = cookie; object->parent = NULL; @@ -491,6 +493,7 @@ static inline void fscache_object_lookup_error(struct fscache_object *object) static inline void fscache_set_store_limit(struct fscache_object *object, loff_t i_size) { + object->store_limit_l = i_size; object->store_limit = i_size >> PAGE_SHIFT; if (i_size & ~PAGE_MASK) object->store_limit++; -- cgit v0.10.2 From 5e929b33c3935ecb029b3e495356b2b8af432efa Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:55 +0000 Subject: CacheFiles: Handle truncate unlocking the page we're reading Handle truncate unlocking the page we're attempting to read from the backing device before the read has completed. This was causing reports like the following to occur: Pid: 4765, comm: kslowd Not tainted 2.6.30.1 #1 Call Trace: [] ? cachefiles_read_waiter+0xd9/0x147 [cachefiles] [] ? __wait_on_bit+0x60/0x6f [] ? __wake_up_common+0x3f/0x71 [] ? __wake_up+0x30/0x44 [] ? __wake_up_bit+0x28/0x2d [] ? ext3_truncate+0x4d7/0x8ed [ext3] [] ? pagevec_lookup+0x17/0x1f [] ? unmap_mapping_range+0x59/0x1ff [] ? __wake_up+0x30/0x44 [] ? vmtruncate+0xc2/0xe2 [] ? inode_setattr+0x22/0x10a [] ? ext3_setattr+0x17b/0x1e6 [ext3] [] ? notify_change+0x186/0x2c9 [] ? cachefiles_attr_changed+0x133/0x1cd [cachefiles] [] ? cachefiles_lookup_object+0xcf/0x12a [cachefiles] [] ? fscache_lookup_object+0x110/0x122 [fscache] [] ? fscache_object_slow_work_execute+0x590/0x6bc [fscache] [] ? slow_work_thread+0x285/0x43a [] ? autoremove_wake_function+0x0/0x2e [] ? slow_work_thread+0x0/0x43a [] ? kthread+0x54/0x81 [] ? child_rip+0xa/0x20 [] ? kthread+0x0/0x81 [] ? child_rip+0x0/0x20 CacheFiles: I/O Error: Readpage failed on backing file 200000000000810 FS-Cache: Cache cachefiles stopped due to I/O error Reported-by: Christian Kujau Reported-by: Takashi Iwai Reported-by: Duc Le Minh Signed-off-by: David Howells diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 5a84fd7..1d83325 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -40,8 +40,10 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode, _debug("--- monitor %p %lx ---", page, page->flags); - if (!PageUptodate(page) && !PageError(page)) - dump_stack(); + if (!PageUptodate(page) && !PageError(page)) { + /* unlocked, not uptodate and not erronous? */ + _debug("page probably truncated"); + } /* remove from the waitqueue */ list_del(&wait->task_list); @@ -61,6 +63,84 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode, } /* + * handle a probably truncated page + * - check to see if the page is still relevant and reissue the read if + * possible + * - return -EIO on error, -ENODATA if the page is gone, -EINPROGRESS if we + * must wait again and 0 if successful + */ +static int cachefiles_read_reissue(struct cachefiles_object *object, + struct cachefiles_one_read *monitor) +{ + struct address_space *bmapping = object->backer->d_inode->i_mapping; + struct page *backpage = monitor->back_page, *backpage2; + int ret; + + kenter("{ino=%lx},{%lx,%lx}", + object->backer->d_inode->i_ino, + backpage->index, backpage->flags); + + /* skip if the page was truncated away completely */ + if (backpage->mapping != bmapping) { + kleave(" = -ENODATA [mapping]"); + return -ENODATA; + } + + backpage2 = find_get_page(bmapping, backpage->index); + if (!backpage2) { + kleave(" = -ENODATA [gone]"); + return -ENODATA; + } + + if (backpage != backpage2) { + put_page(backpage2); + kleave(" = -ENODATA [different]"); + return -ENODATA; + } + + /* the page is still there and we already have a ref on it, so we don't + * need a second */ + put_page(backpage2); + + INIT_LIST_HEAD(&monitor->op_link); + add_page_wait_queue(backpage, &monitor->monitor); + + if (trylock_page(backpage)) { + ret = -EIO; + if (PageError(backpage)) + goto unlock_discard; + ret = 0; + if (PageUptodate(backpage)) + goto unlock_discard; + + kdebug("reissue read"); + ret = bmapping->a_ops->readpage(NULL, backpage); + if (ret < 0) + goto unlock_discard; + } + + /* but the page may have been read before the monitor was installed, so + * the monitor may miss the event - so we have to ensure that we do get + * one in such a case */ + if (trylock_page(backpage)) { + _debug("jumpstart %p {%lx}", backpage, backpage->flags); + unlock_page(backpage); + } + + /* it'll reappear on the todo list */ + kleave(" = -EINPROGRESS"); + return -EINPROGRESS; + +unlock_discard: + unlock_page(backpage); + spin_lock_irq(&object->work_lock); + list_del(&monitor->op_link); + spin_unlock_irq(&object->work_lock); + kleave(" = %d", ret); + return ret; +} + +/* * copy data from backing pages to netfs pages to complete a read operation * - driven by FS-Cache's thread pool */ @@ -92,20 +172,26 @@ static void cachefiles_read_copier(struct fscache_operation *_op) _debug("- copy {%lu}", monitor->back_page->index); - error = -EIO; + recheck: if (PageUptodate(monitor->back_page)) { copy_highpage(monitor->netfs_page, monitor->back_page); pagevec_add(&pagevec, monitor->netfs_page); fscache_mark_pages_cached(monitor->op, &pagevec); error = 0; - } - - if (error) + } else if (!PageError(monitor->back_page)) { + /* the page has probably been truncated */ + error = cachefiles_read_reissue(object, monitor); + if (error == -EINPROGRESS) + goto next; + goto recheck; + } else { cachefiles_io_error_obj( object, "Readpage failed on backing file %lx", (unsigned long) monitor->back_page->flags); + error = -EIO; + } page_cache_release(monitor->back_page); @@ -114,6 +200,7 @@ static void cachefiles_read_copier(struct fscache_operation *_op) fscache_put_retrieval(op); kfree(monitor); + next: /* let the thread pool have some air occasionally */ max--; if (max < 0 || need_resched()) { -- cgit v0.10.2 From 6511de33c877a53b3df545bc06c29e0f272837ff Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:58 +0000 Subject: CacheFiles: Mark parent directory locks as I_MUTEX_PARENT to keep lockdep happy Mark parent directory locks as I_MUTEX_PARENT in the callers of cachefiles_bury_object() so that lockdep doesn't complain when that invokes vfs_unlink(): ============================================= [ INFO: possible recursive locking detected ] 2.6.32-rc6-cachefs #47 --------------------------------------------- kslowd002/3089 is trying to acquire lock: (&sb->s_type->i_mutex_key#7){+.+.+.}, at: [] vfs_unlink+0x8b/0x128 but task is already holding lock: (&sb->s_type->i_mutex_key#7){+.+.+.}, at: [] cachefiles_walk_to_object+0x1b0/0x831 [cachefiles] other info that might help us debug this: 1 lock held by kslowd002/3089: #0: (&sb->s_type->i_mutex_key#7){+.+.+.}, at: [] cachefiles_walk_to_object+0x1b0/0x831 [cachefiles] stack backtrace: Pid: 3089, comm: kslowd002 Not tainted 2.6.32-rc6-cachefs #47 Call Trace: [] __lock_acquire+0x1649/0x16e3 [] ? inode_has_perm+0x5f/0x61 [] lock_acquire+0x57/0x6d [] ? vfs_unlink+0x8b/0x128 [] mutex_lock_nested+0x54/0x292 [] ? vfs_unlink+0x8b/0x128 [] ? selinux_inode_permission+0x8e/0x90 [] ? security_inode_permission+0x1c/0x1e [] ? inode_permission+0x99/0xa5 [] vfs_unlink+0x8b/0x128 [] ? kfree+0xed/0xf9 [] cachefiles_bury_object+0xb6/0x420 [cachefiles] [] ? trace_hardirqs_on+0xd/0xf [] ? cachefiles_check_object_xattr+0x233/0x293 [cachefiles] [] cachefiles_walk_to_object+0x4ff/0x831 [cachefiles] [] ? finish_task_switch+0x0/0xb2 [] cachefiles_lookup_object+0xac/0x12a [cachefiles] [] fscache_lookup_object+0x1c7/0x214 [fscache] [] fscache_object_state_machine+0xa5/0x52d [fscache] [] fscache_object_slow_work_execute+0x5f/0xa0 [fscache] [] slow_work_execute+0x18f/0x2d1 [] slow_work_thread+0x1c5/0x308 [] ? autoremove_wake_function+0x0/0x34 [] ? slow_work_thread+0x0/0x308 [] kthread+0x7a/0x82 [] child_rip+0xa/0x20 [] ? restore_args+0x0/0x30 [] ? kthread+0x0/0x82 [] ? child_rip+0x0/0x20 Signed-off-by: Daivd Howells diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 4ce818a..3df8695 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -254,7 +254,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache, dir = dget_parent(object->dentry); - mutex_lock(&dir->d_inode->i_mutex); + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); ret = cachefiles_bury_object(cache, dir, object->dentry); dput(dir); @@ -307,7 +307,7 @@ lookup_again: /* search the current directory for the element name */ _debug("lookup '%s'", name); - mutex_lock(&dir->d_inode->i_mutex); + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); start = jiffies; next = lookup_one_len(name, dir, nlen); -- cgit v0.10.2 From d0e27b7808dc667f3015be0b6888f6d680e222c8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:12:02 +0000 Subject: CacheFiles: Better showing of debugging information in active object problems Show more debugging information if cachefiles_mark_object_active() is asked to activate an active object. This may happen, for instance, if the netfs tries to register an object with the same key multiple times. The code is changed to (a) get the appropriate object lock to protect the cookie pointer whilst we dereference it, and (b) get and display the cookie key if available. Signed-off-by: David Howells diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 3df8695..00a0cda 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -27,6 +27,76 @@ static int cachefiles_wait_bit(void *flags) return 0; } +#define CACHEFILES_KEYBUF_SIZE 512 + +/* + * dump debugging info about an object + */ +static noinline +void __cachefiles_printk_object(struct cachefiles_object *object, + const char *prefix, + u8 *keybuf) +{ + struct fscache_cookie *cookie; + unsigned keylen, loop; + + printk(KERN_ERR "%sobject: OBJ%x\n", + prefix, object->fscache.debug_id); + printk(KERN_ERR "%sobjstate=%s fl=%lx swfl=%lx ev=%lx[%lx]\n", + prefix, fscache_object_states[object->fscache.state], + object->fscache.flags, object->fscache.work.flags, + object->fscache.events, + object->fscache.event_mask & FSCACHE_OBJECT_EVENTS_MASK); + printk(KERN_ERR "%sops=%u inp=%u exc=%u\n", + prefix, object->fscache.n_ops, object->fscache.n_in_progress, + object->fscache.n_exclusive); + printk(KERN_ERR "%sparent=%p\n", + prefix, object->fscache.parent); + + spin_lock(&object->fscache.lock); + cookie = object->fscache.cookie; + if (cookie) { + printk(KERN_ERR "%scookie=%p [pr=%p nd=%p fl=%lx]\n", + prefix, + object->fscache.cookie, + object->fscache.cookie->parent, + object->fscache.cookie->netfs_data, + object->fscache.cookie->flags); + if (keybuf) + keylen = cookie->def->get_key(cookie->netfs_data, keybuf, + CACHEFILES_KEYBUF_SIZE); + else + keylen = 0; + } else { + printk(KERN_ERR "%scookie=NULL\n", prefix); + keylen = 0; + } + spin_unlock(&object->fscache.lock); + + if (keylen) { + printk(KERN_ERR "%skey=[%u] '", prefix, keylen); + for (loop = 0; loop < keylen; loop++) + printk("%02x", keybuf[loop]); + printk("'\n"); + } +} + +/* + * dump debugging info about a pair of objects + */ +static noinline void cachefiles_printk_object(struct cachefiles_object *object, + struct cachefiles_object *xobject) +{ + u8 *keybuf; + + keybuf = kmalloc(CACHEFILES_KEYBUF_SIZE, GFP_NOIO); + if (object) + __cachefiles_printk_object(object, "", keybuf); + if (xobject) + __cachefiles_printk_object(xobject, "x", keybuf); + kfree(keybuf); +} + /* * record the fact that an object is now active */ @@ -42,8 +112,11 @@ static void cachefiles_mark_object_active(struct cachefiles_cache *cache, try_again: write_lock(&cache->active_lock); - if (test_and_set_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) + if (test_and_set_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) { + printk(KERN_ERR "CacheFiles: Error: Object already active\n"); + cachefiles_printk_object(object, NULL); BUG(); + } dentry = object->dentry; _p = &cache->active_nodes.rb_node; @@ -76,32 +149,7 @@ wait_for_old_object: printk(KERN_ERR "\n"); printk(KERN_ERR "CacheFiles: Error:" " Unexpected object collision\n"); - printk(KERN_ERR "xobject: OBJ%x\n", - xobject->fscache.debug_id); - printk(KERN_ERR "xobjstate=%s\n", - fscache_object_states[xobject->fscache.state]); - printk(KERN_ERR "xobjflags=%lx\n", xobject->fscache.flags); - printk(KERN_ERR "xobjevent=%lx [%lx]\n", - xobject->fscache.events, xobject->fscache.event_mask); - printk(KERN_ERR "xops=%u inp=%u exc=%u\n", - xobject->fscache.n_ops, xobject->fscache.n_in_progress, - xobject->fscache.n_exclusive); - printk(KERN_ERR "xcookie=%p [pr=%p nd=%p fl=%lx]\n", - xobject->fscache.cookie, - xobject->fscache.cookie->parent, - xobject->fscache.cookie->netfs_data, - xobject->fscache.cookie->flags); - printk(KERN_ERR "xparent=%p\n", - xobject->fscache.parent); - printk(KERN_ERR "object: OBJ%x\n", - object->fscache.debug_id); - printk(KERN_ERR "cookie=%p [pr=%p nd=%p fl=%lx]\n", - object->fscache.cookie, - object->fscache.cookie->parent, - object->fscache.cookie->netfs_data, - object->fscache.cookie->flags); - printk(KERN_ERR "parent=%p\n", - object->fscache.parent); + cachefiles_printk_object(object, xobject); BUG(); } atomic_inc(&xobject->usage); -- cgit v0.10.2 From fee096deb4f33897937b974cb2c5168bab7935be Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:12:05 +0000 Subject: CacheFiles: Catch an overly long wait for an old active object Catch an overly long wait for an old, dying active object when we want to replace it with a new one. The probability is that all the slow-work threads are hogged, and the delete can't get a look in. What we do instead is: (1) if there's nothing in the slow work queue, we sleep until either the dying object has finished dying or there is something in the slow work queue behind which we can queue our object. (2) if there is something in the slow work queue, we return ETIMEDOUT to fscache_lookup_object(), which then puts us back on the slow work queue, presumably behind the deletion that we're blocked by. We are then deferred for a while until we work our way back through the queue - without blocking a slow-work thread unnecessarily. A backtrace similar to the following may appear in the log without this patch: INFO: task kslowd004:5711 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kslowd004 D 0000000000000000 0 5711 2 0x00000080 ffff88000340bb80 0000000000000046 ffff88002550d000 0000000000000000 ffff88002550d000 0000000000000007 ffff88000340bfd8 ffff88002550d2a8 000000000000ddf0 00000000000118c0 00000000000118c0 ffff88002550d2a8 Call Trace: [] ? trace_hardirqs_on+0xd/0xf [] ? cachefiles_wait_bit+0x0/0xd [cachefiles] [] cachefiles_wait_bit+0x9/0xd [cachefiles] [] __wait_on_bit+0x43/0x76 [] ? ext3_xattr_get+0x1ec/0x270 [] out_of_line_wait_on_bit+0x69/0x74 [] ? cachefiles_wait_bit+0x0/0xd [cachefiles] [] ? wake_bit_function+0x0/0x2e [] cachefiles_mark_object_active+0x203/0x23b [cachefiles] [] cachefiles_walk_to_object+0x558/0x827 [cachefiles] [] cachefiles_lookup_object+0xac/0x12a [cachefiles] [] fscache_lookup_object+0x1c7/0x214 [fscache] [] fscache_object_state_machine+0xa5/0x52d [fscache] [] fscache_object_slow_work_execute+0x5f/0xa0 [fscache] [] slow_work_execute+0x18f/0x2d1 [] slow_work_thread+0x1c5/0x308 [] ? autoremove_wake_function+0x0/0x34 [] ? slow_work_thread+0x0/0x308 [] kthread+0x7a/0x82 [] child_rip+0xa/0x20 [] ? restore_args+0x0/0x30 [] ? kthread+0x0/0x82 [] ? child_rip+0x0/0x20 1 lock held by kslowd004/5711: #0: (&sb->s_type->i_mutex_key#7/1){+.+.+.}, at: [] cachefiles_walk_to_object+0x1b3/0x827 [cachefiles] Signed-off-by: David Howells diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 3c23411..a91e2e2 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -235,6 +235,7 @@ proc files. neg=N Number of negative lookups made pos=N Number of positive lookups made crt=N Number of objects created by lookup + tmo=N Number of lookups timed out and requeued Updates n=N Number of update cookie requests seen nul=N Number of upd reqs given a NULL parent run=N Number of upd reqs granted CPU time diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 8e67abf..9d3c426 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -114,8 +114,9 @@ nomem_lookup_data: /* * attempt to look up the nominated node in this cache + * - return -ETIMEDOUT to be scheduled again */ -static void cachefiles_lookup_object(struct fscache_object *_object) +static int cachefiles_lookup_object(struct fscache_object *_object) { struct cachefiles_lookup_data *lookup_data; struct cachefiles_object *parent, *object; @@ -145,13 +146,14 @@ static void cachefiles_lookup_object(struct fscache_object *_object) object->fscache.cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) cachefiles_attr_changed(&object->fscache); - if (ret < 0) { + if (ret < 0 && ret != -ETIMEDOUT) { printk(KERN_WARNING "CacheFiles: Lookup failed error %d\n", ret); fscache_object_lookup_error(&object->fscache); } _leave(" [%d]", ret); + return ret; } /* diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 00a0cda..14ac480 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -21,12 +21,6 @@ #include #include "internal.h" -static int cachefiles_wait_bit(void *flags) -{ - schedule(); - return 0; -} - #define CACHEFILES_KEYBUF_SIZE 512 /* @@ -100,8 +94,8 @@ static noinline void cachefiles_printk_object(struct cachefiles_object *object, /* * record the fact that an object is now active */ -static void cachefiles_mark_object_active(struct cachefiles_cache *cache, - struct cachefiles_object *object) +static int cachefiles_mark_object_active(struct cachefiles_cache *cache, + struct cachefiles_object *object) { struct cachefiles_object *xobject; struct rb_node **_p, *_parent = NULL; @@ -139,8 +133,8 @@ try_again: rb_insert_color(&object->active_node, &cache->active_nodes); write_unlock(&cache->active_lock); - _leave(""); - return; + _leave(" = 0"); + return 0; /* an old object from a previous incarnation is hogging the slot - we * need to wait for it to be destroyed */ @@ -155,13 +149,64 @@ wait_for_old_object: atomic_inc(&xobject->usage); write_unlock(&cache->active_lock); - _debug(">>> wait"); - wait_on_bit(&xobject->flags, CACHEFILES_OBJECT_ACTIVE, - cachefiles_wait_bit, TASK_UNINTERRUPTIBLE); - _debug("<<< waited"); + if (test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) { + wait_queue_head_t *wq; + + signed long timeout = 60 * HZ; + wait_queue_t wait; + bool requeue; + + /* if the object we're waiting for is queued for processing, + * then just put ourselves on the queue behind it */ + if (slow_work_is_queued(&xobject->fscache.work)) { + _debug("queue OBJ%x behind OBJ%x immediately", + object->fscache.debug_id, + xobject->fscache.debug_id); + goto requeue; + } + + /* otherwise we sleep until either the object we're waiting for + * is done, or the slow-work facility wants the thread back to + * do other work */ + wq = bit_waitqueue(&xobject->flags, CACHEFILES_OBJECT_ACTIVE); + init_wait(&wait); + requeue = false; + do { + prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); + if (!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) + break; + requeue = slow_work_sleep_till_thread_needed( + &object->fscache.work, &timeout); + } while (timeout > 0 && !requeue); + finish_wait(wq, &wait); + + if (requeue && + test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) { + _debug("queue OBJ%x behind OBJ%x after wait", + object->fscache.debug_id, + xobject->fscache.debug_id); + goto requeue; + } + + if (timeout <= 0) { + printk(KERN_ERR "\n"); + printk(KERN_ERR "CacheFiles: Error: Overlong" + " wait for old active object to go away\n"); + cachefiles_printk_object(object, xobject); + goto requeue; + } + } + + ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)); cache->cache.ops->put_object(&xobject->fscache); goto try_again; + +requeue: + clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); + cache->cache.ops->put_object(&xobject->fscache); + _leave(" = -ETIMEDOUT"); + return -ETIMEDOUT; } /* @@ -466,12 +511,15 @@ lookup_again: } /* note that we're now using this object */ - cachefiles_mark_object_active(cache, object); + ret = cachefiles_mark_object_active(cache, object); mutex_unlock(&dir->d_inode->i_mutex); dput(dir); dir = NULL; + if (ret == -ETIMEDOUT) + goto mark_active_timed_out; + _debug("=== OBTAINED_OBJECT ==="); if (object->new) { @@ -515,6 +563,10 @@ create_error: cachefiles_io_error(cache, "Create/mkdir failed"); goto error; +mark_active_timed_out: + _debug("mark active timed out"); + goto release_dentry; + check_error: _debug("check error %d", ret); write_lock(&cache->active_lock); @@ -522,7 +574,7 @@ check_error: clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE); write_unlock(&cache->active_lock); - +release_dentry: dput(object->dentry); object->dentry = NULL; goto error_out; @@ -543,9 +595,6 @@ error: error_out2: dput(dir); error_out: - if (ret == -ENOSPC) - ret = -ENOBUFS; - _leave(" = error %d", -ret); return ret; } diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 5b49a37..0ca2566 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -215,6 +215,7 @@ extern atomic_t fscache_n_object_no_alloc; extern atomic_t fscache_n_object_lookups; extern atomic_t fscache_n_object_lookups_negative; extern atomic_t fscache_n_object_lookups_positive; +extern atomic_t fscache_n_object_lookups_timed_out; extern atomic_t fscache_n_object_created; extern atomic_t fscache_n_object_avail; extern atomic_t fscache_n_object_dead; diff --git a/fs/fscache/object.c b/fs/fscache/object.c index f3f952c..e513ac5 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -468,6 +468,7 @@ static void fscache_lookup_object(struct fscache_object *object) { struct fscache_cookie *cookie = object->cookie; struct fscache_object *parent; + int ret; _enter(""); @@ -493,12 +494,19 @@ static void fscache_lookup_object(struct fscache_object *object) fscache_stat(&fscache_n_object_lookups); fscache_stat(&fscache_n_cop_lookup_object); - object->cache->ops->lookup_object(object); + ret = object->cache->ops->lookup_object(object); fscache_stat_d(&fscache_n_cop_lookup_object); if (test_bit(FSCACHE_OBJECT_EV_ERROR, &object->events)) set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); + if (ret == -ETIMEDOUT) { + /* probably stuck behind another object, so move this one to + * the back of the queue */ + fscache_stat(&fscache_n_object_lookups_timed_out); + set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); + } + _leave(""); } diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 05f77ca..46435f3a 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -98,6 +98,7 @@ atomic_t fscache_n_object_no_alloc; atomic_t fscache_n_object_lookups; atomic_t fscache_n_object_lookups_negative; atomic_t fscache_n_object_lookups_positive; +atomic_t fscache_n_object_lookups_timed_out; atomic_t fscache_n_object_created; atomic_t fscache_n_object_avail; atomic_t fscache_n_object_dead; @@ -160,10 +161,11 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_acquires_nobufs), atomic_read(&fscache_n_acquires_oom)); - seq_printf(m, "Lookups: n=%u neg=%u pos=%u crt=%u\n", + seq_printf(m, "Lookups: n=%u neg=%u pos=%u crt=%u tmo=%u\n", atomic_read(&fscache_n_object_lookups), atomic_read(&fscache_n_object_lookups_negative), atomic_read(&fscache_n_object_lookups_positive), + atomic_read(&fscache_n_object_lookups_timed_out), atomic_read(&fscache_n_object_created)); seq_printf(m, "Updates: n=%u nul=%u run=%u\n", diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 5db5000..7be0c6f 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -234,8 +234,10 @@ struct fscache_cache_ops { struct fscache_object *(*alloc_object)(struct fscache_cache *cache, struct fscache_cookie *cookie); - /* look up the object for a cookie */ - void (*lookup_object)(struct fscache_object *object); + /* look up the object for a cookie + * - return -ETIMEDOUT to be requeued + */ + int (*lookup_object)(struct fscache_object *object); /* finished looking up */ void (*lookup_complete)(struct fscache_object *object); -- cgit v0.10.2 From 14e69647c868459bcb910f771851ca7c699efd21 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:12:08 +0000 Subject: CacheFiles: Don't log lookup/create failing with ENOBUFS Don't log the CacheFiles lookup/create object routined failing with ENOBUFS as under high memory load or high cache load they can do this quite a lot. This error simply means that the requested object cannot be created on disk due to lack of space, or due to failure of the backing filesystem to find sufficient resources. Signed-off-by: David Howells diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 9d3c426..2708931 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -147,8 +147,9 @@ static int cachefiles_lookup_object(struct fscache_object *_object) cachefiles_attr_changed(&object->fscache); if (ret < 0 && ret != -ETIMEDOUT) { - printk(KERN_WARNING "CacheFiles: Lookup failed error %d\n", - ret); + if (ret != -ENOBUFS) + printk(KERN_WARNING + "CacheFiles: Lookup failed error %d\n", ret); fscache_object_lookup_error(&object->fscache); } -- cgit v0.10.2 From 5854d9c8d18359b1fc2f23c0ef2d51dd53281bd6 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 19 Nov 2009 02:18:44 +0000 Subject: Fix handling of the HP/Acer 'DMAR at zero' BIOS error for machines with <4GiB RAM. Commit 86cf898e1d0fca245173980e3897580db38569a8 ("intel-iommu: Check for 'DMAR at zero' BIOS error earlier.") was supposed to work by pretending not to detect an IOMMU if it was actually being reported by the BIOS at physical address zero. However, the intel_iommu_init() function is called unconditionally, as are the corresponding functions for other IOMMU hardware. So the patch only worked if you have RAM above the 4GiB boundary. It caused swiotlb to be initialised when no IOMMU was detected during early boot, and thus the later IOMMU init would refuse to run. But if you have less RAM than that, swiotlb wouldn't get set up and the IOMMU _would_ still end up being initialised, even though we never claimed to detect it. This patch also sets the dmar_disabled flag when the error is detected during the initial detection phase -- so that the later call to intel_iommu_init() will return without doing anything, regardless of whether swiotlb is used or not. Signed-off-by: David Woodhouse Signed-off-by: Linus Torvalds diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index e5f8fc1..b952ebc 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -609,6 +609,9 @@ int __init check_zero_address(void) dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); +#ifdef CONFIG_DMAR + dmar_disabled = 1; +#endif return 0; } break; -- cgit v0.10.2 From 308efab5e231d1510cd35931d87629bf5171caae Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 19 Nov 2009 13:30:36 +0000 Subject: vt: Fix use of "new" in a struct field As this struct is exposed to user space and the API was added for this release it's a bit of a pain for the C++ world and we still have time to fix it. Rename the fields before we end up with that pain in an actual release. Signed-off-by: Alan Cox Reported-by: Olivier Goffart Signed-off-by: Linus Torvalds diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c index ed86d3b..6aa1028 100644 --- a/drivers/char/vt_ioctl.c +++ b/drivers/char/vt_ioctl.c @@ -103,8 +103,8 @@ void vt_event_post(unsigned int event, unsigned int old, unsigned int new) ve->event.event = event; /* kernel view is consoles 0..n-1, user space view is console 1..n with 0 meaning current, so we must bias */ - ve->event.old = old + 1; - ve->event.new = new + 1; + ve->event.oldev = old + 1; + ve->event.newev = new + 1; wake = 1; ve->done = 1; } @@ -186,7 +186,7 @@ int vt_waitactive(int n) vt_event_wait(&vw); if (vw.done == 0) return -EINTR; - } while (vw.event.new != n); + } while (vw.event.newev != n); return 0; } diff --git a/include/linux/vt.h b/include/linux/vt.h index 7afca0d..7ffa11f 100644 --- a/include/linux/vt.h +++ b/include/linux/vt.h @@ -70,8 +70,8 @@ struct vt_event { #define VT_EVENT_UNBLANK 0x0004 /* Screen unblank */ #define VT_EVENT_RESIZE 0x0008 /* Resize display */ #define VT_MAX_EVENT 0x000F - unsigned int old; /* Old console */ - unsigned int new; /* New console (if changing) */ + unsigned int oldev; /* Old console */ + unsigned int newev; /* New console (if changing) */ unsigned int pad[4]; /* Padding for expansion */ }; -- cgit v0.10.2 From 648f4e3e50c4793d9dbf9a09afa193631f76fa26 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 19 Nov 2009 14:32:38 -0800 Subject: Linux 2.6.32-rc8 diff --git a/Makefile b/Makefile index aa3e13a..ad82601 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 32 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* -- cgit v0.10.2 From a0a74d1ee2e38eb936a0437330da3a2fbc12b54e Mon Sep 17 00:00:00 2001 From: Jiang Yutang Date: Fri, 16 Oct 2009 20:44:36 +0400 Subject: sata_fsl: Split hard and soft reset Split sata_fsl_softreset() into hard and soft resets to make error-handling more efficient & device and PMP detection more reliable. Also includes fix for PMP support, driver tested with Sil3726, Sil4726 & Exar PMP controllers. [AV: Also fixes resuming from deep sleep on MPC8315 CPUs] Signed-off-by: Jiang Yutang Signed-off-by: Anton Vorontsov Signed-off-by: Jeff Garzik diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index d344db4..172b57e 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -707,34 +707,17 @@ static unsigned int sata_fsl_dev_classify(struct ata_port *ap) return ata_dev_classify(&tf); } -static int sata_fsl_prereset(struct ata_link *link, unsigned long deadline) -{ - /* FIXME: Never skip softreset, sata_fsl_softreset() is - * combination of soft and hard resets. sata_fsl_softreset() - * needs to be splitted into soft and hard resets. - */ - return 0; -} - -static int sata_fsl_softreset(struct ata_link *link, unsigned int *class, +static int sata_fsl_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline) { struct ata_port *ap = link->ap; - struct sata_fsl_port_priv *pp = ap->private_data; struct sata_fsl_host_priv *host_priv = ap->host->private_data; void __iomem *hcr_base = host_priv->hcr_base; - int pmp = sata_srst_pmp(link); u32 temp; - struct ata_taskfile tf; - u8 *cfis; - u32 Serror; int i = 0; unsigned long start_jiffies; - DPRINTK("in xx_softreset\n"); - - if (pmp != SATA_PMP_CTRL_PORT) - goto issue_srst; + DPRINTK("in xx_hardreset\n"); try_offline_again: /* @@ -749,7 +732,7 @@ try_offline_again: if (temp & ONLINE) { ata_port_printk(ap, KERN_ERR, - "Softreset failed, not off-lined %d\n", i); + "Hardreset failed, not off-lined %d\n", i); /* * Try to offline controller atleast twice @@ -761,7 +744,7 @@ try_offline_again: goto try_offline_again; } - DPRINTK("softreset, controller off-lined\n"); + DPRINTK("hardreset, controller off-lined\n"); VPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS)); VPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL)); @@ -786,11 +769,11 @@ try_offline_again: if (!(temp & ONLINE)) { ata_port_printk(ap, KERN_ERR, - "Softreset failed, not on-lined\n"); + "Hardreset failed, not on-lined\n"); goto err; } - DPRINTK("softreset, controller off-lined & on-lined\n"); + DPRINTK("hardreset, controller off-lined & on-lined\n"); VPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS)); VPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL)); @@ -806,7 +789,7 @@ try_offline_again: "No Device OR PHYRDY change,Hstatus = 0x%x\n", ioread32(hcr_base + HSTATUS)); *class = ATA_DEV_NONE; - goto out; + return 0; } /* @@ -819,11 +802,44 @@ try_offline_again: if ((temp & 0xFF) != 0x18) { ata_port_printk(ap, KERN_WARNING, "No Signature Update\n"); *class = ATA_DEV_NONE; - goto out; + goto do_followup_srst; } else { ata_port_printk(ap, KERN_INFO, "Signature Update detected @ %d msecs\n", jiffies_to_msecs(jiffies - start_jiffies)); + *class = sata_fsl_dev_classify(ap); + return 0; + } + +do_followup_srst: + /* + * request libATA to perform follow-up softreset + */ + return -EAGAIN; + +err: + return -EIO; +} + +static int sata_fsl_softreset(struct ata_link *link, unsigned int *class, + unsigned long deadline) +{ + struct ata_port *ap = link->ap; + struct sata_fsl_port_priv *pp = ap->private_data; + struct sata_fsl_host_priv *host_priv = ap->host->private_data; + void __iomem *hcr_base = host_priv->hcr_base; + int pmp = sata_srst_pmp(link); + u32 temp; + struct ata_taskfile tf; + u8 *cfis; + u32 Serror; + + DPRINTK("in xx_softreset\n"); + + if (ata_link_offline(link)) { + DPRINTK("PHY reports no device\n"); + *class = ATA_DEV_NONE; + return 0; } /* @@ -834,7 +850,6 @@ try_offline_again: * reached here, we can send a command to the target device */ -issue_srst: DPRINTK("Sending SRST/device reset\n"); ata_tf_init(link->device, &tf); @@ -860,6 +875,8 @@ issue_srst: ioread32(CA + hcr_base), ioread32(CC + hcr_base)); iowrite32(0xFFFF, CC + hcr_base); + if (pmp != SATA_PMP_CTRL_PORT) + iowrite32(pmp, CQPMP + hcr_base); iowrite32(1, CQ + hcr_base); temp = ata_wait_register(CQ + hcr_base, 0x1, 0x1, 1, 5000); @@ -926,7 +943,6 @@ issue_srst: VPRINTK("cereg = 0x%x\n", ioread32(hcr_base + CE)); } -out: return 0; err: @@ -988,18 +1004,6 @@ static void sata_fsl_error_intr(struct ata_port *ap) ehi->err_mask |= AC_ERR_ATA_BUS; ehi->action |= ATA_EH_SOFTRESET; - /* - * Ignore serror in case of fatal errors as we always want - * to do a soft-reset of the FSL SATA controller. Analyzing - * serror may cause libata to schedule a hard-reset action, - * and hard-reset currently does not do controller - * offline/online, causing command timeouts and leads to an - * un-recoverable state, hence make libATA ignore - * autopsy in case of fatal errors. - */ - - ehi->flags |= ATA_EHI_NO_AUTOPSY; - freeze = 1; } @@ -1267,8 +1271,8 @@ static struct ata_port_operations sata_fsl_ops = { .freeze = sata_fsl_freeze, .thaw = sata_fsl_thaw, - .prereset = sata_fsl_prereset, .softreset = sata_fsl_softreset, + .hardreset = sata_fsl_hardreset, .pmp_softreset = sata_fsl_softreset, .error_handler = sata_fsl_error_handler, .post_internal_cmd = sata_fsl_post_internal_cmd, -- cgit v0.10.2 From 228c4f5cfbf1cda411d9aa7204a612a63c89b1e8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 19 Nov 2009 17:07:10 -0700 Subject: ioat3: dca and raid operations are incompatible RAID operations cause a system hang on platforms with DCA (Direct-Cache-Access) enabled. So turn off RAID capabilities in this case. Signed-off-by: Dan Williams diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c index 14f0a78..abd9038 100644 --- a/drivers/dma/ioat/dca.c +++ b/drivers/dma/ioat/dca.c @@ -103,7 +103,7 @@ static int dca_enabled_in_bios(struct pci_dev *pdev) return res; } -static int system_has_dca_enabled(struct pci_dev *pdev) +int system_has_dca_enabled(struct pci_dev *pdev) { if (boot_cpu_has(X86_FEATURE_DCA)) return dca_enabled_in_bios(pdev); diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 35d1e33..4f305f6 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -1117,6 +1117,7 @@ static int __devinit ioat3_dma_self_test(struct ioatdma_device *device) int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) { struct pci_dev *pdev = device->pdev; + int dca_en = system_has_dca_enabled(pdev); struct dma_device *dma; struct dma_chan *c; struct ioat_chan_common *chan; @@ -1137,6 +1138,11 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock; cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET); + + /* dca is incompatible with raid operations */ + if (dca_en && (cap & (IOAT_CAP_XOR|IOAT_CAP_PQ))) + cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ); + if (cap & IOAT_CAP_XOR) { is_raid_device = true; dma->max_xor = 8; diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index 99afb12b..60e6754 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -39,6 +39,8 @@ #define IOAT_VER_3_0 0x30 /* Version 3.0 */ #define IOAT_VER_3_2 0x32 /* Version 3.2 */ +int system_has_dca_enabled(struct pci_dev *pdev); + struct ioat_dma_descriptor { uint32_t size; union { -- cgit v0.10.2 From 6f82b83b7a56bc6e9dd6d7b93531dde6027c5309 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 19 Nov 2009 17:07:57 -0700 Subject: ioat2,3: disable asynchronous error notifications Error interrupts and error completions may cause channel hangs, so poll the channel status register after a timeout. Signed-off-by: Dan Williams diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index 63038e1..f015ec1 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -92,9 +92,7 @@ #define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004 #define IOAT_CHANCTRL_INT_REARM 0x0001 #define IOAT_CHANCTRL_RUN (IOAT_CHANCTRL_INT_REARM |\ - IOAT_CHANCTRL_ERR_COMPLETION_EN |\ - IOAT_CHANCTRL_ANY_ERR_ABORT_EN |\ - IOAT_CHANCTRL_ERR_INT_EN) + IOAT_CHANCTRL_ANY_ERR_ABORT_EN) #define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */ #define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */ -- cgit v0.10.2 From de581b65f6fe78168affa552c3bd15b8c80ed614 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 19 Nov 2009 17:08:45 -0700 Subject: ioat3: specify valid address for disabled-Q or disabled-P Although disabled, hardware still checks address validity, so duplicate the known address. Signed-off-by: Dan Williams diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 4f305f6..fad2e9f 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -736,10 +736,16 @@ ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, unsigned int src_cnt, const unsigned char *scf, size_t len, unsigned long flags) { + /* specify valid address for disabled result */ + if (flags & DMA_PREP_PQ_DISABLE_P) + dst[0] = dst[1]; + if (flags & DMA_PREP_PQ_DISABLE_Q) + dst[1] = dst[0]; + /* handle the single source multiply case from the raid6 * recovery path */ - if (unlikely((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1)) { + if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) { dma_addr_t single_source[2]; unsigned char single_source_coef[2]; @@ -761,6 +767,12 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, unsigned int src_cnt, const unsigned char *scf, size_t len, enum sum_check_flags *pqres, unsigned long flags) { + /* specify valid address for disabled result */ + if (flags & DMA_PREP_PQ_DISABLE_P) + pq[0] = pq[1]; + if (flags & DMA_PREP_PQ_DISABLE_Q) + pq[1] = pq[0]; + /* the cleanup routine only sets bits on validate failure, it * does not clear bits on validate success... so clear it here */ @@ -778,9 +790,9 @@ ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, dma_addr_t pq[2]; memset(scf, 0, src_cnt); - flags |= DMA_PREP_PQ_DISABLE_Q; pq[0] = dst; - pq[1] = ~0; + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[1] = dst; /* specify valid address for disabled result */ return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, flags); @@ -800,9 +812,9 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, *result = 0; memset(scf, 0, src_cnt); - flags |= DMA_PREP_PQ_DISABLE_Q; pq[0] = src[0]; - pq[1] = ~0; + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[1] = pq[0]; /* specify valid address for disabled result */ return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf, len, flags); -- cgit v0.10.2 From b2f125bcf5eac41a6d74f75ac573b77753213b74 Mon Sep 17 00:00:00 2001 From: Kevin Wells Date: Thu, 12 Nov 2009 00:28:13 +0100 Subject: i2c: i2c-pnx: Limit minimum jiffie timeout to 2 Limit minimum jiffie timeout to 2 to prevent early timeout on systems with low tick rates Signed-off-by: Kevin Wells Signed-off-by: Ben Dooks diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c index 6ff6c20..68c8233 100644 --- a/drivers/i2c/busses/i2c-pnx.c +++ b/drivers/i2c/busses/i2c-pnx.c @@ -54,6 +54,9 @@ static inline void i2c_pnx_arm_timer(struct i2c_adapter *adap) struct timer_list *timer = &data->mif.timer; int expires = I2C_PNX_TIMEOUT / (1000 / HZ); + if (expires <= 1) + expires = 2; + del_timer_sync(timer); dev_dbg(&adap->dev, "Timer armed at %lu plus %u jiffies.\n", -- cgit v0.10.2 From 4ced24c8973f79113444d1e00ee8bd9e74fbf43e Mon Sep 17 00:00:00 2001 From: Kevin Wells Date: Thu, 12 Nov 2009 00:23:00 +0100 Subject: i2c: i2c-pnx: Made buf type unsigned to prevent sign extension Made buf type unsigned to prevent sign extension Signed-off-by: Kevin Wells Signed-off-by: Ben Dooks diff --git a/include/linux/i2c-pnx.h b/include/linux/i2c-pnx.h index f13255e..9eb07bb 100644 --- a/include/linux/i2c-pnx.h +++ b/include/linux/i2c-pnx.h @@ -21,7 +21,7 @@ struct i2c_pnx_mif { int mode; /* Interface mode */ struct completion complete; /* I/O completion */ struct timer_list timer; /* Timeout */ - char * buf; /* Data buffer */ + u8 * buf; /* Data buffer */ int len; /* Length of data buffer */ }; -- cgit v0.10.2 From a7d73d8c686d919cc18dec63e5e194f0e5a7206e Mon Sep 17 00:00:00 2001 From: Kevin Wells Date: Thu, 12 Nov 2009 00:25:52 +0100 Subject: i2c: i2c-pnx: Added missing mach/i2c.h and linux/io.h header file includes Added missing mach/i2c.h and linux/io.h header file includes Signed-off-by: Kevin Wells Signed-off-by: Ben Dooks diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c index 68c8233..1fca590 100644 --- a/drivers/i2c/busses/i2c-pnx.c +++ b/drivers/i2c/busses/i2c-pnx.c @@ -19,7 +19,9 @@ #include #include #include +#include #include +#include #include #include -- cgit v0.10.2 From b57014def9afc2bd8a62299d2f51b77dad5ae0c7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 19 Nov 2009 17:10:07 -0700 Subject: ioat2,3: report all uncorrectable errors Modify is_ioat_bug() to catch all errors that are uncorrectable, or not currently handled. Signed-off-by: Dan Williams diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index c14fdfe..45edde9 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -297,9 +297,7 @@ static inline bool is_ioat_suspended(unsigned long status) /* channel was fatally programmed */ static inline bool is_ioat_bug(unsigned long err) { - return !!(err & (IOAT_CHANERR_SRC_ADDR_ERR|IOAT_CHANERR_DEST_ADDR_ERR| - IOAT_CHANERR_NEXT_ADDR_ERR|IOAT_CHANERR_CONTROL_ERR| - IOAT_CHANERR_LENGTH_ERR)); + return !!err; } static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len, diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 96ffab7..8f1f7f0 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -279,6 +279,8 @@ void ioat2_timer_event(unsigned long data) u32 chanerr; chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + dev_err(to_dev(chan), "%s: Channel halted (%x)\n", + __func__, chanerr); BUG_ON(is_ioat_bug(chanerr)); } diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index fad2e9f..252cf2d 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -378,6 +378,8 @@ static void ioat3_timer_event(unsigned long data) u32 chanerr; chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + dev_err(to_dev(chan), "%s: Channel halted (%x)\n", + __func__, chanerr); BUG_ON(is_ioat_bug(chanerr)); } -- cgit v0.10.2 From 4499a24dec00e037da7d09caccad45e7594a9c19 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 19 Nov 2009 17:10:25 -0700 Subject: dmaengine: include xor/pq validate in device_has_all_tx_types() A channel must include these capabilities to satisfy ASYNC_TX_DISABLE_CHANNEL_SWITCH. Signed-off-by: Dan Williams diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index bd0b248..b6442f0 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -632,11 +632,15 @@ static bool device_has_all_tx_types(struct dma_device *device) #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE) if (!dma_has_cap(DMA_XOR, device->cap_mask)) return false; + if (!dma_has_cap(DMA_XOR_VAL, device->cap_mask)) + return false; #endif #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE) if (!dma_has_cap(DMA_PQ, device->cap_mask)) return false; + if (!dma_has_cap(DMA_PQ_VAL, device->cap_mask)) + return false; #endif return true; -- cgit v0.10.2 From 7b3cc2b1fc2066391e498f3387204908c4eced21 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 19 Nov 2009 17:10:37 -0700 Subject: async_tx: build-time toggling of async_{syndrome,xor}_val dma support ioat3.2 does not support asynchronous error notifications which makes the driver experience latencies when non-zero pq validate results are expected. Provide a mechanism for turning off async_xor_val and async_syndrome_val via Kconfig. This approach is generally useful for any driver that specifies ASYNC_TX_DISABLE_CHANNEL_SWITCH and would like to force the async_tx api to fall back to the synchronous path for certain operations. Signed-off-by: Dan Williams diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index e5aeb2b..e28e276 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig @@ -23,3 +23,8 @@ config ASYNC_RAID6_RECOV select ASYNC_CORE select ASYNC_PQ +config ASYNC_TX_DISABLE_PQ_VAL_DMA + bool + +config ASYNC_TX_DISABLE_XOR_VAL_DMA + bool diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index 6b5cc4f..ec87f53 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -240,6 +240,16 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, } EXPORT_SYMBOL_GPL(async_gen_syndrome); +static inline struct dma_chan * +pq_val_chan(struct async_submit_ctl *submit, struct page **blocks, int disks, size_t len) +{ + #ifdef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA + return NULL; + #endif + return async_tx_find_channel(submit, DMA_PQ_VAL, NULL, 0, blocks, + disks, len); +} + /** * async_syndrome_val - asynchronously validate a raid6 syndrome * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 @@ -260,9 +270,7 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, size_t len, enum sum_check_flags *pqres, struct page *spare, struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL, - NULL, 0, blocks, disks, - len); + struct dma_chan *chan = pq_val_chan(submit, blocks, disks, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx; unsigned char coefs[disks-2]; diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 79182dc..079ae8c 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -234,6 +234,17 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) memcmp(a, a + 4, len - 4) == 0); } +static inline struct dma_chan * +xor_val_chan(struct async_submit_ctl *submit, struct page *dest, + struct page **src_list, int src_cnt, size_t len) +{ + #ifdef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA + return NULL; + #endif + return async_tx_find_channel(submit, DMA_XOR_VAL, &dest, 1, src_list, + src_cnt, len); +} + /** * async_xor_val - attempt a xor parity check with a dma engine. * @dest: destination page used if the xor is performed synchronously @@ -255,9 +266,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, enum sum_check_flags *result, struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL, - &dest, 1, src_list, - src_cnt, len); + struct dma_chan *chan = xor_val_chan(submit, dest, src_list, src_cnt, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; dma_addr_t *dma_src = NULL; diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 5903a88..b401dad 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -26,6 +26,8 @@ config INTEL_IOATDMA select DMA_ENGINE select DCA select ASYNC_TX_DISABLE_CHANNEL_SWITCH + select ASYNC_TX_DISABLE_PQ_VAL_DMA + select ASYNC_TX_DISABLE_XOR_VAL_DMA help Enable support for the Intel(R) I/OAT DMA engine present in recent Intel Xeon chipsets. diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index b6442f0..8f99354 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -632,16 +632,22 @@ static bool device_has_all_tx_types(struct dma_device *device) #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE) if (!dma_has_cap(DMA_XOR, device->cap_mask)) return false; + + #ifndef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA if (!dma_has_cap(DMA_XOR_VAL, device->cap_mask)) return false; #endif + #endif #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE) if (!dma_has_cap(DMA_PQ, device->cap_mask)) return false; + + #ifndef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA if (!dma_has_cap(DMA_PQ_VAL, device->cap_mask)) return false; #endif + #endif return true; } diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 252cf2d..189788f 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -1206,6 +1206,16 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) device->timer_fn = ioat2_timer_event; } + #ifdef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA + dma_cap_clear(DMA_PQ_VAL, dma->cap_mask); + dma->device_prep_dma_pq_val = NULL; + #endif + + #ifdef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA + dma_cap_clear(DMA_XOR_VAL, dma->cap_mask); + dma->device_prep_dma_xor_val = NULL; + #endif + /* -= IOAT ver.3 workarounds =- */ /* Write CHANERRMSK_INT with 3E07h to mask out the errors * that can cause stability issues for IOAT ver.3 -- cgit v0.10.2 From 49954c1567cb0d70d28bb5512d471dc5bd4e2c3f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 19 Nov 2009 17:11:03 -0700 Subject: ioat3: fix pq completion versus channel deallocation race The completion of a pq operation is notified with a null descriptor appended to the end of the chain. This descriptor needs to be visible to dma clients otherwise the client is precluded from ensuring all operations are quiesced before freeing channel resources, i.e. due to descriptor polling it may get the completion notification ahead of the interrupt delivered by the null descriptor. Signed-off-by: Dan Williams diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 189788f..42f6f10 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -571,7 +571,7 @@ __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, dump_desc_dbg(ioat, compl_desc); /* we leave the channel locked to ensure in order submission */ - return &desc->txd; + return &compl_desc->txd; } static struct dma_async_tx_descriptor * @@ -730,7 +730,7 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, dump_desc_dbg(ioat, compl_desc); /* we leave the channel locked to ensure in order submission */ - return &desc->txd; + return &compl_desc->txd; } static struct dma_async_tx_descriptor * -- cgit v0.10.2 From 4ff1fa278b0bd1b2dd3c42efc0cb86788ffe05d5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 18 Nov 2009 18:03:19 +0000 Subject: [ARM] kmap: fix build errors with DEBUG_HIGHMEM enabled d451564 broke ARM by requiring KM_IRQ_PTE, KM_NMI and KM_NMI_PTE to always be defined. Solve this by providing invalid definitions for these constants, but only if CONFIG_DEBUG_HIGHMEM is enabled. Signed-off-by: Russell King diff --git a/arch/arm/include/asm/kmap_types.h b/arch/arm/include/asm/kmap_types.h index d16ec97..c019949 100644 --- a/arch/arm/include/asm/kmap_types.h +++ b/arch/arm/include/asm/kmap_types.h @@ -22,4 +22,10 @@ enum km_type { KM_TYPE_NR }; +#ifdef CONFIG_DEBUG_HIGHMEM +#define KM_NMI (-1) +#define KM_NMI_PTE (-1) +#define KM_IRQ_PTE (-1) +#endif + #endif -- cgit v0.10.2 From 1508c99506b5d57b8892a5d759176000c88c93b6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 20 Nov 2009 13:07:57 +0000 Subject: ARM: PNX4008: fix watchdog device driver name The PNX core code calls the device 'pnx4008-watchdog' not 'watchdog' Signed-off-by: Russell King Acked-by: Wim Van Sebroeck diff --git a/drivers/watchdog/pnx4008_wdt.c b/drivers/watchdog/pnx4008_wdt.c index f24d041..4d227b15 100644 --- a/drivers/watchdog/pnx4008_wdt.c +++ b/drivers/watchdog/pnx4008_wdt.c @@ -317,7 +317,7 @@ static int __devexit pnx4008_wdt_remove(struct platform_device *pdev) static struct platform_driver platform_wdt_driver = { .driver = { - .name = "watchdog", + .name = "pnx4008-watchdog", .owner = THIS_MODULE, }, .probe = pnx4008_wdt_probe, @@ -352,4 +352,4 @@ MODULE_PARM_DESC(nowayout, MODULE_LICENSE("GPL"); MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); -MODULE_ALIAS("platform:watchdog"); +MODULE_ALIAS("platform:pnx4008-watchdog"); -- cgit v0.10.2 From 463bf9000750e08a85ee0b40da3266aae8a54ba2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 20 Nov 2009 09:21:12 -0500 Subject: kconfig: Fix make O= local{mod,yes}config When the output directory is something other than the kernel source, the streamline_config script gets confused. This patch passes in the source directory to the script so that it can find the proper files. Reported-by: Peter Zijlstra Tested-by: Peter Zijlstra Signed-off-by: Steven Rostedt diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index 6d69c7c..80599e3 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -30,7 +30,7 @@ silentoldconfig: $(obj)/conf $< -s $(Kconfig) localmodconfig: $(obj)/streamline_config.pl $(obj)/conf - $(Q)perl $< $(Kconfig) > .tmp.config + $(Q)perl $< $(srctree) $(Kconfig) > .tmp.config $(Q)if [ -f .config ]; then \ cmp -s .tmp.config .config || \ (mv -f .config .config.old.1; \ @@ -44,7 +44,7 @@ localmodconfig: $(obj)/streamline_config.pl $(obj)/conf $(Q)rm -f .tmp.config localyesconfig: $(obj)/streamline_config.pl $(obj)/conf - $(Q)perl $< $(Kconfig) > .tmp.config + $(Q)perl $< $(srctree) $(Kconfig) > .tmp.config $(Q)sed -i s/=m/=y/ .tmp.config $(Q)if [ -f .config ]; then \ cmp -s .tmp.config .config || \ diff --git a/scripts/kconfig/streamline_config.pl b/scripts/kconfig/streamline_config.pl index 95984db..0d80082 100644 --- a/scripts/kconfig/streamline_config.pl +++ b/scripts/kconfig/streamline_config.pl @@ -43,7 +43,6 @@ # make oldconfig # my $config = ".config"; -my $linuxpath = "."; my $uname = `uname -r`; chomp $uname; @@ -111,7 +110,11 @@ sub find_config { find_config; -my @makefiles = `find $linuxpath -name Makefile`; +# Get the build source and top level Kconfig file (passed in) +my $ksource = $ARGV[0]; +my $kconfig = $ARGV[1]; + +my @makefiles = `find $ksource -name Makefile`; my %depends; my %selects; my %prompts; @@ -119,9 +122,6 @@ my %objects; my $var; my $cont = 0; -# Get the top level Kconfig file (passed in) -my $kconfig = $ARGV[0]; - # prevent recursion my %read_kconfigs; @@ -132,7 +132,7 @@ sub read_kconfig { my $config; my @kconfigs; - open(KIN, $kconfig) || die "Can't open $kconfig"; + open(KIN, "$ksource/$kconfig") || die "Can't open $kconfig"; while () { chomp; -- cgit v0.10.2 From 68a31de302bae12010ab116752d4362bd7fe2851 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Fri, 20 Nov 2009 17:50:34 +0100 Subject: [ARM] pxa/cpufreq: fix index assignments for end marker I stumbled over two small things regarding the .index field assignment in the dynamically created cpu frequency tables for pxa2xx and pxa3xx. Even though that doesn't currently cause any problem, it should still be fixed in case the logic in the CPUFREQ core changes. Signed-off-by: Daniel Mack Signed-off-by: Eric Miao diff --git a/arch/arm/mach-pxa/cpufreq-pxa2xx.c b/arch/arm/mach-pxa/cpufreq-pxa2xx.c index 983cc8c..9e4d981 100644 --- a/arch/arm/mach-pxa/cpufreq-pxa2xx.c +++ b/arch/arm/mach-pxa/cpufreq-pxa2xx.c @@ -447,6 +447,7 @@ static __init int pxa_cpufreq_init(struct cpufreq_policy *policy) pxa27x_freq_table[i].frequency = freq; pxa27x_freq_table[i].index = i; } + pxa27x_freq_table[i].index = i; pxa27x_freq_table[i].frequency = CPUFREQ_TABLE_END; /* diff --git a/arch/arm/mach-pxa/cpufreq-pxa3xx.c b/arch/arm/mach-pxa/cpufreq-pxa3xx.c index 67f34a8..149cdd9 100644 --- a/arch/arm/mach-pxa/cpufreq-pxa3xx.c +++ b/arch/arm/mach-pxa/cpufreq-pxa3xx.c @@ -102,7 +102,7 @@ static int setup_freqs_table(struct cpufreq_policy *policy, table[i].index = i; table[i].frequency = freqs[i].cpufreq_mhz * 1000; } - table[num].frequency = i; + table[num].index = i; table[num].frequency = CPUFREQ_TABLE_END; pxa3xx_freqs = freqs; -- cgit v0.10.2 From 0109d7e614e016a842569628116f54847a177f6e Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 20 Nov 2009 21:50:36 +0000 Subject: SLOW_WORK: Fix CIFS to pass THIS_MODULE to slow_work_register_user() As of the patch: SLOW_WORK: Wait for outstanding work items belonging to a module to clear Wait for outstanding slow work items belonging to a module to clear when unregistering that module as a user of the facility. This prevents the put_ref code of a work item from being taken away before it returns. slow_work_register_user() takes a module pointer as an argument. CIFS must now pass THIS_MODULE as that argument, lest the following error be observed: fs/cifs/cifsfs.c: In function 'init_cifs': fs/cifs/cifsfs.c:1040: error: too few arguments to function 'slow_work_register_user' Signed-off-by: David Howells diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 9a5e4f5..29f1da7 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1037,7 +1037,7 @@ init_cifs(void) if (rc) goto out_unregister_key_type; #endif - rc = slow_work_register_user(); + rc = slow_work_register_user(THIS_MODULE); if (rc) goto out_unregister_resolver_key; -- cgit v0.10.2 From 1c2ea8a2c0b71cc5e07f518370d458d692c9b21a Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 20 Nov 2009 21:50:40 +0000 Subject: SLOW_WORK: Fix GFS2 to #include before using THIS_MODULE GFS2 has been altered to pass THIS_MODULE to slow_work_register_user(), but hasn't been altered to #include to provide it, resulting in the following error: fs/gfs2/recovery.c:596: error: 'THIS_MODULE' undeclared here (not in a function) Add the missing #include. Signed-off-by: David Howells diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index b2bb779..09fa319 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -7,6 +7,7 @@ * of the GNU General Public License version 2. */ +#include #include #include #include -- cgit v0.10.2 From 4fa9f4ede88b4e2ff135b6e5717499d734508c62 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 20 Nov 2009 21:50:44 +0000 Subject: FS-Cache: Provide nop fscache_stat_d() if CONFIG_FSCACHE_STATS=n Provide nop fscache_stat_d() macro if CONFIG_FSCACHE_STATS=n lest errors like the following occur: fs/fscache/cache.c: In function 'fscache_withdraw_cache': fs/fscache/cache.c:386: error: implicit declaration of function 'fscache_stat_d' fs/fscache/cache.c:386: error: 'fscache_n_cop_sync_cache' undeclared (first use in this function) fs/fscache/cache.c:386: error: (Each undeclared identifier is reported only once fs/fscache/cache.c:386: error: for each function it appears in.) fs/fscache/cache.c:392: error: 'fscache_n_cop_dissociate_pages' undeclared (first use in this function) Signed-off-by: David Howells diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 0ca2566..edd7434 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -259,6 +259,7 @@ extern const struct file_operations fscache_stats_fops; #define __fscache_stat(stat) (NULL) #define fscache_stat(stat) do {} while (0) +#define fscache_stat_d(stat) do {} while (0) #endif /* -- cgit v0.10.2 From 31769cef2e973544164aa7d0db2e2024660d5e21 Mon Sep 17 00:00:00 2001 From: Jay Fenlason Date: Sat, 21 Nov 2009 00:05:56 +0100 Subject: firewire: ohci: pass correct iso xmit timestamps to core Here is the final set of patches I used to get ffado to work with the new firewire stack. With these patches, I was able to start ardour and record from and playback to my PreSonus Inspire1394 from a (mostly) Fedora 12 system. Signed-off-by: Jay Fenlason Until now, firewire-ohci exposed only the transmit cycle of the last transmitted packet at each isochronous transmit complete event. This made it impossible for FFADO (FireWire audio drivers in userspace) to synchronize audio-out streams. The fix is to store the timestamp of each packet in the iso xmit event. As a bonus, the transfer status is stored too. Signed-off-by: Stefan Richter diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index c07cfad..94260aa 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -1904,15 +1904,30 @@ static int handle_it_packet(struct context *context, { struct iso_context *ctx = container_of(context, struct iso_context, context); + int i; + struct descriptor *pd; - if (last->transfer_status == 0) - /* This descriptor isn't done yet, stop iteration. */ + for (pd = d; pd <= last; pd++) + if (pd->transfer_status) + break; + if (pd > last) + /* Descriptor(s) not done yet, stop iteration */ return 0; - if (le16_to_cpu(last->control) & DESCRIPTOR_IRQ_ALWAYS) + i = ctx->header_length; + if (i + 4 < PAGE_SIZE) { + /* Present this value as big-endian to match the receive code */ + *(__be32 *)(ctx->header + i) = cpu_to_be32( + ((u32)le16_to_cpu(pd->transfer_status) << 16) | + le16_to_cpu(pd->res_count)); + ctx->header_length += 4; + } + if (le16_to_cpu(last->control) & DESCRIPTOR_IRQ_ALWAYS) { ctx->base.callback(&ctx->base, le16_to_cpu(last->res_count), - 0, NULL, ctx->base.callback_data); - + ctx->header_length, ctx->header, + ctx->base.callback_data); + ctx->header_length = 0; + } return 1; } -- cgit v0.10.2 From f8d5e5a8f5be475796c7f357e43546c2d7f44540 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 21 Nov 2009 16:00:28 +0000 Subject: ARM: PNX4008: i2c-pnx: use the same dev_id for request_irq and free_irq This allows i2c-pnx to free its interrupt handler when the module is removed or if an error occurs; using the same dev_id for both request_irq and free_irq is desirable. Signed-off-by: Russell King Acked-by: Ben Dooks diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c index 1fca590..fbab684 100644 --- a/drivers/i2c/busses/i2c-pnx.c +++ b/drivers/i2c/busses/i2c-pnx.c @@ -650,7 +650,7 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev) return 0; out_irq: - free_irq(alg_data->irq, alg_data); + free_irq(alg_data->irq, i2c_pnx->adapter); out_clock: i2c_pnx->set_clock_stop(pdev); out_unmap: @@ -669,7 +669,7 @@ static int __devexit i2c_pnx_remove(struct platform_device *pdev) struct i2c_adapter *adap = i2c_pnx->adapter; struct i2c_pnx_algo_data *alg_data = adap->algo_data; - free_irq(alg_data->irq, alg_data); + free_irq(alg_data->irq, i2c_pnx->adapter); i2c_del_adapter(adap); i2c_pnx->set_clock_stop(pdev); iounmap((void *)alg_data->ioaddr); -- cgit v0.10.2 From 81065518eda14ebbce1b48c98d5077b0a059d4a3 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 18 Nov 2009 10:01:00 +0100 Subject: [ARM] pxa/spitz: fix compile regression on spitz If CONFIG_AKITA is not set, spitz fails to compile. It worked ok in rc5. Fix is one more ifdef... Signed-off-by: Pavel Machek Acked-by: Stanislav Brabec Signed-off-by: Eric Miao diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index 3da45d0..d98023f 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -802,10 +802,12 @@ static void __init spitz_init(void) { spitz_ficp_platform_data.gpio_pwdown = SPITZ_GPIO_IR_ON; +#ifdef CONFIG_MACH_BORZOI if (machine_is_borzoi()) { sharpsl_nand_platform_data.badblock_pattern = &sharpsl_akita_bbt; sharpsl_nand_platform_data.ecc_layout = &akita_oobinfo; } +#endif platform_scoop_config = &spitz_pcmcia_config; -- cgit v0.10.2 From cefbf4ea629427af2fb4709bab9fe126dcddc234 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 22 Nov 2009 17:40:28 +0000 Subject: MAINTAINERS: add maintainer information for AMBA primecell drivers Signed-off-by: Russell King diff --git a/MAINTAINERS b/MAINTAINERS index c824b4d..e9fcac2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -512,10 +512,32 @@ W: http://www.arm.linux.org.uk/ S: Maintained F: arch/arm/ +ARM PRIMECELL AACI PL041 DRIVER +M: Russell King +S: Maintained +F: sound/arm/aaci.* + +ARM PRIMECELL CLCD PL110 DRIVER +M: Russell King +S: Maintained +F: drivers/video/amba-clcd.* + +ARM PRIMECELL KMI PL050 DRIVER +M: Russell King +S: Maintained +F: drivers/input/serio/ambakmi.* +F: include/linux/amba/kmi.h + ARM PRIMECELL MMCI PL180/1 DRIVER S: Orphan F: drivers/mmc/host/mmci.* +ARM PRIMECELL BUS SUPPORT +M: Russell King +S: Maintained +F: drivers/amba/ +F: include/linux/amba/bus.h + ARM/ADI ROADRUNNER MACHINE SUPPORT M: Lennert Buytenhek L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -- cgit v0.10.2 From 56adf7e8127d601b172e180b44551ce83404348f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 22 Nov 2009 12:10:10 -0700 Subject: shdma: fix initialization error handling 1/ Error handling code following a kzalloc should free the allocated data. 2/ Report an error when no platform data is detected Both problems fixed by moving the platform data check before the allocation, and allows a goto to be killed. Reported-by: Julia Lawall Acked-by: Julia Lawall Signed-off-by: Dan Williams diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c index b3b065c..034ecf0 100644 --- a/drivers/dma/shdma.c +++ b/drivers/dma/shdma.c @@ -640,17 +640,16 @@ static int __init sh_dmae_probe(struct platform_device *pdev) #endif struct sh_dmae_device *shdev; + /* get platform data */ + if (!pdev->dev.platform_data) + return -ENODEV; + shdev = kzalloc(sizeof(struct sh_dmae_device), GFP_KERNEL); if (!shdev) { dev_err(&pdev->dev, "No enough memory\n"); - err = -ENOMEM; - goto shdev_err; + return -ENOMEM; } - /* get platform data */ - if (!pdev->dev.platform_data) - goto shdev_err; - /* platform data */ memcpy(&shdev->pdata, pdev->dev.platform_data, sizeof(struct sh_dmae_pdata)); @@ -722,7 +721,6 @@ eirq_err: rst_err: kfree(shdev); -shdev_err: return err; } -- cgit v0.10.2 From 50b6bce59d154b5db137907a5c0ed45a4e7a3829 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 23 Nov 2009 13:11:53 +0000 Subject: ASoC: Fix suspend with active audio streams When we get a stream suspend event force the power down since otherwise the stream would remain marked as active. In future we'll probably want to make this stream-specific and add an interface to make the power down of other widgets optional in order to support leaving bypass paths active while suspending the processor. Cc: stable@kernel.org Reported-by: Joonyoung Shim Tested-by: Joonyoung Shim Acked-by: Liam Girdwood Signed-off-by: Mark Brown diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index d89f6dc..66d4c16 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -973,9 +973,19 @@ static int dapm_power_widgets(struct snd_soc_codec *codec, int event) if (!w->power_check) continue; - power = w->power_check(w); - if (power) - sys_power = 1; + /* If we're suspending then pull down all the + * power. */ + switch (event) { + case SND_SOC_DAPM_STREAM_SUSPEND: + power = 0; + break; + + default: + power = w->power_check(w); + if (power) + sys_power = 1; + break; + } if (w->power == power) continue; @@ -999,8 +1009,12 @@ static int dapm_power_widgets(struct snd_soc_codec *codec, int event) case SND_SOC_DAPM_STREAM_RESUME: sys_power = 1; break; + case SND_SOC_DAPM_STREAM_SUSPEND: + sys_power = 0; + break; case SND_SOC_DAPM_STREAM_NOP: sys_power = codec->bias_level != SND_SOC_BIAS_STANDBY; + break; default: break; } -- cgit v0.10.2 From 87c687be055e67bc04189ce476690be73d16063e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Mon, 23 Nov 2009 11:44:12 -0500 Subject: ACPI: DMI init_set_sci_en_on_resume for HP-Compaq C700 ...else ACPI thermal controls fail after resume. http://bugzilla.kernel.org/show_bug.cgi?id=13745 Signed-off-by: Len Brown diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 4cc1b81..5f2c379 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -430,6 +430,14 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = { }, { .callback = init_set_sci_en_on_resume, + .ident = "Hewlett-Packard Compaq Presario C700 Notebook PC", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "Compaq Presario C700 Notebook PC"), + }, + }, + { + .callback = init_set_sci_en_on_resume, .ident = "Hewlett-Packard Compaq Presario CQ40 Notebook PC", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), -- cgit v0.10.2 From 3336f4f08e0dad7a2b6493c80b49b685141d53ad Mon Sep 17 00:00:00 2001 From: Jean PIHET Date: Mon, 23 Nov 2009 17:03:32 +0100 Subject: ARM: 5793/1: ARM: Check put_user fail in do_signal when enable OABI_COMPAT Using OABI, the call to put_user in do_signal can fail causing the calling app to hang. The solution is to check if put_user fails and force the app to seg fault in that case. Tested with multiple sleeping apps/threads (using the nanosleep syscall) and suspend/resume. Signed-off-by: janboe Signed-off-by: Jean Pihet Signed-off-by: Russell King diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 2a573d4..e7714f3 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -662,8 +662,12 @@ static void do_signal(struct pt_regs *regs, int syscall) regs->ARM_sp -= 4; usp = (u32 __user *)regs->ARM_sp; - put_user(regs->ARM_pc, usp); - regs->ARM_pc = KERN_RESTART_CODE; + if (put_user(regs->ARM_pc, usp) == 0) { + regs->ARM_pc = KERN_RESTART_CODE; + } else { + regs->ARM_sp += 4; + force_sigsegv(0, current); + } #endif } } -- cgit v0.10.2 From 9326e3614d72e40ea8d910fed829ec72c9b70798 Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 1 Oct 2009 16:47:39 -0700 Subject: MIPS: Octeon: Fix compile error in drivers/staging/octeon/ethernet-mdio.c Signed-off-by: David Daney Signed-off-by: Ralf Baechle diff --git a/drivers/staging/octeon/ethernet-mdio.c b/drivers/staging/octeon/ethernet-mdio.c index 42230e6..31a58e5 100644 --- a/drivers/staging/octeon/ethernet-mdio.c +++ b/drivers/staging/octeon/ethernet-mdio.c @@ -170,7 +170,7 @@ static u32 cvm_oct_get_link(struct net_device *dev) return ret; } -struct const ethtool_ops cvm_oct_ethtool_ops = { +const struct ethtool_ops cvm_oct_ethtool_ops = { .get_drvinfo = cvm_oct_get_drvinfo, .get_settings = cvm_oct_get_settings, .set_settings = cvm_oct_set_settings, -- cgit v0.10.2 From aabb89d6fa4c70d77fe180ee794f857decde4010 Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 3 Sep 2009 16:10:47 -0700 Subject: Staging: Octeon: Use symbolic values for irq numbers. In addition to being magic numbers, the irq number passed to free_irq is incorrect. We need to use the correct symbolic value instead. Signed-off-by: David Daney Signed-off-by: Ralf Baechle diff --git a/drivers/staging/octeon/ethernet-spi.c b/drivers/staging/octeon/ethernet-spi.c index 66190b0..00dc0f4 100644 --- a/drivers/staging/octeon/ethernet-spi.c +++ b/drivers/staging/octeon/ethernet-spi.c @@ -317,6 +317,6 @@ void cvm_oct_spi_uninit(struct net_device *dev) cvmx_write_csr(CVMX_SPXX_INT_MSK(interface), 0); cvmx_write_csr(CVMX_STXX_INT_MSK(interface), 0); } - free_irq(8 + 46, &number_spi_ports); + free_irq(OCTEON_IRQ_RML, &number_spi_ports); } } -- cgit v0.10.2 From 13c5939e42dc11da61e00fef7f6cca2a6824a59f Mon Sep 17 00:00:00 2001 From: David Daney Date: Mon, 12 Oct 2009 12:04:32 -0700 Subject: Staging: octeon-ethernet: Assign proper MAC addresses. Allocate MAC addresses using the same method as the bootloader. This avoids changing the MAC between bootloader and kernel operation as well as avoiding duplicates and use of addresses outside of the assigned range. Signed-off-by: David Daney Signed-off-by: Ralf Baechle diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c index b847951..492c502 100644 --- a/drivers/staging/octeon/ethernet.c +++ b/drivers/staging/octeon/ethernet.c @@ -111,6 +111,16 @@ MODULE_PARM_DESC(disable_core_queueing, "\n" "\tallows packets to be sent without lock contention in the packet\n" "\tscheduler resulting in some cases in improved throughput.\n"); + +/* + * The offset from mac_addr_base that should be used for the next port + * that is configured. By convention, if any mgmt ports exist on the + * chip, they get the first mac addresses, The ports controlled by + * this driver are numbered sequencially following any mgmt addresses + * that may exist. + */ +static unsigned int cvm_oct_mac_addr_offset; + /** * Periodic timer to check auto negotiation */ @@ -474,16 +484,30 @@ static int cvm_oct_common_set_mac_address(struct net_device *dev, void *addr) */ int cvm_oct_common_init(struct net_device *dev) { - static int count; - char mac[8] = { 0x00, 0x00, - octeon_bootinfo->mac_addr_base[0], - octeon_bootinfo->mac_addr_base[1], - octeon_bootinfo->mac_addr_base[2], - octeon_bootinfo->mac_addr_base[3], - octeon_bootinfo->mac_addr_base[4], - octeon_bootinfo->mac_addr_base[5] + count - }; struct octeon_ethernet *priv = netdev_priv(dev); + struct sockaddr sa; + u64 mac = ((u64)(octeon_bootinfo->mac_addr_base[0] & 0xff) << 40) | + ((u64)(octeon_bootinfo->mac_addr_base[1] & 0xff) << 32) | + ((u64)(octeon_bootinfo->mac_addr_base[2] & 0xff) << 24) | + ((u64)(octeon_bootinfo->mac_addr_base[3] & 0xff) << 16) | + ((u64)(octeon_bootinfo->mac_addr_base[4] & 0xff) << 8) | + (u64)(octeon_bootinfo->mac_addr_base[5] & 0xff); + + mac += cvm_oct_mac_addr_offset; + sa.sa_data[0] = (mac >> 40) & 0xff; + sa.sa_data[1] = (mac >> 32) & 0xff; + sa.sa_data[2] = (mac >> 24) & 0xff; + sa.sa_data[3] = (mac >> 16) & 0xff; + sa.sa_data[4] = (mac >> 8) & 0xff; + sa.sa_data[5] = mac & 0xff; + + if (cvm_oct_mac_addr_offset >= octeon_bootinfo->mac_addr_count) + printk(KERN_DEBUG "%s: Using MAC outside of the assigned range:" + " %02x:%02x:%02x:%02x:%02x:%02x\n", dev->name, + sa.sa_data[0] & 0xff, sa.sa_data[1] & 0xff, + sa.sa_data[2] & 0xff, sa.sa_data[3] & 0xff, + sa.sa_data[4] & 0xff, sa.sa_data[5] & 0xff); + cvm_oct_mac_addr_offset++; /* * Force the interface to use the POW send if always_use_pow @@ -496,14 +520,12 @@ int cvm_oct_common_init(struct net_device *dev) if (priv->queue != -1 && USE_HW_TCPUDP_CHECKSUM) dev->features |= NETIF_F_IP_CSUM; - count++; - /* We do our own locking, Linux doesn't need to */ dev->features |= NETIF_F_LLTX; SET_ETHTOOL_OPS(dev, &cvm_oct_ethtool_ops); cvm_oct_mdio_setup_device(dev); - dev->netdev_ops->ndo_set_mac_address(dev, mac); + dev->netdev_ops->ndo_set_mac_address(dev, &sa); dev->netdev_ops->ndo_change_mtu(dev, dev->mtu); /* @@ -620,6 +642,13 @@ static int __init cvm_oct_init_module(void) pr_notice("cavium-ethernet %s\n", OCTEON_ETHERNET_VERSION); + if (OCTEON_IS_MODEL(OCTEON_CN52XX)) + cvm_oct_mac_addr_offset = 2; /* First two are the mgmt ports. */ + else if (OCTEON_IS_MODEL(OCTEON_CN56XX)) + cvm_oct_mac_addr_offset = 1; /* First one is the mgmt port. */ + else + cvm_oct_mac_addr_offset = 0; + cvm_oct_proc_initialize(); cvm_oct_rx_initialize(); cvm_oct_configure_common_hw(); -- cgit v0.10.2 From bffbc94a4d2c1769c3826fceddd2dbb75e72c80b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 23 Nov 2009 16:38:56 -0800 Subject: sparc64: Fix definition of VMEMMAP_SIZE. This was the cause of various boot failures on V480, V880, etc. systems. Kernel image memory was being overwritten because the vmemmap[] array was being sized to small. So if you had physical memory addresses past a certain point, the early bootup would spam all over variables in the kernel data section. The vmemmap mappings map page structs, not page struct pointers. And that was the key thinko in the macro definition. This was fixable thanks to the help, reports, and tireless patience of Hermann Lauer. Reported-by: Hermann Lauer Signed-off-by: David S. Miller diff --git a/arch/sparc/mm/init_64.h b/arch/sparc/mm/init_64.h index c2f772d..77d1b31 100644 --- a/arch/sparc/mm/init_64.h +++ b/arch/sparc/mm/init_64.h @@ -45,7 +45,7 @@ extern void free_initmem(void); #define VMEMMAP_ALIGN(x) (((x)+VMEMMAP_CHUNK-1UL)&VMEMMAP_CHUNK_MASK) #define VMEMMAP_SIZE ((((1UL << MAX_PHYSADDR_BITS) >> PAGE_SHIFT) * \ - sizeof(struct page *)) >> VMEMMAP_CHUNK_SHIFT) + sizeof(struct page)) >> VMEMMAP_CHUNK_SHIFT) extern unsigned long vmemmap_table[VMEMMAP_SIZE]; #endif -- cgit v0.10.2 From 0ebf17174b4bdd99ab81c476714c91ee335fdcbf Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 5 Nov 2009 15:39:10 +1000 Subject: drm/radeon/kms: resume AGP by calling init. AGP resume was broken since we moved to the new init path, because we never re-enabled AGP on these systems at resume time. This patch just calls the AGP resume call which just does the reinit at resume time like the old path did. Since AGP is pretty much gpu independant I did it outside the gpu specific code. Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 757f5cd..224506a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -519,6 +519,7 @@ typedef int (*radeon_packet3_check_t)(struct radeon_cs_parser *p, * AGP */ int radeon_agp_init(struct radeon_device *rdev); +void radeon_agp_resume(struct radeon_device *rdev); void radeon_agp_fini(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_agp.c b/drivers/gpu/drm/radeon/radeon_agp.c index 23ea995..54bf49a 100644 --- a/drivers/gpu/drm/radeon/radeon_agp.c +++ b/drivers/gpu/drm/radeon/radeon_agp.c @@ -237,6 +237,18 @@ int radeon_agp_init(struct radeon_device *rdev) #endif } +void radeon_agp_resume(struct radeon_device *rdev) +{ +#if __OS_HAS_AGP + int r; + if (rdev->flags & RADEON_IS_AGP) { + r = radeon_agp_init(rdev); + if (r) + dev_warn(rdev->dev, "radeon AGP reinit failed\n"); + } +#endif +} + void radeon_agp_fini(struct radeon_device *rdev) { #if __OS_HAS_AGP diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index e3f9edf..41bb76f 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -688,6 +688,8 @@ int radeon_resume_kms(struct drm_device *dev) return -1; } pci_set_master(dev->pdev); + /* resume AGP if in use */ + radeon_agp_resume(rdev); radeon_resume(rdev); radeon_restore_bios_scratch_regs(rdev); fb_set_suspend(rdev->fbdev_info, 0); -- cgit v0.10.2 From 7064fef56369c9e2c6e35ff6d6b4b63d42a859ce Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 5 Nov 2009 10:12:54 -0800 Subject: drm: work around EDIDs with bad htotal/vtotal values We did this on the userspace side, but we need a similar fix for the kernel. Fixes LP #460664. Signed-off-by: Jesse Barnes Cc: stable@kernel.org Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index cea665d..b54ba63 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -662,6 +662,12 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev, return NULL; } + /* Some EDIDs have bogus h/vtotal values */ + if (mode->hsync_end > mode->htotal) + mode->htotal = mode->hsync_end + 1; + if (mode->vsync_end > mode->vtotal) + mode->vtotal = mode->vsync_end + 1; + drm_mode_set_name(mode); if (pt->misc & DRM_EDID_PT_INTERLACED) -- cgit v0.10.2 From ef63062716415d6e271815872b6c6654ffa9ac26 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 12 Nov 2009 09:37:39 +1000 Subject: drm/radeon/kms: fix handling of d1/d2 vga An rv515 laptop I got wouldn't startup with a montior plugged in, found the proper bug hopefully with us not turning off D2VGA here when we should. Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 7935f79..ba68c9f 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -137,8 +137,6 @@ int rv515_mc_wait_for_idle(struct radeon_device *rdev) void rv515_vga_render_disable(struct radeon_device *rdev) { - WREG32(R_000330_D1VGA_CONTROL, 0); - WREG32(R_000338_D2VGA_CONTROL, 0); WREG32(R_000300_VGA_RENDER_CONTROL, RREG32(R_000300_VGA_RENDER_CONTROL) & C_000300_VGA_VSTATUS_CNTL); } @@ -382,7 +380,6 @@ void rv515_mc_stop(struct radeon_device *rdev, struct rv515_mc_save *save) save->d2crtc_control = RREG32(R_006880_D2CRTC_CONTROL); /* Stop all video */ - WREG32(R_000330_D1VGA_CONTROL, 0); WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0); WREG32(R_000300_VGA_RENDER_CONTROL, 0); WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 1); @@ -391,6 +388,8 @@ void rv515_mc_stop(struct radeon_device *rdev, struct rv515_mc_save *save) WREG32(R_006880_D2CRTC_CONTROL, 0); WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 0); WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0); + WREG32(R_000330_D1VGA_CONTROL, 0); + WREG32(R_000338_D2VGA_CONTROL, 0); } void rv515_mc_resume(struct radeon_device *rdev, struct rv515_mc_save *save) @@ -404,14 +403,14 @@ void rv515_mc_resume(struct radeon_device *rdev, struct rv515_mc_save *save) WREG32(R_000328_VGA_HDP_CONTROL, save->vga_hdp_control); mdelay(1); /* Restore video state */ + WREG32(R_000330_D1VGA_CONTROL, save->d1vga_control); + WREG32(R_000338_D2VGA_CONTROL, save->d2vga_control); WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 1); WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 1); WREG32(R_006080_D1CRTC_CONTROL, save->d1crtc_control); WREG32(R_006880_D2CRTC_CONTROL, save->d2crtc_control); WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 0); WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0); - WREG32(R_000330_D1VGA_CONTROL, save->d1vga_control); - WREG32(R_000338_D2VGA_CONTROL, save->d2vga_control); WREG32(R_000300_VGA_RENDER_CONTROL, save->vga_render_control); } -- cgit v0.10.2 From 23115b0592bde5da249fcbdad7714c1f96a8e5f5 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 12 Nov 2009 15:53:44 +1000 Subject: drm/radeon/kms: read back register before writing in IIO. This fixes RH bugzilla #527874. On resume the atom posting wasn't working, however vbe posting was going fine, after 2 weeks over irc, and 8 hrs with the hardware, I tracked it down to the memory device table and it access the MC registers via IIO, it appears the rv515 atom iio table might not be fully functional, so adding a readback before doing a write either provides enough delay to make things resume correctly. Thanks to Peng Huang at Red Hat for coming to Brisbane. Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c index 901befe..d67c425 100644 --- a/drivers/gpu/drm/radeon/atom.c +++ b/drivers/gpu/drm/radeon/atom.c @@ -107,6 +107,7 @@ static uint32_t atom_iio_execute(struct atom_context *ctx, int base, base += 3; break; case ATOM_IIO_WRITE: + (void)ctx->card->reg_read(ctx->card, CU16(base + 1)); ctx->card->reg_write(ctx->card, CU16(base + 1), temp); base += 3; break; -- cgit v0.10.2 From 0beb81ab45c67de4b3aa85faad604cff8ed133a8 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 13 Nov 2009 20:56:35 +0100 Subject: drm/radeon/kms: Disable TV load detect on RS400,RC410,RS480 RS400,RC410,RS480 chipset seems to report a lot of false positive with load detect on TV output. We haven't yet found a way to make load detect reliable on those chipset, thus just disable it for TV output. Would avoid user to experience phantom screen because X believe there is a monitor connected to the TV output. Signed-off-by: Jerome Glisse Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index fce4c40..55fecfc 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1149,6 +1149,13 @@ radeon_add_legacy_connector(struct drm_device *dev, if (ret) goto failed; radeon_connector->dac_load_detect = true; + /* RS400,RC410,RS480 chipset seems to report a lot + * of false positive on load detect, we haven't yet + * found a way to make load detect reliable on those + * chipset, thus just disable it for TV. + */ + if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480) + radeon_connector->dac_load_detect = false; drm_connector_attach_property(&radeon_connector->base, rdev->mode_info.load_detect_property, 1); -- cgit v0.10.2 From a698cf34ea867efef12fc29dd63d443f0c71a53c Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 13 Nov 2009 20:56:58 +0100 Subject: drm: mm always protect change to unused_nodes with unused_lock spinlock unused_nodes modification needs to be protected by unused_lock spinlock. Here is an example of an usage where there is no such protection without this patch. Process 1: 1-drm_mm_pre_get(this function modify unused_nodes list) 2-spin_lock(spinlock protecting mm struct) 3-drm_mm_put_block(this function might modify unused_nodes list but doesn't protect modification with unused_lock) 4-spin_unlock(spinlock protecting mm struct) Process2: 1-drm_mm_pre_get(this function modify unused_nodes list) At this point Process1 & Process2 might both be doing modification to unused_nodes list. This patch add unused_lock protection into drm_mm_put_block to avoid such issue. Signed-off-by: Jerome Glisse Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index c861d80..97dc5a4 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -103,6 +103,11 @@ static struct drm_mm_node *drm_mm_kmalloc(struct drm_mm *mm, int atomic) return child; } +/* drm_mm_pre_get() - pre allocate drm_mm_node structure + * drm_mm: memory manager struct we are pre-allocating for + * + * Returns 0 on success or -ENOMEM if allocation fails. + */ int drm_mm_pre_get(struct drm_mm *mm) { struct drm_mm_node *node; @@ -253,12 +258,14 @@ void drm_mm_put_block(struct drm_mm_node *cur) prev_node->size += next_node->size; list_del(&next_node->ml_entry); list_del(&next_node->fl_entry); + spin_lock(&mm->unused_lock); if (mm->num_unused < MM_UNUSED_TARGET) { list_add(&next_node->fl_entry, &mm->unused_nodes); ++mm->num_unused; } else kfree(next_node); + spin_unlock(&mm->unused_lock); } else { next_node->size += cur->size; next_node->start = cur->start; @@ -271,11 +278,13 @@ void drm_mm_put_block(struct drm_mm_node *cur) list_add(&cur->fl_entry, &mm->fl_entry); } else { list_del(&cur->ml_entry); + spin_lock(&mm->unused_lock); if (mm->num_unused < MM_UNUSED_TARGET) { list_add(&cur->fl_entry, &mm->unused_nodes); ++mm->num_unused; } else kfree(cur); + spin_unlock(&mm->unused_lock); } } -- cgit v0.10.2 From f82f5f3ac4de6c6b872fcbb3dec97f368e78ff58 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Thu, 12 Nov 2009 14:13:53 +0100 Subject: drm/radeon/kms: Report vga connector is connected according to ddc_probe On broken EDID we were reporting vga connector to be disconnected even if ddc probe did found a monitor. This patch report that the connector is connected on such case. This allow drm to add a fail safe mode (800x600 at the time of this patch) thus user can boot and later add a mode which match its monitor capabilities. Signed-off-by: Jerome Glisse Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 55fecfc..29763ce 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -566,8 +566,9 @@ static enum drm_connector_status radeon_vga_detect(struct drm_connector *connect radeon_i2c_do_lock(radeon_connector, 0); if (!radeon_connector->edid) { - DRM_ERROR("DDC responded but not EDID found for %s\n", - drm_get_connector_name(connector)); + DRM_ERROR("%s: probed a monitor but no|invalid EDID\n", + drm_get_connector_name(connector)); + ret = connector_status_connected; } else { radeon_connector->use_digital = !!(radeon_connector->edid->input & DRM_EDID_INPUT_DIGITAL); @@ -720,8 +721,8 @@ static enum drm_connector_status radeon_dvi_detect(struct drm_connector *connect radeon_i2c_do_lock(radeon_connector, 0); if (!radeon_connector->edid) { - DRM_ERROR("DDC responded but not EDID found for %s\n", - drm_get_connector_name(connector)); + DRM_ERROR("%s: probed a monitor but no|invalid EDID\n", + drm_get_connector_name(connector)); } else { radeon_connector->use_digital = !!(radeon_connector->edid->input & DRM_EDID_INPUT_DIGITAL); -- cgit v0.10.2 From 79cc304f3e2fda202242036326afb2aeca486156 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Nov 2009 14:08:54 -0800 Subject: drm: make sure page protections are updated after changing vm_flags Some architectures compute ->vm_page_prot depending on ->vm_flags, so we need to update the protections after adjusting the flags. AFAIK this only affects running X under Xen; without this patch you get lots of coloured blobs on the screen, or maybe a complete lockup. Or anything really. But that still depends on lots of out-of-tree stuff, so I don't think there are any consequences for anyone else. But it is wrong in principle. Reported-by: Jan Beulich Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andrew Morton Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 8039199..e9dbb48 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -552,7 +552,7 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND; vma->vm_ops = obj->dev->driver->gem_vm_ops; vma->vm_private_data = map->handle; - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); /* Take a ref for this mapping of the object, so that the fault * handler can dereference the mmap offset's pointer to the object. -- cgit v0.10.2 From 5349ef3127c77075ff70b2014f17ae0fbcaaf199 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Wed, 4 Nov 2009 09:42:52 +0100 Subject: drm/fb: fix FBIOGET/PUT_VSCREENINFO pixel clock handling When the framebuffer driver does not publish detailed timing information for the current video mode, the correct value for the pixclock field is zero, not -1. Since pixclock is actually unsigned, the value -1 would be interpreted as 4294967295 picoseconds (i.e., about 4 milliseconds) by register_framebuffer() and userspace programs. This patch allows X.org's fbdev driver to work. Signed-off-by: Clemens Ladisch Tested-by: Paulius Zaleckas Cc: stable@kernel.org Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index dc8e374..65ef011 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -599,7 +599,7 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, struct drm_framebuffer *fb = fb_helper->fb; int depth; - if (var->pixclock == -1 || !var->pixclock) + if (var->pixclock != 0) return -EINVAL; /* Need to resize the fb object !!! */ @@ -691,7 +691,7 @@ int drm_fb_helper_set_par(struct fb_info *info) int ret; int i; - if (var->pixclock != -1) { + if (var->pixclock != 0) { DRM_ERROR("PIXEL CLCOK SET\n"); return -EINVAL; } @@ -904,7 +904,7 @@ int drm_fb_helper_single_fb_probe(struct drm_device *dev, fb_helper->fb = fb; if (new_fb) { - info->var.pixclock = -1; + info->var.pixclock = 0; if (register_framebuffer(info) < 0) return -EINVAL; } else { -- cgit v0.10.2 From d77b81974521c82fa6fda38dfff1b491dcc62a32 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 24 Nov 2009 16:53:00 +0100 Subject: [CPUFREQ] Enable ACPI PDC handshake for VIA/Centaur CPUs In commit 0de51088e6a82bc8413d3ca9e28bbca2788b5b53, we introduced the use of acpi-cpufreq on VIA/Centaur CPU's by removing a vendor check for VENDOR_INTEL. However, as it turns out, at least the Nano CPU's also need the PDC (processor driver capabilities) handshake in order to activate the methods required for acpi-cpufreq. Since arch_acpi_processor_init_pdc() contains another vendor check for Intel, the PDC is not initialized on VIA CPU's. The resulting behavior of a current mainline kernel on such systems is: acpi-cpufreq loads and it indicates CPU frequency changes. However, the CPU stays at a single frequency This trivial patch ensures that init_intel_pdc() is called on Intel and VIA/Centaur CPU's alike. Signed-off-by: Harald Welte Signed-off-by: Dave Jones diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index d296f4a..d85d1b2 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -79,7 +79,8 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr) struct cpuinfo_x86 *c = &cpu_data(pr->id); pr->pdc = NULL; - if (c->x86_vendor == X86_VENDOR_INTEL) + if (c->x86_vendor == X86_VENDOR_INTEL || + c->x86_vendor == X86_VENDOR_CENTAUR) init_intel_pdc(pr, c); return; -- cgit v0.10.2 From 4e68e188411ea98e40309700cf0c89ad4469ac1d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 24 Nov 2009 13:56:39 -0800 Subject: sunsab: Do not set sunsab_reg.cons right before registering minors. Other Sun serial drivers do not do this, and if we keep it this way it ends up registering all serial devices as consoles rather than just the one which we explicitly register via sunserial_console_match() which uses add_preferred_console(). Signed-off-by: David S. Miller diff --git a/drivers/serial/sunsab.c b/drivers/serial/sunsab.c index d1ad341..8755de8 100644 --- a/drivers/serial/sunsab.c +++ b/drivers/serial/sunsab.c @@ -1116,7 +1116,6 @@ static int __init sunsab_init(void) if (!sunsab_ports) return -ENOMEM; - sunsab_reg.cons = SUNSAB_CONSOLE(); err = sunserial_register_minors(&sunsab_reg, num_channels); if (err) { kfree(sunsab_ports); -- cgit v0.10.2 From 8301d386afc55c877bafe2c6c7dc75a96ddd2838 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 24 Nov 2009 13:58:52 -0800 Subject: sunsu: Fix detection of SU ports which are RSC console or control. These device nodes are named "rsc-console" and "rsc-control" rather than 'serial', but the device_type property is 'serial' so we'll tip off of that for detection. Signed-off-by: David S. Miller diff --git a/drivers/serial/sunsu.c b/drivers/serial/sunsu.c index 68d262b..f6511a4 100644 --- a/drivers/serial/sunsu.c +++ b/drivers/serial/sunsu.c @@ -1517,6 +1517,10 @@ static const struct of_device_id su_match[] = { .name = "serial", .compatible = "su", }, + { + .type = "serial", + .compatible = "su", + }, {}, }; MODULE_DEVICE_TABLE(of, su_match); @@ -1548,6 +1552,12 @@ static int __init sunsu_init(void) num_uart++; } } + for_each_node_by_type(dp, "serial") { + if (of_device_is_compatible(dp, "su")) { + if (su_get_type(dp) == SU_PORT_PORT) + num_uart++; + } + } if (num_uart) { err = sunserial_register_minors(&sunsu_reg, num_uart); -- cgit v0.10.2 From 4e3533d05b6e5e66d1cda27f6671251c99c62894 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 24 Nov 2009 14:03:34 -0800 Subject: serial: suncore: Add 'ignore_line' argument to sunserial_console_match(). This tells the logic to ignore the line match when deciding whether the device is the OpenFirmware specified console device or not. This is going to be used in the SU driver for rsc-console detection. There is probably a better way to handle this, but this is the least intrusive solution for now which we can validate won't break any other cases. Signed-off-by: David S. Miller diff --git a/drivers/serial/suncore.c b/drivers/serial/suncore.c index a2d4a19..50d3b5e 100644 --- a/drivers/serial/suncore.c +++ b/drivers/serial/suncore.c @@ -53,20 +53,21 @@ void sunserial_unregister_minors(struct uart_driver *drv, int count) EXPORT_SYMBOL(sunserial_unregister_minors); int sunserial_console_match(struct console *con, struct device_node *dp, - struct uart_driver *drv, int line) + struct uart_driver *drv, int line, bool ignore_line) { - int off; - if (!con || of_console_device != dp) return 0; - off = 0; - if (of_console_options && - *of_console_options == 'b') - off = 1; + if (!ignore_line) { + int off = 0; - if ((line & 1) != off) - return 0; + if (of_console_options && + *of_console_options == 'b') + off = 1; + + if ((line & 1) != off) + return 0; + } con->index = line; drv->cons = con; diff --git a/drivers/serial/suncore.h b/drivers/serial/suncore.h index 042668a..cab95b3f 100644 --- a/drivers/serial/suncore.h +++ b/drivers/serial/suncore.h @@ -26,7 +26,7 @@ extern int sunserial_register_minors(struct uart_driver *, int); extern void sunserial_unregister_minors(struct uart_driver *, int); extern int sunserial_console_match(struct console *, struct device_node *, - struct uart_driver *, int); + struct uart_driver *, int, bool); extern void sunserial_console_termios(struct console *); #endif /* !(_SERIAL_SUN_H) */ diff --git a/drivers/serial/sunsab.c b/drivers/serial/sunsab.c index 8755de8..e7215c2 100644 --- a/drivers/serial/sunsab.c +++ b/drivers/serial/sunsab.c @@ -1027,10 +1027,12 @@ static int __devinit sab_probe(struct of_device *op, const struct of_device_id * goto out1; sunserial_console_match(SUNSAB_CONSOLE(), op->node, - &sunsab_reg, up[0].port.line); + &sunsab_reg, up[0].port.line, + false); sunserial_console_match(SUNSAB_CONSOLE(), op->node, - &sunsab_reg, up[1].port.line); + &sunsab_reg, up[1].port.line, + false); err = uart_add_one_port(&sunsab_reg, &up[0].port); if (err) diff --git a/drivers/serial/sunsu.c b/drivers/serial/sunsu.c index f6511a4..4868b31 100644 --- a/drivers/serial/sunsu.c +++ b/drivers/serial/sunsu.c @@ -1468,7 +1468,8 @@ static int __devinit su_probe(struct of_device *op, const struct of_device_id *m up->port.ops = &sunsu_pops; sunserial_console_match(SUNSU_CONSOLE(), dp, - &sunsu_reg, up->port.line); + &sunsu_reg, up->port.line, + false); err = uart_add_one_port(&sunsu_reg, &up->port); if (err) goto out_unmap; diff --git a/drivers/serial/sunzilog.c b/drivers/serial/sunzilog.c index ef693ae..3242688 100644 --- a/drivers/serial/sunzilog.c +++ b/drivers/serial/sunzilog.c @@ -1416,7 +1416,8 @@ static int __devinit zs_probe(struct of_device *op, const struct of_device_id *m if (!keyboard_mouse) { if (sunserial_console_match(SUNZILOG_CONSOLE(), op->node, - &sunzilog_reg, up[0].port.line)) + &sunzilog_reg, up[0].port.line, + false)) up->flags |= SUNZILOG_FLAG_IS_CONS; err = uart_add_one_port(&sunzilog_reg, &up[0].port); if (err) { @@ -1425,7 +1426,8 @@ static int __devinit zs_probe(struct of_device *op, const struct of_device_id *m return err; } if (sunserial_console_match(SUNZILOG_CONSOLE(), op->node, - &sunzilog_reg, up[1].port.line)) + &sunzilog_reg, up[1].port.line, + false)) up->flags |= SUNZILOG_FLAG_IS_CONS; err = uart_add_one_port(&sunzilog_reg, &up[1].port); if (err) { -- cgit v0.10.2 From 457931de3b0925dc2eb941bc7d611a509be36dff Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 24 Nov 2009 14:09:56 -0800 Subject: serial: suncore: Fix RSC/LOM handling in sunserial_console_termios(). RSC and LOM devices have fixed speed settings. We already had some code to match and handle "rsc" named devices on E250 systems, but we also have to handle 'rsc-console', 'rsc-control', and 'lom-console'. Also, in order to get this right regardless of what 'output-device' happens to be, explicitly pass the UART device node pointer to this routine. Signed-off-by: David S. Miller diff --git a/drivers/serial/suncore.c b/drivers/serial/suncore.c index 50d3b5e..ed7d958 100644 --- a/drivers/serial/suncore.c +++ b/drivers/serial/suncore.c @@ -77,23 +77,24 @@ int sunserial_console_match(struct console *con, struct device_node *dp, } EXPORT_SYMBOL(sunserial_console_match); -void -sunserial_console_termios(struct console *con) +void sunserial_console_termios(struct console *con, struct device_node *uart_dp) { - struct device_node *dp; - const char *od, *mode, *s; + const char *mode, *s; char mode_prop[] = "ttyX-mode"; int baud, bits, stop, cflag; char parity; - dp = of_find_node_by_path("/options"); - od = of_get_property(dp, "output-device", NULL); - if (!strcmp(od, "rsc")) { - mode = of_get_property(of_console_device, + if (!strcmp(uart_dp->name, "rsc") || + !strcmp(uart_dp->name, "rsc-console") || + !strcmp(uart_dp->name, "rsc-control")) { + mode = of_get_property(uart_dp, "ssp-console-modes", NULL); if (!mode) mode = "115200,8,n,1,-"; + } else if (!strcmp(uart_dp->name, "lom-console")) { + mode = "9600,8,n,1,-"; } else { + struct device_node *dp; char c; c = 'a'; @@ -102,6 +103,7 @@ sunserial_console_termios(struct console *con) mode_prop[3] = c; + dp = of_find_node_by_path("/options"); mode = of_get_property(dp, mode_prop, NULL); if (!mode) mode = "9600,8,n,1,-"; diff --git a/drivers/serial/suncore.h b/drivers/serial/suncore.h index cab95b3f..db20579 100644 --- a/drivers/serial/suncore.h +++ b/drivers/serial/suncore.h @@ -27,6 +27,7 @@ extern void sunserial_unregister_minors(struct uart_driver *, int); extern int sunserial_console_match(struct console *, struct device_node *, struct uart_driver *, int, bool); -extern void sunserial_console_termios(struct console *); +extern void sunserial_console_termios(struct console *, + struct device_node *); #endif /* !(_SERIAL_SUN_H) */ diff --git a/drivers/serial/sunhv.c b/drivers/serial/sunhv.c index d548652..d14cca7 100644 --- a/drivers/serial/sunhv.c +++ b/drivers/serial/sunhv.c @@ -566,7 +566,7 @@ static int __devinit hv_probe(struct of_device *op, const struct of_device_id *m goto out_free_con_read_page; sunserial_console_match(&sunhv_console, op->node, - &sunhv_reg, port->line); + &sunhv_reg, port->line, false); err = uart_add_one_port(&sunhv_reg, port); if (err) diff --git a/drivers/serial/sunsab.c b/drivers/serial/sunsab.c index e7215c2..d514e28 100644 --- a/drivers/serial/sunsab.c +++ b/drivers/serial/sunsab.c @@ -883,7 +883,7 @@ static int sunsab_console_setup(struct console *con, char *options) printk("Console: ttyS%d (SAB82532)\n", (sunsab_reg.minor - 64) + con->index); - sunserial_console_termios(con); + sunserial_console_termios(con, to_of_device(up->port.dev)->node); switch (con->cflag & CBAUD) { case B150: baud = 150; break; diff --git a/drivers/serial/sunzilog.c b/drivers/serial/sunzilog.c index 3242688..2c7a66a 100644 --- a/drivers/serial/sunzilog.c +++ b/drivers/serial/sunzilog.c @@ -1180,7 +1180,7 @@ static int __init sunzilog_console_setup(struct console *con, char *options) (sunzilog_reg.minor - 64) + con->index, con->index); /* Get firmware console settings. */ - sunserial_console_termios(con); + sunserial_console_termios(con, to_of_device(up->port.dev)->node); /* Firmware console speed is limited to 150-->38400 baud so * this hackish cflag thing is OK. -- cgit v0.10.2 From 1917d17b903955b8b2903626a2e01d071a5d0ec9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 24 Nov 2009 14:11:40 -0800 Subject: sunsu: Pass true 'ignore_line' to console match when RSC or LOM console. Signed-off-by: David S. Miller diff --git a/drivers/serial/sunsu.c b/drivers/serial/sunsu.c index 4868b31..4ee4167 100644 --- a/drivers/serial/sunsu.c +++ b/drivers/serial/sunsu.c @@ -1409,6 +1409,7 @@ static int __devinit su_probe(struct of_device *op, const struct of_device_id *m struct uart_sunsu_port *up; struct resource *rp; enum su_type type; + bool ignore_line; int err; type = su_get_type(dp); @@ -1467,9 +1468,14 @@ static int __devinit su_probe(struct of_device *op, const struct of_device_id *m up->port.ops = &sunsu_pops; + ignore_line = false; + if (!strcmp(dp->name, "rsc-console") || + !strcmp(dp->name, "lom-console")) + ignore_line = true; + sunserial_console_match(SUNSU_CONSOLE(), dp, &sunsu_reg, up->port.line, - false); + ignore_line); err = uart_add_one_port(&sunsu_reg, &up->port); if (err) goto out_unmap; -- cgit v0.10.2 From be24656a5e2d68bfd0744f0742c4aceef2cf44b5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 24 Nov 2009 14:12:50 -0800 Subject: sunsu: Use sunserial_console_termios() in sunsu_console_setup(). Be like the other Sun serial drivers otherwise the special handling of OpenFirmware options and hard-coded overrides for LOM/RSC consoles will not be handled. Signed-off-by: David S. Miller diff --git a/drivers/serial/sunsu.c b/drivers/serial/sunsu.c index 4ee4167..170d3d6 100644 --- a/drivers/serial/sunsu.c +++ b/drivers/serial/sunsu.c @@ -1329,11 +1329,9 @@ static void sunsu_console_write(struct console *co, const char *s, */ static int __init sunsu_console_setup(struct console *co, char *options) { + static struct ktermios dummy; + struct ktermios termios; struct uart_port *port; - int baud = 9600; - int bits = 8; - int parity = 'n'; - int flow = 'n'; printk("Console: ttyS%d (SU)\n", (sunsu_reg.minor - 64) + co->index); @@ -1352,10 +1350,15 @@ static int __init sunsu_console_setup(struct console *co, char *options) */ spin_lock_init(&port->lock); - if (options) - uart_parse_options(options, &baud, &parity, &bits, &flow); + /* Get firmware console settings. */ + sunserial_console_termios(co, to_of_device(port->dev)->node); - return uart_set_options(port, co, baud, parity, bits, flow); + memset(&termios, 0, sizeof(struct ktermios)); + termios.c_cflag = co->cflag; + port->mctrl |= TIOCM_DTR; + port->ops->set_termios(port, &termios, &dummy); + + return 0; } static struct console sunsu_console = { -- cgit v0.10.2 From 8e6c0332d5032aef2d3bc8f41771f999112c8c66 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 24 Nov 2009 22:17:59 +0000 Subject: [CIFS] fix oops in cifs_lookup during net boot Fixes bugzilla.kernel.org bug number 14641 Lookup called during network boot (network root filesystem for diskless workstation) has case where nd is null in lookup. This patch fixes that in cifs_lookup. (Shirish noted that 2.6.30 and 2.6.31 stable need the same check) Signed-off-by: Shirish Pargaonkar Acked-by: Jeff Layton Tested-by: Vladimir Stavrinov CC: Stable Signed-off-by: Steve French diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 627a60a..32771f5 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -643,7 +643,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, * O_EXCL: optimize away the lookup, but don't hash the dentry. Let * the VFS handle the create. */ - if (nd->flags & LOOKUP_EXCL) { + if (nd && (nd->flags & LOOKUP_EXCL)) { d_instantiate(direntry, NULL); return 0; } @@ -675,7 +675,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, * reduction in network traffic in the other paths. */ if (pTcon->unix_ext) { - if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && + if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && (nd->intent.open.flags & O_CREAT)) { rc = cifs_posix_open(full_path, &newInode, nd->path.mnt, -- cgit v0.10.2 From cea62343956c24452700c06cf028b72414c58a74 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 24 Nov 2009 22:49:37 +0000 Subject: [CIFS] Duplicate data on appending to some Samba servers SMB writes are sent with a starting offset and length. When the server supports the newer SMB trans2 posix open (rather than using the SMB NTCreateX) a file can be opened with SMB_O_APPEND flag, and for that case Samba server assumes that the offset sent in SMBWriteX is unneeded since the write should go to the end of the file - which can cause problems if the write was cached (since the beginning part of a page could be written twice by the client mm). Jeff suggested that masking the flag on posix open on the client is easiest for the time being. Note that recent Samba server also had an unrelated problem with SMB NTCreateX and append (see samba bugzilla bug number 6898) which should not affect current Linux clients (unless cifs Unix Extensions are disabled). The cifs client did not send the O_APPEND flag on posix open before 2.6.29 so the fix is unneeded on early kernels. In the future, for the non-cached case (O_DIRECT, and forcedirectio mounts) it would be possible and useful to send O_APPEND on posix open (for Windows case: FILE_APPEND_DATA but not FILE_WRITE_DATA on SMB NTCreateX) but for cached writes although the vfs sets the offset to end of file it may fragment a write across pages - so we can't send O_APPEND on open (could result in sending part of a page twice). CC: Stable Reviewed-by: Shirish Pargaonkar Signed-off-by: Jeff Layton Signed-off-by: Steve French diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 32771f5..d3a6b07 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -214,8 +214,6 @@ int cifs_posix_open(char *full_path, struct inode **pinode, posix_flags |= SMB_O_EXCL; if (oflags & O_TRUNC) posix_flags |= SMB_O_TRUNC; - if (oflags & O_APPEND) - posix_flags |= SMB_O_APPEND; if (oflags & O_SYNC) posix_flags |= SMB_O_SYNC; if (oflags & O_DIRECTORY) -- cgit v0.10.2 From 2f81e752da4781fc276689fc14391346d0dbbe78 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 25 Nov 2009 00:11:31 +0000 Subject: [CIFS] Fix sparse warning Also update CHANGES file Signed-off-by: Steve French diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 145540a..094ea65 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,3 +1,12 @@ +Version 1.61 +------------ +Fix append problem to Samba servers (files opened with O_APPEND could +have duplicated data). Fix oops in cifs_lookup. Workaround problem +mounting to OS/400 Netserve. Fix oops in cifs_get_tcp_session. +Disable use of server inode numbers when server only +partially supports them (e.g. for one server querying inode numbers on +FindFirst fails but QPathInfo queries works). + Version 1.60 ------------- Fix memory leak in reconnect. Fix oops in DFS mount error path. diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index d3a6b07..1f42f77 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -643,7 +643,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, */ if (nd && (nd->flags & LOOKUP_EXCL)) { d_instantiate(direntry, NULL); - return 0; + return NULL; } /* can not grab the rename sem here since it would -- cgit v0.10.2 From d0b3b119f4d9460f3947334cf384ae69c5417360 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Tue, 24 Nov 2009 21:34:35 -0500 Subject: ACPICA: Silence the warning about _BIF returning the buffer _BIF was returning buffer instead of a string since day 1 of ACPI. Adding a warning for that is noble, but people don't like when someone cries wolf in a production system. Reference: http://bugzilla.kernel.org/show_bug.cgi?id=14379 Signed-off-by: Alexey Starikovskiy Signed-off-by: Len Brown diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h index cd80d1d..57bdaf6 100644 --- a/drivers/acpi/acpica/acpredef.h +++ b/drivers/acpi/acpica/acpredef.h @@ -203,8 +203,9 @@ static const union acpi_predefined_info predefined_names[] = {{"_BCT", 1, ACPI_RTYPE_INTEGER}}, {{"_BDN", 0, ACPI_RTYPE_INTEGER}}, {{"_BFS", 1, 0}}, - {{"_BIF", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (9 Int),(4 Str) */ - {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 9, ACPI_RTYPE_STRING}, 4,0}}, + {{"_BIF", 0, ACPI_RTYPE_PACKAGE} }, /* Fixed-length (9 Int),(4 Str/Buf) */ + {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 9, + ACPI_RTYPE_STRING | ACPI_RTYPE_BUFFER}, 4, 0} }, {{"_BIX", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (16 Int),(4 Str) */ {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 16, ACPI_RTYPE_STRING}, 4, -- cgit v0.10.2 From 8b1edc57a617d00845806ca1fce1799c08d50920 Mon Sep 17 00:00:00 2001 From: Jerone Young Date: Thu, 27 Aug 2009 00:04:44 -0500 Subject: ACPI: Add Thinkpad T400, T500 to OSI(Linux) white-list acpi_osi=Linux helps the mute button work properly by sending Linux a mute key press. http://bugzilla.kernel.org/show_bug.cgi?id=13934 Signed-off-by: Jerone Young Signed-off-by: Len Brown diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c index e56b2a7..23e5a05 100644 --- a/drivers/acpi/blacklist.c +++ b/drivers/acpi/blacklist.c @@ -224,6 +224,7 @@ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = { * _OSI(Linux) helps sound * DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad R61"), * DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T61"), + * T400, T500 * _OSI(Linux) has Linux specific hooks * DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X61"), * _OSI(Linux) is a NOP: @@ -254,6 +255,22 @@ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = { DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X61"), }, }, + { + .callback = dmi_enable_osi_linux, + .ident = "Lenovo ThinkPad T400", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T400"), + }, + }, + { + .callback = dmi_enable_osi_linux, + .ident = "Lenovo ThinkPad T500", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T500"), + }, + }, {} }; -- cgit v0.10.2 From 80a8d1228e90349b4514e8c925c061fa5cbcea75 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Fri, 20 Nov 2009 19:48:23 +0100 Subject: thinkpad-acpi: fix sign of ERESTARTSYS return The returned error should be negative Signed-off-by: Roel Kluin Acked-by: Henrique de Moraes Holschuh Signed-off-by: Andrew Morton Cc: Signed-off-by: Len Brown diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index d93108d..03c0a56 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -6313,7 +6313,7 @@ static int brightness_write(char *buf) * Doing it this way makes the syscall restartable in case of EINTR */ rc = brightness_set(level); - return (rc == -EINTR)? ERESTARTSYS : rc; + return (rc == -EINTR)? -ERESTARTSYS : rc; } static struct ibm_struct brightness_driver_data = { -- cgit v0.10.2 From 275014ae46871ce0ab08550fc4040f12b685813a Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Tue, 17 Nov 2009 14:07:22 -0800 Subject: thinkpad-acpi: fix detection of old ThinkPads There is a problem in the quirk tables used by tpacpi_is_fw_known() and tpacpi_check_outdated_fw(), which causes outdated BIOSes that are lacking the EC firmware ID DMI field to never match. This breaks module loading on, e.g. a T23 with outdated BIOS, and the module will refuse to load unless the "force_load=1" parameter is given. Fix the quirk tables so that they can also match the outdated BIOSes, which in turn will both fix the module loading, and also warn the user that he is using outdated firmware and should upgrade. This fixes a serious regression, introduced by commit e675abafcc0df38125e6e94a9ba91c92fe774f52, "thinkpad-acpi: be more strict when detecting a ThinkPad". http://bugzilla.kernel.org/show_bug.cgi?id=14597 Reported-by: Paul Kimoto Signed-off-by: Henrique de Moraes Holschuh Tested-by: Paul Kimoto Signed-off-by: Andrew Morton Signed-off-by: Len Brown diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 03c0a56..a848c7e 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -1680,36 +1680,48 @@ static void tpacpi_remove_driver_attributes(struct device_driver *drv) | (__bv1) << 8 | (__bv2) } #define TPV_Q_X(__v, __bid1, __bid2, __bv1, __bv2, \ - __eid1, __eid2, __ev1, __ev2) \ + __eid, __ev1, __ev2) \ { .vendor = (__v), \ .bios = TPID(__bid1, __bid2), \ - .ec = TPID(__eid1, __eid2), \ + .ec = __eid, \ .quirks = (__ev1) << 24 | (__ev2) << 16 \ | (__bv1) << 8 | (__bv2) } #define TPV_QI0(__id1, __id2, __bv1, __bv2) \ TPV_Q(PCI_VENDOR_ID_IBM, __id1, __id2, __bv1, __bv2) +/* Outdated IBM BIOSes often lack the EC id string */ #define TPV_QI1(__id1, __id2, __bv1, __bv2, __ev1, __ev2) \ TPV_Q_X(PCI_VENDOR_ID_IBM, __id1, __id2, \ - __bv1, __bv2, __id1, __id2, __ev1, __ev2) + __bv1, __bv2, TPID(__id1, __id2), \ + __ev1, __ev2), \ + TPV_Q_X(PCI_VENDOR_ID_IBM, __id1, __id2, \ + __bv1, __bv2, TPACPI_MATCH_UNKNOWN, \ + __ev1, __ev2) +/* Outdated IBM BIOSes often lack the EC id string */ #define TPV_QI2(__bid1, __bid2, __bv1, __bv2, \ __eid1, __eid2, __ev1, __ev2) \ TPV_Q_X(PCI_VENDOR_ID_IBM, __bid1, __bid2, \ - __bv1, __bv2, __eid1, __eid2, __ev1, __ev2) + __bv1, __bv2, TPID(__eid1, __eid2), \ + __ev1, __ev2), \ + TPV_Q_X(PCI_VENDOR_ID_IBM, __bid1, __bid2, \ + __bv1, __bv2, TPACPI_MATCH_UNKNOWN, \ + __ev1, __ev2) #define TPV_QL0(__id1, __id2, __bv1, __bv2) \ TPV_Q(PCI_VENDOR_ID_LENOVO, __id1, __id2, __bv1, __bv2) #define TPV_QL1(__id1, __id2, __bv1, __bv2, __ev1, __ev2) \ TPV_Q_X(PCI_VENDOR_ID_LENOVO, __id1, __id2, \ - __bv1, __bv2, __id1, __id2, __ev1, __ev2) + __bv1, __bv2, TPID(__id1, __id2), \ + __ev1, __ev2) #define TPV_QL2(__bid1, __bid2, __bv1, __bv2, \ __eid1, __eid2, __ev1, __ev2) \ TPV_Q_X(PCI_VENDOR_ID_LENOVO, __bid1, __bid2, \ - __bv1, __bv2, __eid1, __eid2, __ev1, __ev2) + __bv1, __bv2, TPID(__eid1, __eid2), \ + __ev1, __ev2) static const struct tpacpi_quirk tpacpi_bios_version_qtable[] __initconst = { /* Numeric models ------------------ */ -- cgit v0.10.2 From 7005291706341a11c094f39a756a01c9e649e5f9 Mon Sep 17 00:00:00 2001 From: Peter Feuerer Date: Tue, 17 Nov 2009 14:07:21 -0800 Subject: acerhdf: return temperature in milidegree instead of degree Return temperature in milidegree instead of degree, as sysfs-api requires the temperature in milidegree. Signed-off-by: Peter Feuerer Tested-by: Borislav Petkov Cc: Andreas Mohr Signed-off-by: Andrew Morton Signed-off-by: Len Brown diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c index 0a8f735..ab64522 100644 --- a/drivers/platform/x86/acerhdf.c +++ b/drivers/platform/x86/acerhdf.c @@ -52,7 +52,7 @@ */ #undef START_IN_KERNEL_MODE -#define DRV_VER "0.5.17" +#define DRV_VER "0.5.18" /* * According to the Atom N270 datasheet, @@ -61,7 +61,7 @@ * measured by the on-die thermal monitor are within 0 <= Tj <= 90. So, * assume 89°C is critical temperature. */ -#define ACERHDF_TEMP_CRIT 89 +#define ACERHDF_TEMP_CRIT 89000 #define ACERHDF_FAN_OFF 0 #define ACERHDF_FAN_AUTO 1 @@ -69,7 +69,7 @@ * No matter what value the user puts into the fanon variable, turn on the fan * at 80 degree Celsius to prevent hardware damage */ -#define ACERHDF_MAX_FANON 80 +#define ACERHDF_MAX_FANON 80000 /* * Maximum interval between two temperature checks is 15 seconds, as the die @@ -85,8 +85,8 @@ static int kernelmode; #endif static unsigned int interval = 10; -static unsigned int fanon = 63; -static unsigned int fanoff = 58; +static unsigned int fanon = 63000; +static unsigned int fanoff = 58000; static unsigned int verbose; static unsigned int fanstate = ACERHDF_FAN_AUTO; static char force_bios[16]; @@ -171,7 +171,7 @@ static int acerhdf_get_temp(int *temp) if (ec_read(bios_cfg->tempreg, &read_temp)) return -EINVAL; - *temp = read_temp; + *temp = read_temp * 1000; return 0; } -- cgit v0.10.2 From 865bddfbf70d9ba04638c15ef49f17b599d9bbf3 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Tue, 20 Oct 2009 13:38:04 +0000 Subject: Blackfin: fix suspend/resume failure with some on-chip ROMs Some Blackfin on-chip ROMs utilize some MDMA channels during the suspend and resume process, but don't clean up after themselves. So manually clear all DMA channels when resuming since no DMA could have been running at this point in time. Now Linux should be able to work regardless of any laziness on the part of the on-chip ROM or boot loader. Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger diff --git a/arch/blackfin/kernel/bfin_dma_5xx.c b/arch/blackfin/kernel/bfin_dma_5xx.c index 1f17021..3946aff 100644 --- a/arch/blackfin/kernel/bfin_dma_5xx.c +++ b/arch/blackfin/kernel/bfin_dma_5xx.c @@ -225,8 +225,13 @@ int blackfin_dma_suspend(void) void blackfin_dma_resume(void) { int i; - for (i = 0; i < MAX_DMA_SUSPEND_CHANNELS; ++i) - dma_ch[i].regs->peripheral_map = dma_ch[i].saved_peripheral_map; + + for (i = 0; i < MAX_DMA_CHANNELS; ++i) { + dma_ch[i].regs->cfg = 0; + + if (i < MAX_DMA_SUSPEND_CHANNELS) + dma_ch[i].regs->peripheral_map = dma_ch[i].saved_peripheral_map; + } } #endif -- cgit v0.10.2 From 7bae2c4898dd6e1e4a8276e5c428c55a7ff01bdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Goddard=20Rosa?= Date: Fri, 30 Oct 2009 05:57:22 -0200 Subject: Blackfin: fix cache Kconfig typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Kconfig option is "BFIN_EXTMEM_WRITETHROUGH", not "..._WRITETROUGH". Signed-off-by: André Goddard Rosa Signed-off-by: Mike Frysinger diff --git a/arch/blackfin/kernel/cplb-mpu/cplbinit.c b/arch/blackfin/kernel/cplb-mpu/cplbinit.c index f7b9cdc..b52c1f8 100644 --- a/arch/blackfin/kernel/cplb-mpu/cplbinit.c +++ b/arch/blackfin/kernel/cplb-mpu/cplbinit.c @@ -38,7 +38,7 @@ void __init generate_cplb_tables_cpu(unsigned int cpu) #ifdef CONFIG_BFIN_EXTMEM_DCACHEABLE d_cache = CPLB_L1_CHBL; -#ifdef CONFIG_BFIN_EXTMEM_WRITETROUGH +#ifdef CONFIG_BFIN_EXTMEM_WRITETHROUGH d_cache |= CPLB_L1_AOW | CPLB_WT; #endif #endif -- cgit v0.10.2 From af5d7fc7e4d8e34bad42178f7011287e94eeb3ed Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Sun, 15 Nov 2009 18:18:41 -0500 Subject: Blackfin: update anomaly lists Add some recently documented anomalies (473, 474, 475, 477). Also stick a "do not edit" notice in here so people know these are copies of some master version. Signed-off-by: Mike Frysinger diff --git a/arch/blackfin/mach-bf518/include/mach/anomaly.h b/arch/blackfin/mach-bf518/include/mach/anomaly.h index e9c6539..2829dd0 100644 --- a/arch/blackfin/mach-bf518/include/mach/anomaly.h +++ b/arch/blackfin/mach-bf518/include/mach/anomaly.h @@ -1,9 +1,13 @@ /* - * File: include/asm-blackfin/mach-bf518/anomaly.h - * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * DO NOT EDIT THIS FILE + * This file is under version control at + * svn://sources.blackfin.uclinux.org/toolchain/trunk/proc-defs/header-frags/ + * and can be replaced with that version at any time + * DO NOT EDIT THIS FILE * - * Copyright (C) 2004-2009 Analog Devices Inc. - * Licensed under the GPL-2 or later. + * Copyright 2004-2009 Analog Devices Inc. + * Licensed under the ADI BSD license. + * https://docs.blackfin.uclinux.org/doku.php?id=adi_bsd */ /* This file should be up to date with: @@ -70,6 +74,10 @@ #define ANOMALY_05000461 (1) /* Synchronization Problem at Startup May Cause SPORT Transmit Channels to Misalign */ #define ANOMALY_05000462 (1) +/* Interrupted 32-Bit SPORT Data Register Access Results In Underflow */ +#define ANOMALY_05000473 (1) +/* TESTSET Instruction Cannot Be Interrupted */ +#define ANOMALY_05000477 (1) /* Anomalies that don't exist on this proc */ #define ANOMALY_05000099 (0) @@ -133,5 +141,7 @@ #define ANOMALY_05000450 (0) #define ANOMALY_05000465 (0) #define ANOMALY_05000467 (0) +#define ANOMALY_05000474 (0) +#define ANOMALY_05000475 (0) #endif diff --git a/arch/blackfin/mach-bf527/include/mach/anomaly.h b/arch/blackfin/mach-bf527/include/mach/anomaly.h index 3f90526..02040df 100644 --- a/arch/blackfin/mach-bf527/include/mach/anomaly.h +++ b/arch/blackfin/mach-bf527/include/mach/anomaly.h @@ -1,14 +1,18 @@ /* - * File: include/asm-blackfin/mach-bf527/anomaly.h - * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * DO NOT EDIT THIS FILE + * This file is under version control at + * svn://sources.blackfin.uclinux.org/toolchain/trunk/proc-defs/header-frags/ + * and can be replaced with that version at any time + * DO NOT EDIT THIS FILE * - * Copyright (C) 2004-2009 Analog Devices Inc. - * Licensed under the GPL-2 or later. + * Copyright 2004-2009 Analog Devices Inc. + * Licensed under the ADI BSD license. + * https://docs.blackfin.uclinux.org/doku.php?id=adi_bsd */ /* This file should be up to date with: * - Revision D, 08/14/2009; ADSP-BF526 Blackfin Processor Anomaly List - * - Revision F, 03/03/2009; ADSP-BF527 Blackfin Processor Anomaly List + * - Revision G, 08/25/2009; ADSP-BF527 Blackfin Processor Anomaly List */ #ifndef _MACH_ANOMALY_H_ @@ -200,6 +204,10 @@ #define ANOMALY_05000467 (1) /* PLL Latches Incorrect Settings During Reset */ #define ANOMALY_05000469 (1) +/* Interrupted 32-Bit SPORT Data Register Access Results In Underflow */ +#define ANOMALY_05000473 (1) +/* TESTSET Instruction Cannot Be Interrupted */ +#define ANOMALY_05000477 (1) /* Anomalies that don't exist on this proc */ #define ANOMALY_05000099 (0) @@ -250,5 +258,7 @@ #define ANOMALY_05000412 (0) #define ANOMALY_05000447 (0) #define ANOMALY_05000448 (0) +#define ANOMALY_05000474 (0) +#define ANOMALY_05000475 (0) #endif diff --git a/arch/blackfin/mach-bf533/include/mach/anomaly.h b/arch/blackfin/mach-bf533/include/mach/anomaly.h index cd83db2..9b3f7a2 100644 --- a/arch/blackfin/mach-bf533/include/mach/anomaly.h +++ b/arch/blackfin/mach-bf533/include/mach/anomaly.h @@ -1,9 +1,13 @@ /* - * File: include/asm-blackfin/mach-bf533/anomaly.h - * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * DO NOT EDIT THIS FILE + * This file is under version control at + * svn://sources.blackfin.uclinux.org/toolchain/trunk/proc-defs/header-frags/ + * and can be replaced with that version at any time + * DO NOT EDIT THIS FILE * - * Copyright (C) 2004-2009 Analog Devices Inc. - * Licensed under the GPL-2 or later. + * Copyright 2004-2009 Analog Devices Inc. + * Licensed under the ADI BSD license. + * https://docs.blackfin.uclinux.org/doku.php?id=adi_bsd */ /* This file should be up to date with: @@ -202,6 +206,10 @@ #define ANOMALY_05000443 (1) /* False Hardware Error when RETI Points to Invalid Memory */ #define ANOMALY_05000461 (1) +/* Interrupted 32-Bit SPORT Data Register Access Results In Underflow */ +#define ANOMALY_05000473 (1) +/* TESTSET Instruction Cannot Be Interrupted */ +#define ANOMALY_05000477 (1) /* These anomalies have been "phased" out of analog.com anomaly sheets and are * here to show running on older silicon just isn't feasible. @@ -349,5 +357,7 @@ #define ANOMALY_05000450 (0) #define ANOMALY_05000465 (0) #define ANOMALY_05000467 (0) +#define ANOMALY_05000474 (0) +#define ANOMALY_05000475 (0) #endif diff --git a/arch/blackfin/mach-bf537/include/mach/anomaly.h b/arch/blackfin/mach-bf537/include/mach/anomaly.h index f091ad2..d2c427b 100644 --- a/arch/blackfin/mach-bf537/include/mach/anomaly.h +++ b/arch/blackfin/mach-bf537/include/mach/anomaly.h @@ -1,9 +1,13 @@ /* - * File: include/asm-blackfin/mach-bf537/anomaly.h - * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * DO NOT EDIT THIS FILE + * This file is under version control at + * svn://sources.blackfin.uclinux.org/toolchain/trunk/proc-defs/header-frags/ + * and can be replaced with that version at any time + * DO NOT EDIT THIS FILE * - * Copyright (C) 2004-2009 Analog Devices Inc. - * Licensed under the GPL-2 or later. + * Copyright 2004-2009 Analog Devices Inc. + * Licensed under the ADI BSD license. + * https://docs.blackfin.uclinux.org/doku.php?id=adi_bsd */ /* This file should be up to date with: @@ -156,6 +160,10 @@ #define ANOMALY_05000443 (1) /* False Hardware Error when RETI Points to Invalid Memory */ #define ANOMALY_05000461 (1) +/* Interrupted 32-Bit SPORT Data Register Access Results In Underflow */ +#define ANOMALY_05000473 (1) +/* TESTSET Instruction Cannot Be Interrupted */ +#define ANOMALY_05000477 (1) /* Anomalies that don't exist on this proc */ #define ANOMALY_05000099 (0) @@ -202,5 +210,7 @@ #define ANOMALY_05000450 (0) #define ANOMALY_05000465 (0) #define ANOMALY_05000467 (0) +#define ANOMALY_05000474 (0) +#define ANOMALY_05000475 (0) #endif diff --git a/arch/blackfin/mach-bf538/include/mach/anomaly.h b/arch/blackfin/mach-bf538/include/mach/anomaly.h index 26b7608..d882b7e 100644 --- a/arch/blackfin/mach-bf538/include/mach/anomaly.h +++ b/arch/blackfin/mach-bf538/include/mach/anomaly.h @@ -1,9 +1,13 @@ /* - * File: include/asm-blackfin/mach-bf538/anomaly.h - * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * DO NOT EDIT THIS FILE + * This file is under version control at + * svn://sources.blackfin.uclinux.org/toolchain/trunk/proc-defs/header-frags/ + * and can be replaced with that version at any time + * DO NOT EDIT THIS FILE * - * Copyright (C) 2004-2009 Analog Devices Inc. - * Licensed under the GPL-2 or later. + * Copyright 2004-2009 Analog Devices Inc. + * Licensed under the ADI BSD license. + * https://docs.blackfin.uclinux.org/doku.php?id=adi_bsd */ /* This file should be up to date with: @@ -128,6 +132,10 @@ #define ANOMALY_05000443 (1) /* False Hardware Error when RETI Points to Invalid Memory */ #define ANOMALY_05000461 (1) +/* Interrupted 32-Bit SPORT Data Register Access Results In Underflow */ +#define ANOMALY_05000473 (1) +/* TESTSET Instruction Cannot Be Interrupted */ +#define ANOMALY_05000477 (1) /* Anomalies that don't exist on this proc */ #define ANOMALY_05000099 (0) @@ -176,5 +184,7 @@ #define ANOMALY_05000450 (0) #define ANOMALY_05000465 (0) #define ANOMALY_05000467 (0) +#define ANOMALY_05000474 (0) +#define ANOMALY_05000475 (0) #endif diff --git a/arch/blackfin/mach-bf548/include/mach/anomaly.h b/arch/blackfin/mach-bf548/include/mach/anomaly.h index 52b116a..7d08c75 100644 --- a/arch/blackfin/mach-bf548/include/mach/anomaly.h +++ b/arch/blackfin/mach-bf548/include/mach/anomaly.h @@ -1,9 +1,13 @@ /* - * File: include/asm-blackfin/mach-bf548/anomaly.h - * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * DO NOT EDIT THIS FILE + * This file is under version control at + * svn://sources.blackfin.uclinux.org/toolchain/trunk/proc-defs/header-frags/ + * and can be replaced with that version at any time + * DO NOT EDIT THIS FILE * - * Copyright (C) 2004-2009 Analog Devices Inc. - * Licensed under the GPL-2 or later. + * Copyright 2004-2009 Analog Devices Inc. + * Licensed under the ADI BSD license. + * https://docs.blackfin.uclinux.org/doku.php?id=adi_bsd */ /* This file should be up to date with: @@ -24,6 +28,8 @@ #define ANOMALY_05000119 (1) /* Rx.H Cannot Be Used to Access 16-bit System MMR Registers */ #define ANOMALY_05000122 (1) +/* Data Corruption with Cached External Memory and Non-Cached On-Chip L2 Memory */ +#define ANOMALY_05000220 (1) /* False Hardware Error from an Access in the Shadow of a Conditional Branch */ #define ANOMALY_05000245 (1) /* Sensitivity To Noise with Slow Input Edge Rates on External SPORT TX and RX Clocks */ @@ -200,6 +206,14 @@ #define ANOMALY_05000466 (1) /* Possible RX data corruption when control & data EP FIFOs are accessed via the core */ #define ANOMALY_05000467 (1) +/* Interrupted 32-Bit SPORT Data Register Access Results In Underflow */ +#define ANOMALY_05000473 (1) +/* Access to DDR-SDRAM causes system hang under certain PLL/VR settings */ +#define ANOMALY_05000474 (1) +/* Core Hang With L2/L3 Configured in Writeback Cache Mode */ +#define ANOMALY_05000475 (1) +/* TESTSET Instruction Cannot Be Interrupted */ +#define ANOMALY_05000477 (1) /* Anomalies that don't exist on this proc */ #define ANOMALY_05000099 (0) @@ -215,7 +229,6 @@ #define ANOMALY_05000198 (0) #define ANOMALY_05000202 (0) #define ANOMALY_05000215 (0) -#define ANOMALY_05000220 (0) #define ANOMALY_05000227 (0) #define ANOMALY_05000230 (0) #define ANOMALY_05000231 (0) diff --git a/arch/blackfin/mach-bf561/include/mach/anomaly.h b/arch/blackfin/mach-bf561/include/mach/anomaly.h index 70da495..5ddc981 100644 --- a/arch/blackfin/mach-bf561/include/mach/anomaly.h +++ b/arch/blackfin/mach-bf561/include/mach/anomaly.h @@ -1,9 +1,13 @@ /* - * File: include/asm-blackfin/mach-bf561/anomaly.h - * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * DO NOT EDIT THIS FILE + * This file is under version control at + * svn://sources.blackfin.uclinux.org/toolchain/trunk/proc-defs/header-frags/ + * and can be replaced with that version at any time + * DO NOT EDIT THIS FILE * - * Copyright (C) 2004-2009 Analog Devices Inc. - * Licensed under the GPL-2 or later. + * Copyright 2004-2009 Analog Devices Inc. + * Licensed under the ADI BSD license. + * https://docs.blackfin.uclinux.org/doku.php?id=adi_bsd */ /* This file should be up to date with: @@ -213,7 +217,11 @@ /* Disabling Peripherals with DMA Running May Cause DMA System Instability */ #define ANOMALY_05000278 (__SILICON_REVISION__ < 5) /* False Hardware Error Exception when ISR Context Is Not Restored */ -#define ANOMALY_05000281 (__SILICON_REVISION__ < 5) +/* Temporarily walk around for bug 5423 till this issue is confirmed by + * official anomaly document. It looks 05000281 still exists on bf561 + * v0.5. + */ +#define ANOMALY_05000281 (__SILICON_REVISION__ <= 5) /* System MMR Write Is Stalled Indefinitely when Killed in a Particular Stage */ #define ANOMALY_05000283 (1) /* Reads Will Receive Incorrect Data under Certain Conditions */ @@ -280,6 +288,12 @@ #define ANOMALY_05000443 (1) /* False Hardware Error when RETI Points to Invalid Memory */ #define ANOMALY_05000461 (1) +/* Interrupted 32-Bit SPORT Data Register Access Results In Underflow */ +#define ANOMALY_05000473 (1) +/* Core Hang With L2/L3 Configured in Writeback Cache Mode */ +#define ANOMALY_05000475 (__SILICON_REVISION__ < 4) +/* TESTSET Instruction Cannot Be Interrupted */ +#define ANOMALY_05000477 (1) /* Anomalies that don't exist on this proc */ #define ANOMALY_05000119 (0) @@ -304,5 +318,6 @@ #define ANOMALY_05000450 (0) #define ANOMALY_05000465 (0) #define ANOMALY_05000467 (0) +#define ANOMALY_05000474 (0) #endif -- cgit v0.10.2 From f99e8c1d0f41011870d14d5990c65e65b123f2ef Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 3 Nov 2009 03:14:38 +0000 Subject: Blackfin: work around testset anomaly 05000477 Ironically, the atomic testset instruction cannot be interrupted else it will produce incorrect results. So disable interrupts to help it out. Signed-off-by: Mike Frysinger diff --git a/arch/blackfin/mach-bf561/atomic.S b/arch/blackfin/mach-bf561/atomic.S index 0261a5e..f99f174 100644 --- a/arch/blackfin/mach-bf561/atomic.S +++ b/arch/blackfin/mach-bf561/atomic.S @@ -19,6 +19,16 @@ \reg\().h = _corelock; .endm +.macro safe_testset addr:req, scratch:req +#if ANOMALY_05000477 + cli \scratch; + testset (\addr); + sti \scratch; +#else + testset (\addr); +#endif +.endm + /* * r0 = address of atomic data to flush and invalidate (32bit). * @@ -33,7 +43,7 @@ ENTRY(_get_core_lock) cli r0; coreslot_loadaddr p0; .Lretry_corelock: - testset (p0); + safe_testset p0, r2; if cc jump .Ldone_corelock; SSYNC(r2); jump .Lretry_corelock @@ -56,7 +66,7 @@ ENTRY(_get_core_lock_noflush) cli r0; coreslot_loadaddr p0; .Lretry_corelock_noflush: - testset (p0); + safe_testset p0, r2; if cc jump .Ldone_corelock_noflush; SSYNC(r2); jump .Lretry_corelock_noflush -- cgit v0.10.2 From a2ca78cee1c428843f6833150c7e64db05f9d668 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 3 Nov 2009 02:57:45 +0000 Subject: Blackfin: check for anomaly 05000475 Parts that have on-chip L2 SRAM cannot safely utilize writeback caching mode, so reject any attempts to use it. Signed-off-by: Mike Frysinger diff --git a/arch/blackfin/mach-common/arch_checks.c b/arch/blackfin/mach-common/arch_checks.c index 9dbafcd..f2ca211 100644 --- a/arch/blackfin/mach-common/arch_checks.c +++ b/arch/blackfin/mach-common/arch_checks.c @@ -57,3 +57,8 @@ (!defined(CONFIG_BFIN_EXTMEM_DCACHEABLE) && defined(CONFIG_BFIN_L2_WRITEBACK))) # error You are exposing Anomaly 220 in this config, either config L2 as Write Through, or make External Memory WB. #endif + +#if ANOMALY_05000475 && \ + (defined(CONFIG_BFIN_EXTMEM_WRITEBACK) || defined(CONFIG_BFIN_L2_WRITEBACK)) +# error "Anomaly 475 does not allow you to use Write Back cache with L2 or External Memory" +#endif -- cgit v0.10.2 From 46b60faf8c68a200bbccbe5bc6e51c414145b9af Mon Sep 17 00:00:00 2001 From: Jie Zhang Date: Fri, 20 Nov 2009 22:52:08 +0000 Subject: Blackfin: fix typo in ptrace poking Commit c014e15a2f667f9 (Blackfin: convert ptrace to new memory functions) introduced a copy & paste typo in the ptrace poke data/text handling. The access_process_vm() function call was telling it to read instead of write. Signed-off-by: Jie Zhang Signed-off-by: Mike Frysinger diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c index 0982b5d..56b0ba1 100644 --- a/arch/blackfin/kernel/ptrace.c +++ b/arch/blackfin/kernel/ptrace.c @@ -315,7 +315,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) case BFIN_MEM_ACCESS_CORE: case BFIN_MEM_ACCESS_CORE_ONLY: copied = access_process_vm(child, addr, &data, - to_copy, 0); + to_copy, 1); if (copied) break; -- cgit v0.10.2 From 05bad36ce7a29e1e5eaf5f730ef004effed3add4 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Sat, 21 Nov 2009 19:35:58 +0100 Subject: Blackfin: fix memset in smp_send_reschedule() and -stop() To set zeroes the sizeof the struct should be used rather than sizeof the pointer, kzalloc does that. Signed-off-by: Roel Kluin Signed-off-by: Mike Frysinger diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index d98585f..d92b168 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -276,10 +276,9 @@ void smp_send_reschedule(int cpu) if (cpu_is_offline(cpu)) return; - msg = kmalloc(sizeof(*msg), GFP_ATOMIC); + msg = kzalloc(sizeof(*msg), GFP_ATOMIC); if (!msg) return; - memset(msg, 0, sizeof(msg)); INIT_LIST_HEAD(&msg->list); msg->type = BFIN_IPI_RESCHEDULE; @@ -305,10 +304,9 @@ void smp_send_stop(void) if (cpus_empty(callmap)) return; - msg = kmalloc(sizeof(*msg), GFP_ATOMIC); + msg = kzalloc(sizeof(*msg), GFP_ATOMIC); if (!msg) return; - memset(msg, 0, sizeof(msg)); INIT_LIST_HEAD(&msg->list); msg->type = BFIN_IPI_CPU_STOP; -- cgit v0.10.2 From aa23531ce5fb589d941b5bd84eb258e07131826b Mon Sep 17 00:00:00 2001 From: Graf Yang Date: Mon, 21 Sep 2009 11:51:31 +0000 Subject: Blackfin: fix SMP build error in start_thread() Commit d5ce528c8e46fa5afb9 (Blackfin: convert irq/process to asm-generic) incorrectly merged the smp and non-smp cases of start_thread() causing the L1 stack to be setup on the SMP port instead of the UP port. Signed-off-by: Graf Yang Signed-off-by: Mike Frysinger diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index 430ae39..5cc7e2e 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -151,7 +151,7 @@ void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_ regs->pc = new_ip; if (current->mm) regs->p5 = current->mm->start_data; -#ifdef CONFIG_SMP +#ifndef CONFIG_SMP task_thread_info(current)->l1_task_info.stack_start = (void *)current->mm->context.stack_start; task_thread_info(current)->l1_task_info.lowest_sp = (void *)new_sp; -- cgit v0.10.2 From 8e9e0eea9955bffbe5e5cd6355157cabddc31f17 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Wed, 11 Nov 2009 02:30:50 +0000 Subject: drm/i915: Fix CRT hotplug detect by checking really no channels attached For CRT hotplug detect status, we have four test results as blue channel only, green channel only, both blue and green channel, and no channel attached. Origin code only marks both blue and green channel case as connected, but ignore other possible connected states. This one trys to detect CRT by checking no channel attached case instead. Signed-off-by: Zhenyu Wang Signed-off-by: Eric Anholt diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 212e227..e505144 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -262,8 +262,8 @@ static bool intel_crt_detect_hotplug(struct drm_connector *connector) } while (time_after(timeout, jiffies)); } - if ((I915_READ(PORT_HOTPLUG_STAT) & CRT_HOTPLUG_MONITOR_MASK) == - CRT_HOTPLUG_MONITOR_COLOR) + if ((I915_READ(PORT_HOTPLUG_STAT) & CRT_HOTPLUG_MONITOR_MASK) != + CRT_HOTPLUG_MONITOR_NONE) return true; return false; -- cgit v0.10.2 From ca9ab10033d190c1ede85fdf456307bdfdabf079 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 22 Nov 2009 15:40:31 +0000 Subject: drm/i915: Select CONFIG_SHMEM The driver requires shmfs as the backing filesystem to handle the buffer objects, so ensure it is selected if the user chooses to build our driver. Fixes: Bug 14662 - Dell E5500 kernel panic with KMS http://bugzilla.kernel.org/show_bug.cgi?id=14662 The revealing nature of the panic is the NULL function pointer dereference in read_cache_page_async(). Signed-off-by: Chris Wilson Reported-and-tested-by: Mateusz Kaduk Signed-off-by: Eric Anholt Cc: stable@kernel.org diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index f831ea1..96eddd1 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -92,6 +92,7 @@ config DRM_I830 config DRM_I915 tristate "i915 driver" depends on AGP_INTEL + select SHMEM select DRM_KMS_HELPER select FB_CFB_FILLRECT select FB_CFB_COPYAREA -- cgit v0.10.2 From 9faf3c1d0dab9e9f4b0cbd42bc37be515d8b8cb1 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 25 Nov 2009 22:14:59 +0000 Subject: [ARM] Update mach-types Signed-off-by: Russell King diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types index 94be7bb..07b976d 100644 --- a/arch/arm/tools/mach-types +++ b/arch/arm/tools/mach-types @@ -12,7 +12,7 @@ # # http://www.arm.linux.org.uk/developer/machines/?action=new # -# Last update: Fri Sep 18 21:42:00 2009 +# Last update: Wed Nov 25 22:14:58 2009 # # machine_is_xxx CONFIG_xxxx MACH_TYPE_xxx number # @@ -928,7 +928,7 @@ palmt5 MACH_PALMT5 PALMT5 917 palmtc MACH_PALMTC PALMTC 918 omap_apollon MACH_OMAP_APOLLON OMAP_APOLLON 919 mxc30030evb MACH_MXC30030EVB MXC30030EVB 920 -rea_2d MACH_REA_2D REA_2D 921 +rea_cpu2 MACH_REA_2D REA_2D 921 eti3e524 MACH_TI3E524 TI3E524 922 ateb9200 MACH_ATEB9200 ATEB9200 923 auckland MACH_AUCKLAND AUCKLAND 924 @@ -2421,3 +2421,118 @@ liberty MACH_LIBERTY LIBERTY 2434 mh355 MACH_MH355 MH355 2435 pc7802 MACH_PC7802 PC7802 2436 gnet_sgc MACH_GNET_SGC GNET_SGC 2437 +einstein15 MACH_EINSTEIN15 EINSTEIN15 2438 +cmpd MACH_CMPD CMPD 2439 +davinci_hase1 MACH_DAVINCI_HASE1 DAVINCI_HASE1 2440 +lgeincitephone MACH_LGEINCITEPHONE LGEINCITEPHONE 2441 +ea313x MACH_EA313X EA313X 2442 +fwbd_39064 MACH_FWBD_39064 FWBD_39064 2443 +fwbd_390128 MACH_FWBD_390128 FWBD_390128 2444 +pelco_moe MACH_PELCO_MOE PELCO_MOE 2445 +minimix27 MACH_MINIMIX27 MINIMIX27 2446 +omap3_thunder MACH_OMAP3_THUNDER OMAP3_THUNDER 2447 +passionc MACH_PASSIONC PASSIONC 2448 +mx27amata MACH_MX27AMATA MX27AMATA 2449 +bgat1 MACH_BGAT1 BGAT1 2450 +buzz MACH_BUZZ BUZZ 2451 +mb9g20 MACH_MB9G20 MB9G20 2452 +yushan MACH_YUSHAN YUSHAN 2453 +lizard MACH_LIZARD LIZARD 2454 +omap3polycom MACH_OMAP3POLYCOM OMAP3POLYCOM 2455 +smdkv210 MACH_SMDKV210 SMDKV210 2456 +bravo MACH_BRAVO BRAVO 2457 +siogentoo1 MACH_SIOGENTOO1 SIOGENTOO1 2458 +siogentoo2 MACH_SIOGENTOO2 SIOGENTOO2 2459 +sm3k MACH_SM3K SM3K 2460 +acer_tempo_f900 MACH_ACER_TEMPO_F900 ACER_TEMPO_F900 2461 +sst61vc010_dev MACH_SST61VC010_DEV SST61VC010_DEV 2462 +glittertind MACH_GLITTERTIND GLITTERTIND 2463 +omap_zoom3 MACH_OMAP_ZOOM3 OMAP_ZOOM3 2464 +omap_3630sdp MACH_OMAP_3630SDP OMAP_3630SDP 2465 +cybook2440 MACH_CYBOOK2440 CYBOOK2440 2466 +torino_s MACH_TORINO_S TORINO_S 2467 +havana MACH_HAVANA HAVANA 2468 +beaumont_11 MACH_BEAUMONT_11 BEAUMONT_11 2469 +vanguard MACH_VANGUARD VANGUARD 2470 +s5pc110_draco MACH_S5PC110_DRACO S5PC110_DRACO 2471 +cartesio_two MACH_CARTESIO_TWO CARTESIO_TWO 2472 +aster MACH_ASTER ASTER 2473 +voguesv210 MACH_VOGUESV210 VOGUESV210 2474 +acm500x MACH_ACM500X ACM500X 2475 +km9260 MACH_KM9260 KM9260 2476 +nideflexg1 MACH_NIDEFLEXG1 NIDEFLEXG1 2477 +ctera_plug_io MACH_CTERA_PLUG_IO CTERA_PLUG_IO 2478 +smartq7 MACH_SMARTQ7 SMARTQ7 2479 +at91sam9g10ek2 MACH_AT91SAM9G10EK2 AT91SAM9G10EK2 2480 +asusp527 MACH_ASUSP527 ASUSP527 2481 +at91sam9g20mpm2 MACH_AT91SAM9G20MPM2 AT91SAM9G20MPM2 2482 +topasa900 MACH_TOPASA900 TOPASA900 2483 +electrum_100 MACH_ELECTRUM_100 ELECTRUM_100 2484 +mx51grb MACH_MX51GRB MX51GRB 2485 +xea300 MACH_XEA300 XEA300 2486 +htcstartrek MACH_HTCSTARTREK HTCSTARTREK 2487 +lima MACH_LIMA LIMA 2488 +csb740 MACH_CSB740 CSB740 2489 +usb_s8815 MACH_USB_S8815 USB_S8815 2490 +watson_efm_plugin MACH_WATSON_EFM_PLUGIN WATSON_EFM_PLUGIN 2491 +milkyway MACH_MILKYWAY MILKYWAY 2492 +g4evm MACH_G4EVM G4EVM 2493 +picomod6 MACH_PICOMOD6 PICOMOD6 2494 +omapl138_hawkboard MACH_OMAPL138_HAWKBOARD OMAPL138_HAWKBOARD 2495 +ip6000 MACH_IP6000 IP6000 2496 +ip6010 MACH_IP6010 IP6010 2497 +utm400 MACH_UTM400 UTM400 2498 +omap3_zybex MACH_OMAP3_ZYBEX OMAP3_ZYBEX 2499 +wireless_space MACH_WIRELESS_SPACE WIRELESS_SPACE 2500 +sx560 MACH_SX560 SX560 2501 +ts41x MACH_TS41X TS41X 2502 +elphel10373 MACH_ELPHEL10373 ELPHEL10373 2503 +rhobot MACH_RHOBOT RHOBOT 2504 +mx51_refresh MACH_MX51_REFRESH MX51_REFRESH 2505 +ls9260 MACH_LS9260 LS9260 2506 +shank MACH_SHANK SHANK 2507 +qsd8x50_st1 MACH_QSD8X50_ST1 QSD8X50_ST1 2508 +at91sam9m10ekes MACH_AT91SAM9M10EKES AT91SAM9M10EKES 2509 +hiram MACH_HIRAM HIRAM 2510 +phy3250 MACH_PHY3250 PHY3250 2511 +ea3250 MACH_EA3250 EA3250 2512 +fdi3250 MACH_FDI3250 FDI3250 2513 +whitestone MACH_WHITESTONE WHITESTONE 2514 +at91sam9263nit MACH_AT91SAM9263NIT AT91SAM9263NIT 2515 +ccmx51 MACH_CCMX51 CCMX51 2516 +ccmx51js MACH_CCMX51JS CCMX51JS 2517 +ccwmx51 MACH_CCWMX51 CCWMX51 2518 +ccwmx51js MACH_CCWMX51JS CCWMX51JS 2519 +mini6410 MACH_MINI6410 MINI6410 2520 +tiny6410 MACH_TINY6410 TINY6410 2521 +nano6410 MACH_NANO6410 NANO6410 2522 +at572d940hfnldb MACH_AT572D940HFNLDB AT572D940HFNLDB 2523 +htcleo MACH_HTCLEO HTCLEO 2524 +avp13 MACH_AVP13 AVP13 2525 +xxsvideod MACH_XXSVIDEOD XXSVIDEOD 2526 +vpnext MACH_VPNEXT VPNEXT 2527 +swarco_itc3 MACH_SWARCO_ITC3 SWARCO_ITC3 2528 +tx51 MACH_TX51 TX51 2529 +dolby_cat1021 MACH_DOLBY_CAT1021 DOLBY_CAT1021 2530 +mx28evk MACH_MX28EVK MX28EVK 2531 +phoenix260 MACH_PHOENIX260 PHOENIX260 2532 +uvaca_stork MACH_UVACA_STORK UVACA_STORK 2533 +smartq5 MACH_SMARTQ5 SMARTQ5 2534 +all3078 MACH_ALL3078 ALL3078 2535 +ctera_2bay_ds MACH_CTERA_2BAY_DS CTERA_2BAY_DS 2536 +siogentoo3 MACH_SIOGENTOO3 SIOGENTOO3 2537 +epb5000 MACH_EPB5000 EPB5000 2538 +hy9263 MACH_HY9263 HY9263 2539 +acer_tempo_m900 MACH_ACER_TEMPO_M900 ACER_TEMPO_M900 2540 +acer_tempo_dx650 MACH_ACER_TEMPO_DX900 ACER_TEMPO_DX900 2541 +acer_tempo_x960 MACH_ACER_TEMPO_X960 ACER_TEMPO_X960 2542 +acer_eten_v900 MACH_ACER_ETEN_V900 ACER_ETEN_V900 2543 +acer_eten_x900 MACH_ACER_ETEN_X900 ACER_ETEN_X900 2544 +bonnell MACH_BONNELL BONNELL 2545 +oht_mx27 MACH_OHT_MX27 OHT_MX27 2546 +htcquartz MACH_HTCQUARTZ HTCQUARTZ 2547 +davinci_dm6467tevm MACH_DAVINCI_DM6467TEVM DAVINCI_DM6467TEVM 2548 +c3ax03 MACH_C3AX03 C3AX03 2549 +mxt_td60 MACH_MXT_TD60 MXT_TD60 2550 +esyx MACH_ESYX ESYX 2551 +bulldog MACH_BULLDOG BULLDOG 2553 -- cgit v0.10.2 From 28c1969ff887bc2a7df39272850dece01de03285 Mon Sep 17 00:00:00 2001 From: Hemant Pedanekar Date: Wed, 25 Nov 2009 15:04:54 -0800 Subject: ide: fix ioctl to pass requested transfer mode to ide_find_dma_mode instead of UDMA6 Currently, ide_cmd_ioctl when invoked for setting DMA transfer mode calls ide_find_dma_mode with requested mode as XFER_UDMA_6. This prevents setting DMA mode to any other value than the default (maximum) supported by the device (or UDMA6, if supported) irrespective of the actual requested transfer mode and returns error. For example, setting mode to UDMA2 using hdparm, where UDMA4 is the default transfer mode gives following error: # ./hdparm -d1 -Xudma2 /dev/hda /dev/hda:hda: UDMA/66 mode selected setting using_dma to 1 (on) hda: UDMA/66 mode selected setting xfermode to 66 (UltraDMA mode2) HDIO_DRIVE_CMD(setxfermode) failed: Invalid argument using_dma = 1 (on) This patch fixes the issue. Signed-off-by: Hemant Pedanekar Acked-by: Bartlomiej Zolnierkiewicz Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index d3440b5..6e7ae2b 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -162,7 +162,7 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg) if (tf->command == ATA_CMD_SET_FEATURES && tf->feature == SETFEATURES_XFER && tf->nsect >= XFER_SW_DMA_0) { - xfer_rate = ide_find_dma_mode(drive, XFER_UDMA_6); + xfer_rate = ide_find_dma_mode(drive, tf->nsect); if (xfer_rate != tf->nsect) { err = -EINVAL; goto abort; -- cgit v0.10.2 From 46a965462a1c568a7cd7dc338de4a0afa5ce61c5 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Wed, 25 Nov 2009 22:28:20 -0800 Subject: Input: keyboard - fix braille keyboard keysym generation Keysyms stored in key_map[] are not simply K() values, but U(K()) values, as can be seen in the KDSKBENT ioctl handler. The kernel-generated braille keysyms thus need a U() call too. Signed-off-by: Samuel Thibault Signed-off-by: Andrew Morton Signed-off-by: Dmitry Torokhov diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c index 737be95..950837c 100644 --- a/drivers/char/keyboard.c +++ b/drivers/char/keyboard.c @@ -1249,7 +1249,7 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw) if (keycode >= NR_KEYS) if (keycode >= KEY_BRL_DOT1 && keycode <= KEY_BRL_DOT8) - keysym = K(KT_BRL, keycode - KEY_BRL_DOT1 + 1); + keysym = U(K(KT_BRL, keycode - KEY_BRL_DOT1 + 1)); else return; else -- cgit v0.10.2 From 5f5bfb09d81c9a1d26238ae6668e584c14ae3daf Mon Sep 17 00:00:00 2001 From: Michele Jr De Candia Date: Thu, 26 Nov 2009 09:22:32 +0100 Subject: i2c/tsl2550: Fix lux value in extended mode According to the TAOS Application Note 'Controlling a Backlight with the TSL2550 Ambient Light Sensor' (page 14), the actual lux value in extended mode should be obtained multiplying the calculated lux value by 5. Signed-off-by: Michele Jr De Candia Signed-off-by: Jean Delvare diff --git a/drivers/i2c/chips/tsl2550.c b/drivers/i2c/chips/tsl2550.c index aa96bd2..a0702f3 100644 --- a/drivers/i2c/chips/tsl2550.c +++ b/drivers/i2c/chips/tsl2550.c @@ -257,6 +257,7 @@ static DEVICE_ATTR(operating_mode, S_IWUSR | S_IRUGO, static ssize_t __tsl2550_show_lux(struct i2c_client *client, char *buf) { + struct tsl2550_data *data = i2c_get_clientdata(client); u8 ch0, ch1; int ret; @@ -274,6 +275,8 @@ static ssize_t __tsl2550_show_lux(struct i2c_client *client, char *buf) ret = tsl2550_calculate_lux(ch0, ch1); if (ret < 0) return ret; + if (data->operating_mode == 1) + ret *= 5; return sprintf(buf, "%d\n", ret); } -- cgit v0.10.2 From 03b70d625c10d1605012d41489d9df18467c5f55 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 26 Nov 2009 09:22:32 +0100 Subject: MAINTAINERS: Add missing i2c files Add missing header files to the i2c subsystem section. Signed-off-by: Jean Delvare diff --git a/MAINTAINERS b/MAINTAINERS index c824b4d..d5e8d5a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2533,8 +2533,7 @@ S: Maintained F: Documentation/i2c/ F: drivers/i2c/ F: include/linux/i2c.h -F: include/linux/i2c-dev.h -F: include/linux/i2c-id.h +F: include/linux/i2c-*.h I2C-TINY-USB DRIVER M: Till Harbaum -- cgit v0.10.2 From bbd2d9c9198c6efd449e9d395b3eaf2d03aa3bba Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 26 Nov 2009 09:22:33 +0100 Subject: i2c: Fix userspace_device list corruption Fix userspace_device list corruption. The corruption was caused by clients not being removed when adapters with such clients were themselves removed. Something like the following would trigger it (assuming i2c-stub gets adapter number 3): # modprobe i2c-stub chip_addr=0x50 # echo 24c08 0x50 > /sys/bus/i2c/devices/i2c-3/new_device # rmmod i2c-stub # modprobe i2c-stub chip_addr=0x50 # echo 24c08 0x50 > /sys/bus/i2c/devices/i2c-3/new_device For the records, the stack trace in the kernel logs look like this: kernel: WARNING: at lib/list_debug.c:30 __list_add+0x8b/0x90() kernel: Hardware name: (...) kernel: list_add corruption. prev->next should be next (c137fc84), but was (null). (prev=f57111b8). kernel: Modules linked in: (...) kernel: Pid: 4669, comm: bash Not tainted 2.6.32-rc8 #259 kernel: Call Trace: kernel: [] ? __list_add+0x8b/0x90 kernel: [] ? __list_add+0x8b/0x90 kernel: [] warn_slowpath_common+0x6c/0xc0 kernel: [] ? __list_add+0x8b/0x90 kernel: [] warn_slowpath_fmt+0x26/0x30 kernel: [] __list_add+0x8b/0x90 kernel: [] i2c_sysfs_new_device+0x1c5/0x250 kernel: [] ? might_fault+0x2e/0x80 kernel: [] ? i2c_sysfs_new_device+0x0/0x250 kernel: [] dev_attr_store+0x25/0x30 kernel: [] sysfs_write_file+0x9c/0xf0 kernel: [] vfs_write+0x9c/0x160 kernel: [] ? sysfs_write_file+0x0/0xf0 kernel: [] sys_write+0x3d/0x70 kernel: [] sysenter_do_call+0x12/0x36 Signed-off-by: Jean Delvare diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 8d80fce..2965043 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -762,6 +762,7 @@ int i2c_del_adapter(struct i2c_adapter *adap) { int res = 0; struct i2c_adapter *found; + struct i2c_client *client, *next; /* First make sure that this adapter was ever added */ mutex_lock(&core_lock); @@ -781,6 +782,16 @@ int i2c_del_adapter(struct i2c_adapter *adap) if (res) return res; + /* Remove devices instantiated from sysfs */ + list_for_each_entry_safe(client, next, &userspace_devices, detected) { + if (client->adapter == adap) { + dev_dbg(&adap->dev, "Removing %s at 0x%x\n", + client->name, client->addr); + list_del(&client->detected); + i2c_unregister_device(client); + } + } + /* Detach any active clients. This can't fail, thus we do not checking the returned value. */ res = device_for_each_child(&adap->dev, NULL, __unregister_client); -- cgit v0.10.2 From 4d29196c535088e807061ce2a0aa526daec2edfb Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 26 Nov 2009 09:22:33 +0100 Subject: at24: Use timeout also for read Writes may take some time on EEPROMs, so for consecutive writes, we already have a loop waiting for the EEPROM to become ready. Use such a loop for reads, too, in case somebody wants to immediately read after a write. Detailed bug report and test case can be found here: http://article.gmane.org/gmane.linux.drivers.i2c/4660 Reported-by: Aleksandar Ivanov Signed-off-by: Wolfram Sang Tested-by: Aleksandar Ivanov Cc: David Brownell Signed-off-by: Jean Delvare diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index db39f4a..2cb2736 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -158,6 +158,7 @@ static ssize_t at24_eeprom_read(struct at24_data *at24, char *buf, struct i2c_msg msg[2]; u8 msgbuf[2]; struct i2c_client *client; + unsigned long timeout, read_time; int status, i; memset(msg, 0, sizeof(msg)); @@ -183,47 +184,60 @@ static ssize_t at24_eeprom_read(struct at24_data *at24, char *buf, if (count > io_limit) count = io_limit; - /* Smaller eeproms can work given some SMBus extension calls */ if (at24->use_smbus) { + /* Smaller eeproms can work given some SMBus extension calls */ if (count > I2C_SMBUS_BLOCK_MAX) count = I2C_SMBUS_BLOCK_MAX; - status = i2c_smbus_read_i2c_block_data(client, offset, - count, buf); - dev_dbg(&client->dev, "smbus read %zu@%d --> %d\n", - count, offset, status); - return (status < 0) ? -EIO : status; + } else { + /* + * When we have a better choice than SMBus calls, use a + * combined I2C message. Write address; then read up to + * io_limit data bytes. Note that read page rollover helps us + * here (unlike writes). msgbuf is u8 and will cast to our + * needs. + */ + i = 0; + if (at24->chip.flags & AT24_FLAG_ADDR16) + msgbuf[i++] = offset >> 8; + msgbuf[i++] = offset; + + msg[0].addr = client->addr; + msg[0].buf = msgbuf; + msg[0].len = i; + + msg[1].addr = client->addr; + msg[1].flags = I2C_M_RD; + msg[1].buf = buf; + msg[1].len = count; } /* - * When we have a better choice than SMBus calls, use a combined - * I2C message. Write address; then read up to io_limit data bytes. - * Note that read page rollover helps us here (unlike writes). - * msgbuf is u8 and will cast to our needs. + * Reads fail if the previous write didn't complete yet. We may + * loop a few times until this one succeeds, waiting at least + * long enough for one entire page write to work. */ - i = 0; - if (at24->chip.flags & AT24_FLAG_ADDR16) - msgbuf[i++] = offset >> 8; - msgbuf[i++] = offset; - - msg[0].addr = client->addr; - msg[0].buf = msgbuf; - msg[0].len = i; + timeout = jiffies + msecs_to_jiffies(write_timeout); + do { + read_time = jiffies; + if (at24->use_smbus) { + status = i2c_smbus_read_i2c_block_data(client, offset, + count, buf); + } else { + status = i2c_transfer(client->adapter, msg, 2); + if (status == 2) + status = count; + } + dev_dbg(&client->dev, "read %zu@%d --> %d (%ld)\n", + count, offset, status, jiffies); - msg[1].addr = client->addr; - msg[1].flags = I2C_M_RD; - msg[1].buf = buf; - msg[1].len = count; + if (status == count) + return count; - status = i2c_transfer(client->adapter, msg, 2); - dev_dbg(&client->dev, "i2c read %zu@%d --> %d\n", - count, offset, status); + /* REVISIT: at HZ=100, this is sloooow */ + msleep(1); + } while (time_before(read_time, timeout)); - if (status == 2) - return count; - else if (status >= 0) - return -EIO; - else - return status; + return -ETIMEDOUT; } static ssize_t at24_read(struct at24_data *at24, -- cgit v0.10.2 From 3bf3583b6a49c318f7ed350862d7a217b500e71c Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 10 Nov 2009 00:39:12 -0500 Subject: [SCSI] sd: Return correct error code for DIF sd_dif.c was not updated to return -EILSEQ, leading to error handling failures in applications which provide their own integrity metadata (as opposed to being protected by the block layer functions). Signed-off-by: Martin K. Petersen Signed-off-by: James Bottomley diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c index 88da977..84be621 100644 --- a/drivers/scsi/sd_dif.c +++ b/drivers/scsi/sd_dif.c @@ -418,7 +418,7 @@ error: __func__, virt, phys, be32_to_cpu(sdt->ref_tag), be16_to_cpu(sdt->app_tag)); - return -EIO; + return -EILSEQ; } /* -- cgit v0.10.2 From 860dc73608a091e0b325218acc2701709d5f221a Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 19 Nov 2009 17:48:29 -0500 Subject: [SCSI] fix async scan add/remove race resulting in an oops Async scanning introduced a very wide window where the SCSI device is up and running but has not yet been added to sysfs. We delay the adding until all scans have completed to retain the same ordering as sync scanning. This delay in visibility causes an oops if a device is removed before we make it visible because the SCSI removal routines have an inbuilt assumption that if a device is in SDEV_RUNNING state, it must be visible (which is not necessarily true in the async scanning case). Fix this by introducing an additional is_visible flag which we can use to condition the tear down so we do the right thing for running but not yet made visible. Reported-by: Alexey Kuznetsov Signed-off-by: James Bottomley diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 0547a7f..47291bc 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -952,16 +952,6 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, return SCSI_SCAN_LUN_PRESENT; } -static inline void scsi_destroy_sdev(struct scsi_device *sdev) -{ - scsi_device_set_state(sdev, SDEV_DEL); - if (sdev->host->hostt->slave_destroy) - sdev->host->hostt->slave_destroy(sdev); - transport_destroy_device(&sdev->sdev_gendev); - put_device(&sdev->sdev_dev); - put_device(&sdev->sdev_gendev); -} - #ifdef CONFIG_SCSI_LOGGING /** * scsi_inq_str - print INQUIRY data from min to max index, strip trailing whitespace @@ -1139,7 +1129,7 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget, } } } else - scsi_destroy_sdev(sdev); + __scsi_remove_device(sdev); out: return res; } @@ -1500,7 +1490,7 @@ static int scsi_report_lun_scan(struct scsi_target *starget, int bflags, /* * the sdev we used didn't appear in the report luns scan */ - scsi_destroy_sdev(sdev); + __scsi_remove_device(sdev); return ret; } @@ -1710,7 +1700,7 @@ static void scsi_sysfs_add_devices(struct Scsi_Host *shost) shost_for_each_device(sdev, shost) { if (!scsi_host_scan_allowed(shost) || scsi_sysfs_add_sdev(sdev) != 0) - scsi_destroy_sdev(sdev); + __scsi_remove_device(sdev); } } @@ -1943,7 +1933,7 @@ void scsi_free_host_dev(struct scsi_device *sdev) { BUG_ON(sdev->id != sdev->host->this_id); - scsi_destroy_sdev(sdev); + __scsi_remove_device(sdev); } EXPORT_SYMBOL(scsi_free_host_dev); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 5c7eb63..392d8db 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -854,82 +854,73 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev) transport_configure_device(&starget->dev); error = device_add(&sdev->sdev_gendev); if (error) { - put_device(sdev->sdev_gendev.parent); printk(KERN_INFO "error 1\n"); - return error; + goto out_remove; } error = device_add(&sdev->sdev_dev); if (error) { printk(KERN_INFO "error 2\n"); - goto clean_device; + device_del(&sdev->sdev_gendev); + goto out_remove; } + transport_add_device(&sdev->sdev_gendev); + sdev->is_visible = 1; /* create queue files, which may be writable, depending on the host */ if (sdev->host->hostt->change_queue_depth) error = device_create_file(&sdev->sdev_gendev, &sdev_attr_queue_depth_rw); else error = device_create_file(&sdev->sdev_gendev, &dev_attr_queue_depth); - if (error) { - __scsi_remove_device(sdev); - goto out; - } + if (error) + goto out_remove; + if (sdev->host->hostt->change_queue_type) error = device_create_file(&sdev->sdev_gendev, &sdev_attr_queue_type_rw); else error = device_create_file(&sdev->sdev_gendev, &dev_attr_queue_type); - if (error) { - __scsi_remove_device(sdev); - goto out; - } + if (error) + goto out_remove; error = bsg_register_queue(rq, &sdev->sdev_gendev, NULL, NULL); if (error) + /* we're treating error on bsg register as non-fatal, + * so pretend nothing went wrong */ sdev_printk(KERN_INFO, sdev, "Failed to register bsg queue, errno=%d\n", error); - /* we're treating error on bsg register as non-fatal, so pretend - * nothing went wrong */ - error = 0; - /* add additional host specific attributes */ if (sdev->host->hostt->sdev_attrs) { for (i = 0; sdev->host->hostt->sdev_attrs[i]; i++) { error = device_create_file(&sdev->sdev_gendev, sdev->host->hostt->sdev_attrs[i]); - if (error) { - __scsi_remove_device(sdev); - goto out; - } + if (error) + goto out_remove; } } - transport_add_device(&sdev->sdev_gendev); - out: - return error; - - clean_device: - scsi_device_set_state(sdev, SDEV_CANCEL); - - device_del(&sdev->sdev_gendev); - transport_destroy_device(&sdev->sdev_gendev); - put_device(&sdev->sdev_dev); - put_device(&sdev->sdev_gendev); + return 0; + out_remove: + __scsi_remove_device(sdev); return error; + } void __scsi_remove_device(struct scsi_device *sdev) { struct device *dev = &sdev->sdev_gendev; - if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0) - return; + if (sdev->is_visible) { + if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0) + return; - bsg_unregister_queue(sdev->request_queue); - device_unregister(&sdev->sdev_dev); - transport_remove_device(dev); - device_del(dev); + bsg_unregister_queue(sdev->request_queue); + device_unregister(&sdev->sdev_dev); + transport_remove_device(dev); + device_del(dev); + } else + put_device(&sdev->sdev_dev); scsi_device_set_state(sdev, SDEV_DEL); if (sdev->host->hostt->slave_destroy) sdev->host->hostt->slave_destroy(sdev); diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 9af48cb..f097ae3 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -145,6 +145,7 @@ struct scsi_device { unsigned retry_hwerror:1; /* Retry HARDWARE_ERROR */ unsigned last_sector_bug:1; /* do not use multisector accesses on SD_LAST_BUGGY_SECTORS */ + unsigned is_visible:1; /* is the device visible in sysfs */ DECLARE_BITMAP(supported_events, SDEV_EVT_MAXBITS); /* supported events */ struct list_head event_list; /* asserted events */ -- cgit v0.10.2 From 4e46bf89972b9d98a9f282a9fed2359756a5e34e Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Tue, 17 Nov 2009 14:10:11 -0800 Subject: [SCSI] fix crash when disconnecting usb storage __scsi_remove_device() in scsi_forget_host() is executed out of scan_mutex and races with scsi_destroy_sdev() <- scsi_sysfs_add_devices() <- scsi_finish_async_scan(). The result is use after free and/or double free, oops. The fix is simple, move scsi_forget_host() under scan_mutex. scsi_forget_host() is just sequence of __scsi_remove_device(). All another calls of __scsi_remove_device() are made under scan_mutex. So that it is safe. Signed-off-by: Alexey Kuznetsov Signed-off-by: Denis V. Lunev Signed-off-by: Andrew Morton Signed-off-by: James Bottomley diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 5fd2da4..c968cc3 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -164,8 +164,8 @@ void scsi_remove_host(struct Scsi_Host *shost) return; } spin_unlock_irqrestore(shost->host_lock, flags); - mutex_unlock(&shost->scan_mutex); scsi_forget_host(shost); + mutex_unlock(&shost->scan_mutex); scsi_proc_host_rm(shost); spin_lock_irqsave(shost->host_lock, flags); -- cgit v0.10.2 From 3addbb8075c00e2a2408c192bd1002dead26b2aa Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 5 Nov 2009 13:26:32 -0300 Subject: V4L/DVB (13321): radio-gemtek-pci: fix double mutex_lock Double mutexlock found by the Linux Driver Verification project and reported by Alexander Strakh. Signed-off-by: Hans Verkuil CC: stable@kernel.org Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/radio/radio-gemtek-pci.c b/drivers/media/radio/radio-gemtek-pci.c index c3f579d..c6cf116 100644 --- a/drivers/media/radio/radio-gemtek-pci.c +++ b/drivers/media/radio/radio-gemtek-pci.c @@ -181,12 +181,10 @@ static void gemtek_pci_mute(struct gemtek_pci *card) static void gemtek_pci_unmute(struct gemtek_pci *card) { - mutex_lock(&card->lock); if (card->mute) { gemtek_pci_setfrequency(card, card->current_frequency); card->mute = false; } - mutex_unlock(&card->lock); } static int gemtek_pci_getsignal(struct gemtek_pci *card) -- cgit v0.10.2 From f39c1ab3c3878f1a50ca129e55d17ae63215fcbe Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Mon, 9 Nov 2009 16:11:34 -0300 Subject: V4L/DVB (13343): v4l: add more missing linux/sched.h includes Signed-off-by: Guennadi Liakhovetski Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/video/mx1_camera.c b/drivers/media/video/mx1_camera.c index 5f37952..7280229 100644 --- a/drivers/media/video/mx1_camera.c +++ b/drivers/media/video/mx1_camera.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/mx3_camera.c b/drivers/media/video/mx3_camera.c index dff2e5e..7db82bd 100644 --- a/drivers/media/video/mx3_camera.c +++ b/drivers/media/video/mx3_camera.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/drivers/media/video/sh_mobile_ceu_camera.c b/drivers/media/video/sh_mobile_ceu_camera.c index 2f78b4f..55afe3e 100644 --- a/drivers/media/video/sh_mobile_ceu_camera.c +++ b/drivers/media/video/sh_mobile_ceu_camera.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/drivers/media/video/videobuf-dma-contig.c b/drivers/media/video/videobuf-dma-contig.c index 635ffc7..c3065c4 100644 --- a/drivers/media/video/videobuf-dma-contig.c +++ b/drivers/media/video/videobuf-dma-contig.c @@ -19,6 +19,7 @@ #include #include #include +#include #include struct videobuf_dma_contig_memory { -- cgit v0.10.2 From 64ff9ba5f1d771e8405ec766c31f6b32c9708285 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Mon, 9 Nov 2009 16:12:37 -0300 Subject: V4L/DVB (13344): soc-camera: properly initialise the device object when reusing Commit ef373189f62413803b7b816c972fc154c488cdc0 "fix use-after-free Oops, resulting from a driver-core API change" fixed the Oops, but didn't correct missing device object initialisation. This patch makes unloading and reloading of soc-camera host- and client-drivers possible again. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c index 36e617bd..95fdeb2 100644 --- a/drivers/media/video/soc_camera.c +++ b/drivers/media/video/soc_camera.c @@ -1097,6 +1097,13 @@ static int default_s_crop(struct soc_camera_device *icd, struct v4l2_crop *a) return v4l2_subdev_call(sd, video, s_crop, a); } +static void soc_camera_device_init(struct device *dev, void *pdata) +{ + dev->platform_data = pdata; + dev->bus = &soc_camera_bus_type; + dev->release = dummy_release; +} + int soc_camera_host_register(struct soc_camera_host *ici) { struct soc_camera_host *ix; @@ -1158,6 +1165,7 @@ void soc_camera_host_unregister(struct soc_camera_host *ici) list_for_each_entry(icd, &devices, list) { if (icd->iface == ici->nr) { + void *pdata = icd->dev.platform_data; /* The bus->remove will be called */ device_unregister(&icd->dev); /* @@ -1169,6 +1177,7 @@ void soc_camera_host_unregister(struct soc_camera_host *ici) * device private data. */ memset(&icd->dev, 0, sizeof(icd->dev)); + soc_camera_device_init(&icd->dev, pdata); } } @@ -1200,10 +1209,7 @@ static int soc_camera_device_register(struct soc_camera_device *icd) * man, stay reasonable... */ return -ENOMEM; - icd->devnum = num; - icd->dev.bus = &soc_camera_bus_type; - - icd->dev.release = dummy_release; + icd->devnum = num; icd->use_count = 0; icd->host_priv = NULL; mutex_init(&icd->video_lock); @@ -1311,12 +1317,13 @@ static int __devinit soc_camera_pdrv_probe(struct platform_device *pdev) icd->iface = icl->bus_id; icd->pdev = &pdev->dev; platform_set_drvdata(pdev, icd); - icd->dev.platform_data = icl; ret = soc_camera_device_register(icd); if (ret < 0) goto escdevreg; + soc_camera_device_init(&icd->dev, icl); + icd->user_width = DEFAULT_WIDTH; icd->user_height = DEFAULT_HEIGHT; -- cgit v0.10.2 From 055e05a08b462feabbd6b7a33fd2b9f1f23d4476 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 9 Nov 2009 16:13:24 -0300 Subject: V4L/DVB (13345): soc-camera: sh_mobile_ceu_camera: call pm_runtime_disable pm_runtime_disable is needed if it failed or removed Signed-off-by: Kuninori Morimoto Signed-off-by: Guennadi Liakhovetski Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/video/sh_mobile_ceu_camera.c b/drivers/media/video/sh_mobile_ceu_camera.c index 55afe3e..afdea0d 100644 --- a/drivers/media/video/sh_mobile_ceu_camera.c +++ b/drivers/media/video/sh_mobile_ceu_camera.c @@ -1724,10 +1724,12 @@ static int __devinit sh_mobile_ceu_probe(struct platform_device *pdev) err = soc_camera_host_register(&pcdev->ici); if (err) - goto exit_free_irq; + goto exit_free_clk; return 0; +exit_free_clk: + pm_runtime_disable(&pdev->dev); exit_free_irq: free_irq(pcdev->irq, pcdev); exit_release_mem: @@ -1748,6 +1750,7 @@ static int __devexit sh_mobile_ceu_remove(struct platform_device *pdev) struct sh_mobile_ceu_dev, ici); soc_camera_host_unregister(soc_host); + pm_runtime_disable(&pdev->dev); free_irq(pcdev->irq, pcdev); if (platform_get_resource(pdev, IORESOURCE_MEM, 1)) dma_release_declared_memory(&pdev->dev); -- cgit v0.10.2 From 28f4ddd1e8e341907d8f4f4b9ebf9f441f30420a Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Sun, 4 Oct 2009 12:59:35 -0300 Subject: V4L/DVB (13366): em28xx: fix Reddo DVB-C USB TV Box GPIO Set device GPIOs only once. There is no need for .dvb_gpio to select between analog and digital because device is digital only. Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c index bdb249b..c0fd5c6 100644 --- a/drivers/media/video/em28xx/em28xx-cards.c +++ b/drivers/media/video/em28xx/em28xx-cards.c @@ -1584,8 +1584,8 @@ struct em28xx_board em28xx_boards[] = { [EM2870_BOARD_REDDO_DVB_C_USB_BOX] = { .name = "Reddo DVB-C USB TV Box", .tuner_type = TUNER_ABSENT, + .tuner_gpio = reddo_dvb_c_usb_box, .has_dvb = 1, - .dvb_gpio = reddo_dvb_c_usb_box, }, }; const unsigned int em28xx_bcount = ARRAY_SIZE(em28xx_boards); -- cgit v0.10.2 From 934949d816004ff66e21adbab225e9cb7f20015a Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 9 Nov 2009 09:05:11 -0300 Subject: V4L/DVB (13371): davinci: remove stray duplicate config pointer The vpif_config struct was renamed to vpif_display_config, but there is still a stray vpif_config *config pointer in vpif_display.c, preventing it from compiling. Remove this old duplicate pointer. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/video/davinci/vpif_display.c b/drivers/media/video/davinci/vpif_display.c index c015da8..d14cfb2 100644 --- a/drivers/media/video/davinci/vpif_display.c +++ b/drivers/media/video/davinci/vpif_display.c @@ -1426,7 +1426,6 @@ static __init int vpif_probe(struct platform_device *pdev) struct vpif_display_config *config; int i, j = 0, k, q, m, err = 0; struct i2c_adapter *i2c_adap; - struct vpif_config *config; struct common_obj *common; struct channel_obj *ch; struct video_device *vfd; -- cgit v0.10.2 From 4b3fa3c486f53ff267505b115ebf611726217da8 Mon Sep 17 00:00:00 2001 From: Olivier Lorin Date: Sat, 3 Oct 2009 19:09:10 -0300 Subject: V4L/DVB (13372a): MAINTAINERS: addition of gspca_gl860 driver MAINTAINERS: addition of gspca_gl860 driver - addition of gspca_gl860 driver Signed-off-by: Olivier Lorin Signed-off-by: Mauro Carvalho Chehab diff --git a/MAINTAINERS b/MAINTAINERS index c824b4d..4982eb0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2312,6 +2312,13 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-2.6.git S: Maintained F: drivers/media/video/gspca/finepix.c +GSPCA GL860 SUBDRIVER +M: Olivier Lorin +L: linux-media@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-2.6.git +S: Maintained +F: drivers/media/video/gspca/gl860/ + GSPCA M5602 SUBDRIVER M: Erik Andren L: linux-media@vger.kernel.org -- cgit v0.10.2 From 1b7323965a8c6eee9dc4e345a7ae4bff1dc93149 Mon Sep 17 00:00:00 2001 From: Csaba Henk Date: Fri, 27 Nov 2009 19:30:14 +0530 Subject: fuse: reject O_DIRECT flag also in fuse_create The comment in fuse_open about O_DIRECT: "VFS checks this, but only _after_ ->open()" also holds for fuse_create, however, the same kind of check was missing there. As an impact of this bug, open(newfile, O_RDWR|O_CREAT|O_DIRECT) fails, but a stub newfile will remain if the fuse server handled the implied FUSE_CREATE request appropriately. Other impact: in the above situation ima_file_free() will complain to open/free imbalance if CONFIG_IMA is set. Signed-off-by: Csaba Henk Signed-off-by: Miklos Szeredi Cc: Harshavardhana Cc: stable@kernel.org diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 8ada78a..4787ae6 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -385,6 +385,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (fc->no_create) return -ENOSYS; + if (flags & O_DIRECT) + return -EINVAL; + forget_req = fuse_get_req(fc); if (IS_ERR(forget_req)) return PTR_ERR(forget_req); -- cgit v0.10.2 From e9ff5eb2ae018fe2298c68746c873bf828c6b10e Mon Sep 17 00:00:00 2001 From: Anuj Aggarwal Date: Fri, 27 Nov 2009 17:40:58 +0530 Subject: ASoC: AIC23: Fixing infinite loop in resume path This patch fixes two issues: a) Infinite loop in resume function b) Writes to non-existing registers in resume function Cc: stable@kernel.org Signed-off-by: Anuj Aggarwal Acked-by: Liam Girdwood Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/tlv320aic23.c b/sound/soc/codecs/tlv320aic23.c index 6b24d8b..90a0264 100644 --- a/sound/soc/codecs/tlv320aic23.c +++ b/sound/soc/codecs/tlv320aic23.c @@ -625,11 +625,10 @@ static int tlv320aic23_resume(struct platform_device *pdev) { struct snd_soc_device *socdev = platform_get_drvdata(pdev); struct snd_soc_codec *codec = socdev->card->codec; - int i; u16 reg; /* Sync reg_cache with the hardware */ - for (reg = 0; reg < ARRAY_SIZE(tlv320aic23_reg); i++) { + for (reg = 0; reg < TLV320AIC23_RESET; reg++) { u16 val = tlv320aic23_read_reg_cache(codec, reg); tlv320aic23_write(codec, reg, val); } -- cgit v0.10.2 From d02b217a719a8c4debf66ea81ea1b95dbb00e265 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 20 Nov 2009 16:16:52 -0300 Subject: V4L/DVB (13412): SMS_SIANO_MDTV should depend on HAS_DMA When building for Sun 3: drivers/built-in.o: In function `smscore_unregister_device': drivers/media/dvb/siano/smscoreapi.c:723: undefined reference to `dma_free_coherent' drivers/built-in.o: In function `smscore_register_device': drivers/media/dvb/siano/smscoreapi.c:365: undefined reference to `dma_alloc_coherent' Signed-off-by: Geert Uytterhoeven Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/dvb/siano/Kconfig b/drivers/media/dvb/siano/Kconfig index 8c1aed7..85a222c 100644 --- a/drivers/media/dvb/siano/Kconfig +++ b/drivers/media/dvb/siano/Kconfig @@ -4,7 +4,7 @@ config SMS_SIANO_MDTV tristate "Siano SMS1xxx based MDTV receiver" - depends on DVB_CORE && INPUT + depends on DVB_CORE && INPUT && HAS_DMA ---help--- Choose Y or M here if you have MDTV receiver with a Siano chipset. -- cgit v0.10.2 From 0bc3518019f917a370935055f07698a4e9b3ea20 Mon Sep 17 00:00:00 2001 From: Robert Lowery Date: Sun, 8 Nov 2009 00:00:11 -0300 Subject: V4L/DVB (13436): cxusb: Fix hang on DViCO FusionHDTV DVB-T Dual Digital 4 (rev 1) Address yet another regression introduced by the introduction of the zl10353 disable_i2c_gate field. djh - I unmangled the patch which apparently got screwed up in the user's email client. Signed-off-by: Robert Lowery Signed-off-by: Devin Heitmueller CC: stable@kernel.org Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/dvb/dvb-usb/cxusb.c b/drivers/media/dvb/dvb-usb/cxusb.c index f65591fb7..2a53dd0 100644 --- a/drivers/media/dvb/dvb-usb/cxusb.c +++ b/drivers/media/dvb/dvb-usb/cxusb.c @@ -663,6 +663,14 @@ static struct zl10353_config cxusb_zl10353_xc3028_config = { .parallel_ts = 1, }; +static struct zl10353_config cxusb_zl10353_xc3028_config_no_i2c_gate = { + .demod_address = 0x0f, + .if2 = 45600, + .no_tuner = 1, + .parallel_ts = 1, + .disable_i2c_gate_ctrl = 1, +}; + static struct mt352_config cxusb_mt352_xc3028_config = { .demod_address = 0x0f, .if2 = 4560, @@ -894,7 +902,7 @@ static int cxusb_dualdig4_frontend_attach(struct dvb_usb_adapter *adap) cxusb_bluebird_gpio_pulse(adap->dev, 0x02, 1); if ((adap->fe = dvb_attach(zl10353_attach, - &cxusb_zl10353_xc3028_config, + &cxusb_zl10353_xc3028_config_no_i2c_gate, &adap->dev->i2c_adap)) == NULL) return -EIO; -- cgit v0.10.2 From 9807362e4768cf807c9cfad3c10243020365d1ce Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 25 Nov 2009 14:32:57 -0300 Subject: V4L/DVB (13481): sh_mobile_ceu_camera: fix compile warning Trivial fix for this compile warning: v4l/sh_mobile_ceu_camera.c:1789: warning: label 'exit_free_irq' defined but not used Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/video/sh_mobile_ceu_camera.c b/drivers/media/video/sh_mobile_ceu_camera.c index afdea0d..9c8b7c7 100644 --- a/drivers/media/video/sh_mobile_ceu_camera.c +++ b/drivers/media/video/sh_mobile_ceu_camera.c @@ -1730,7 +1730,6 @@ static int __devinit sh_mobile_ceu_probe(struct platform_device *pdev) exit_free_clk: pm_runtime_disable(&pdev->dev); -exit_free_irq: free_irq(pcdev->irq, pcdev); exit_release_mem: if (platform_get_resource(pdev, IORESOURCE_MEM, 1)) -- cgit v0.10.2 From 361c95119a60adb7f84946af648b7c87cdc55e17 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 26 Nov 2009 12:22:11 -0300 Subject: V4L/DVB (13530): Fix wrong parameter order in memset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Edwin Török found the following: In function ‘memset’, inlined from ‘ir_input_init’ at drivers/media/common/ir-functions.c:67: /home/edwin/builds/linux-2.6/arch/x86/include/asm/string_64.h:61: warning: call to ‘__warn_memset_zero_len’ declared with attribute warning: memset used with constant zero length parameter; this could be due to transposed parameters memset(ir->ir_codes, sizeof(ir->ir_codes), 0); In actual practice the only caller I can find happens to already have cleared the buffer before calling ir_input_init. Signed-off-by: Alan Cox Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/common/ir-functions.c b/drivers/media/common/ir-functions.c index 655474b..abd4791 100644 --- a/drivers/media/common/ir-functions.c +++ b/drivers/media/common/ir-functions.c @@ -64,7 +64,7 @@ void ir_input_init(struct input_dev *dev, struct ir_input_state *ir, ir->ir_type = ir_type; - memset(ir->ir_codes, sizeof(ir->ir_codes), 0); + memset(ir->ir_codes, 0, sizeof(ir->ir_codes)); /* * FIXME: This is a temporary workaround to use the new IR tables -- cgit v0.10.2 From 862f89b3d4c6bf3caab7fc69661fc6e725edd00a Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 25 Nov 2009 01:06:37 +0100 Subject: PM: fix irq enable/disable in runtime PM code This patch (as1305) fixes a bug in the irq-enable settings and removes some related overhead in the runtime PM code. In __pm_runtime_resume(), within the scope of the original spin_lock_irq(), we know that irqs are disabled. There's no reason to go through a pair of enable/disable cycles when acquiring and releasing the parent's lock. In __pm_runtime_set_status(), irqs are already disabled when the parent's lock is acquired, and they must remain disabled when it is released. Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index a770498..846d89e 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -328,11 +328,11 @@ int __pm_runtime_resume(struct device *dev, bool from_wq) * necessary. */ parent = dev->parent; - spin_unlock_irq(&dev->power.lock); + spin_unlock(&dev->power.lock); pm_runtime_get_noresume(parent); - spin_lock_irq(&parent->power.lock); + spin_lock(&parent->power.lock); /* * We can resume if the parent's run-time PM is disabled or it * is set to ignore children. @@ -343,9 +343,9 @@ int __pm_runtime_resume(struct device *dev, bool from_wq) if (parent->power.runtime_status != RPM_ACTIVE) retval = -EBUSY; } - spin_unlock_irq(&parent->power.lock); + spin_unlock(&parent->power.lock); - spin_lock_irq(&dev->power.lock); + spin_lock(&dev->power.lock); if (retval) goto out; goto repeat; @@ -777,7 +777,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status) } if (parent) { - spin_lock_irq(&parent->power.lock); + spin_lock(&parent->power.lock); /* * It is invalid to put an active child under a parent that is @@ -793,7 +793,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status) atomic_inc(&parent->power.child_count); } - spin_unlock_irq(&parent->power.lock); + spin_unlock(&parent->power.lock); if (error) goto out; -- cgit v0.10.2 From 98468efddb101f8a29af974101c17ba513b07be1 Mon Sep 17 00:00:00 2001 From: Roger Oksanen Date: Sun, 29 Nov 2009 17:17:29 -0800 Subject: e100: Use pci pool to work around GFP_ATOMIC order 5 memory allocation failure pci_alloc_consistent uses GFP_ATOMIC allocation that may fail on some systems with limited memory (Bug #14265). pci_pool_alloc allows waiting with GFP_KERNEL. Tested-by: Karol Lewandowski Signed-off-by: Roger Oksanen Signed-off-by: David S. Miller diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 3c29a20..d269a68 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -157,6 +157,7 @@ #include #include #include +#include #include #include #include @@ -602,6 +603,7 @@ struct nic { struct mem *mem; dma_addr_t dma_addr; + struct pci_pool *cbs_pool; dma_addr_t cbs_dma_addr; u8 adaptive_ifs; u8 tx_threshold; @@ -1793,9 +1795,7 @@ static void e100_clean_cbs(struct nic *nic) nic->cb_to_clean = nic->cb_to_clean->next; nic->cbs_avail++; } - pci_free_consistent(nic->pdev, - sizeof(struct cb) * nic->params.cbs.count, - nic->cbs, nic->cbs_dma_addr); + pci_pool_free(nic->cbs_pool, nic->cbs, nic->cbs_dma_addr); nic->cbs = NULL; nic->cbs_avail = 0; } @@ -1813,8 +1813,8 @@ static int e100_alloc_cbs(struct nic *nic) nic->cb_to_use = nic->cb_to_send = nic->cb_to_clean = NULL; nic->cbs_avail = 0; - nic->cbs = pci_alloc_consistent(nic->pdev, - sizeof(struct cb) * count, &nic->cbs_dma_addr); + nic->cbs = pci_pool_alloc(nic->cbs_pool, GFP_KERNEL, + &nic->cbs_dma_addr); if (!nic->cbs) return -ENOMEM; @@ -2841,7 +2841,11 @@ static int __devinit e100_probe(struct pci_dev *pdev, DPRINTK(PROBE, ERR, "Cannot register net device, aborting.\n"); goto err_out_free; } - + nic->cbs_pool = pci_pool_create(netdev->name, + nic->pdev, + nic->params.cbs.count * sizeof(struct cb), + sizeof(u32), + 0); DPRINTK(PROBE, INFO, "addr 0x%llx, irq %d, MAC addr %pM\n", (unsigned long long)pci_resource_start(pdev, use_io ? 1 : 0), pdev->irq, netdev->dev_addr); @@ -2871,6 +2875,7 @@ static void __devexit e100_remove(struct pci_dev *pdev) unregister_netdev(netdev); e100_free(nic); pci_iounmap(pdev, nic->csr); + pci_pool_destroy(nic->cbs_pool); free_netdev(netdev); pci_release_regions(pdev); pci_disable_device(pdev); -- cgit v0.10.2 From bbf31bf18d34caa87dd01f08bf713635593697f2 Mon Sep 17 00:00:00 2001 From: David Ford Date: Sun, 29 Nov 2009 23:02:22 -0800 Subject: ipv4: additional update of dev_net(dev) to struct *net in ip_fragment.c, NULL ptr OOPS ipv4 ip_frag_reasm(), fully replace 'dev_net(dev)' with 'net', defined previously patched into 2.6.29. Between 2.6.28.10 and 2.6.29, net/ipv4/ip_fragment.c was patched, changing from dev_net(dev) to container_of(...). Unfortunately the goto section (out_fail) on oversized packets inside ip_frag_reasm() didn't get touched up as well. Oversized IP packets cause a NULL pointer dereference and immediate hang. I discovered this running openvasd and my previous email on this is titled: NULL pointer dereference at 2.6.32-rc8:net/ipv4/ip_fragment.c:566 Signed-off-by: David Ford Signed-off-by: David S. Miller diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 575f9bd..d3fe10b 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -563,7 +563,7 @@ out_oversize: printk(KERN_INFO "Oversized IP packet from %pI4.\n", &qp->saddr); out_fail: - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMFAILS); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); return err; } -- cgit v0.10.2 From cc098dc705895f6b0109b7e8e026ac2b8ae1c0a1 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sun, 29 Nov 2009 23:12:52 -0800 Subject: r8169: restore mac addr in rtl8169_remove_one and rtl_shutdown The newer chipsets (all PCI-E) are known that they need full power cycle (AC or battery removal) to reset MAC address to a hardwired one. Previous patch to address this problem loads the original MAC address from EEPROM. But it brought other problem for which it is necessary to introduce a new module parameter. However, it might suffice to restore the initial MAC address before shutdown/reboot/kexec and when removing the module. Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index b9221bd..0fe2fc9 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -3235,6 +3235,10 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev) flush_scheduled_work(); unregister_netdev(dev); + + /* restore original MAC address */ + rtl_rar_set(tp, dev->perm_addr); + rtl_disable_msi(pdev, tp); rtl8169_release_board(pdev, dev, tp->mmio_addr); pci_set_drvdata(pdev, NULL); @@ -4881,6 +4885,9 @@ static void rtl_shutdown(struct pci_dev *pdev) rtl8169_net_suspend(dev); + /* restore original MAC address */ + rtl_rar_set(tp, dev->perm_addr); + spin_lock_irq(&tp->lock); rtl8169_asic_down(ioaddr); -- cgit v0.10.2 From 6c53b1b15e222244358d3cbbefd2a13920faa352 Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Sun, 29 Nov 2009 23:14:45 -0800 Subject: smsc9420: prevent BUG() if ethtool is called with interface down This patch fixes a null pointer dereference BUG() if ethtool is used on an smsc9420 interface while it is down, because the phy_dev is only allocated while the interface is up. Signed-off-by: Steve Glendinning Signed-off-by: David S. Miller diff --git a/drivers/net/smsc9420.c b/drivers/net/smsc9420.c index b4909a2..0f79092 100644 --- a/drivers/net/smsc9420.c +++ b/drivers/net/smsc9420.c @@ -252,6 +252,9 @@ static int smsc9420_ethtool_get_settings(struct net_device *dev, { struct smsc9420_pdata *pd = netdev_priv(dev); + if (!pd->phy_dev) + return -ENODEV; + cmd->maxtxpkt = 1; cmd->maxrxpkt = 1; return phy_ethtool_gset(pd->phy_dev, cmd); @@ -262,6 +265,9 @@ static int smsc9420_ethtool_set_settings(struct net_device *dev, { struct smsc9420_pdata *pd = netdev_priv(dev); + if (!pd->phy_dev) + return -ENODEV; + return phy_ethtool_sset(pd->phy_dev, cmd); } @@ -290,6 +296,10 @@ static void smsc9420_ethtool_set_msglevel(struct net_device *netdev, u32 data) static int smsc9420_ethtool_nway_reset(struct net_device *netdev) { struct smsc9420_pdata *pd = netdev_priv(netdev); + + if (!pd->phy_dev) + return -ENODEV; + return phy_start_aneg(pd->phy_dev); } @@ -312,6 +322,10 @@ smsc9420_ethtool_getregs(struct net_device *dev, struct ethtool_regs *regs, for (i = 0; i < 0x100; i += (sizeof(u32))) data[j++] = smsc9420_reg_read(pd, i); + // cannot read phy registers if the net device is down + if (!phy_dev) + return; + for (i = 0; i <= 31; i++) data[j++] = smsc9420_mii_read(phy_dev->bus, phy_dev->addr, i); } -- cgit v0.10.2 From d5ccd67bb77ced5249067d05171992a7d5020393 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Sat, 28 Nov 2009 00:13:23 +0000 Subject: NET: smc91x: Fix irq flags smc91x.h defines SMC_IRQ_FLAGS to be -1 when it wants the interrupt flags to be taken from the resource structure. However, d280ead changed this to checking for non-zero resource flags. Unfortunately, this means that on some platforms, we end up passing '-1' to request_irq rather than the desired result. Combine the two conditions into one so that the IRQ flags are taken from the resource if either SMC_IRQ_FLAGS is -1 or the resource flags specify an interrupt trigger. This restores network on at least the Versatile platform. Signed-off-by: Russell King Acked-by: Eric Miao Signed-off-by: David S. Miller diff --git a/drivers/net/smc91x.c b/drivers/net/smc91x.c index 05c91ee..f12206b 100644 --- a/drivers/net/smc91x.c +++ b/drivers/net/smc91x.c @@ -2283,7 +2283,7 @@ static int __devinit smc_drv_probe(struct platform_device *pdev) ndev->irq = ires->start; - if (ires->flags & IRQF_TRIGGER_MASK) + if (irq_flags == -1 || ires->flags & IRQF_TRIGGER_MASK) irq_flags = ires->flags & IRQF_TRIGGER_MASK; ret = smc_request_attrib(pdev, ndev); -- cgit v0.10.2 From 3c91c7ae84966f992429e1e128c4936f22b89064 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 28 Nov 2009 13:57:00 +0000 Subject: ep93xx-eth: check for zero MAC address on probe, not on device open If we happen to have registered the driver without passing a MAC address, we will print a zero MAC address and register the interface with this invalid address, this is confusin. This patch moves the checking of a valid ethernet address and the generation of a random one down from the open function to the probe function. Signed-off-by: Florian Fainelli Acked-by: Lennert Buytenhek Signed-off-by: David S. Miller diff --git a/drivers/net/arm/ep93xx_eth.c b/drivers/net/arm/ep93xx_eth.c index 2be49c8..b25467a 100644 --- a/drivers/net/arm/ep93xx_eth.c +++ b/drivers/net/arm/ep93xx_eth.c @@ -628,15 +628,6 @@ static int ep93xx_open(struct net_device *dev) if (ep93xx_alloc_buffers(ep)) return -ENOMEM; - if (is_zero_ether_addr(dev->dev_addr)) { - random_ether_addr(dev->dev_addr); - printk(KERN_INFO "%s: generated random MAC address " - "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x.\n", dev->name, - dev->dev_addr[0], dev->dev_addr[1], - dev->dev_addr[2], dev->dev_addr[3], - dev->dev_addr[4], dev->dev_addr[5]); - } - napi_enable(&ep->napi); if (ep93xx_start_hw(dev)) { @@ -877,6 +868,9 @@ static int ep93xx_eth_probe(struct platform_device *pdev) ep->mii.mdio_write = ep93xx_mdio_write; ep->mdc_divisor = 40; /* Max HCLK 100 MHz, min MDIO clk 2.5 MHz. */ + if (is_zero_ether_addr(dev->dev_addr)) + random_ether_addr(dev->dev_addr); + err = register_netdev(dev); if (err) { dev_err(&pdev->dev, "Failed to register netdev\n"); -- cgit v0.10.2 From 40be261dfd8bfba4baeff40168d44a6a4450ace1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 29 Nov 2009 06:02:55 +0000 Subject: wan: cosa: drop chan->wsem on error path The other paths all drop chan->wsem. This was found by a static checker (smatch). Signed-off-by: Dan Carpenter Acked-by: Jan "Yenya" Kasprzak Signed-off-by: David S. Miller diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index e2c33c0..8e25ca7 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -907,6 +907,7 @@ static ssize_t cosa_write(struct file *file, current->state = TASK_RUNNING; chan->tx_status = 1; spin_unlock_irqrestore(&cosa->lock, flags); + up(&chan->wsem); return -ERESTARTSYS; } } -- cgit v0.10.2 From 0cae200eec6330cd2c20b24279597be1da50dc93 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 30 Nov 2009 00:13:28 -0800 Subject: b44: Fix wedge when using netconsole. Fixes kernel bugzilla #14691 Due to the way netpoll works, it is perfectly legal to see NAPI already scheduled when new device events are pending in b44_interrupt(). So logging a message about it is wrong and in fact harmful. Based upon a patch by Andreas Mohr. Signed-off-by: David S. Miller diff --git a/drivers/net/b44.c b/drivers/net/b44.c index e046943..2a91323 100644 --- a/drivers/net/b44.c +++ b/drivers/net/b44.c @@ -912,9 +912,6 @@ static irqreturn_t b44_interrupt(int irq, void *dev_id) bp->istat = istat; __b44_disable_ints(bp); __napi_schedule(&bp->napi); - } else { - printk(KERN_ERR PFX "%s: Error, poll already scheduled\n", - dev->name); } irq_ack: -- cgit v0.10.2 From 4acd57c3de62374fe5bb52e5cd24538190f4eab2 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 29 Nov 2009 16:39:52 +0000 Subject: ALSA: AACI: fix AC97 multiple-open bug Signed-off-by: Russell King Cc: Signed-off-by: Takashi Iwai diff --git a/sound/arm/aaci.c b/sound/arm/aaci.c index 1f0f821..1cb7c28 100644 --- a/sound/arm/aaci.c +++ b/sound/arm/aaci.c @@ -504,6 +504,10 @@ static int aaci_pcm_hw_params(struct snd_pcm_substream *substream, int err; aaci_pcm_hw_free(substream); + if (aacirun->pcm_open) { + snd_ac97_pcm_close(aacirun->pcm); + aacirun->pcm_open = 0; + } err = devdma_hw_alloc(NULL, substream, params_buffer_bytes(params)); -- cgit v0.10.2 From 8ee763b9c82c6ca0a59a7271ce4fa29d7baf5c09 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 29 Nov 2009 16:39:59 +0000 Subject: ALSA: AACI: fix recording bug pcm->r[1].slots is the double rate slot information, not the capture information. For capture, 'pcm' will already be the capture ac97 pcm structure. Signed-off-by: Russell King Cc: Signed-off-by: Takashi Iwai diff --git a/sound/arm/aaci.c b/sound/arm/aaci.c index 1cb7c28..6c160a0 100644 --- a/sound/arm/aaci.c +++ b/sound/arm/aaci.c @@ -521,7 +521,7 @@ static int aaci_pcm_hw_params(struct snd_pcm_substream *substream, else err = snd_ac97_pcm_open(aacirun->pcm, params_rate(params), params_channels(params), - aacirun->pcm->r[1].slots); + aacirun->pcm->r[0].slots); if (err) goto out; -- cgit v0.10.2 From 33a932d14323b957a4e17a6c8428d3f048a30822 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 28 Nov 2009 20:33:11 +0000 Subject: parisc: fix unwind with recent gcc versions kernel unwinding is broken with gcc >= 4.x. Part of the problem is that binutils seems very sensitive to where the unwind information is stored. Signed-off-by: Helge Deller Signed-off-by: Kyle McMartin Signed-off-by: Linus Torvalds diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index 69dad5a..a36799e 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -28,7 +28,7 @@ #define dbg(x...) #endif -#define KERNEL_START (KERNEL_BINARY_TEXT_START - 0x1000) +#define KERNEL_START (KERNEL_BINARY_TEXT_START) extern struct unwind_table_entry __start___unwind[]; extern struct unwind_table_entry __stop___unwind[]; diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index fda4baa..9dab4a4 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -78,9 +78,6 @@ SECTIONS */ . = ALIGN(PAGE_SIZE); data_start = .; - EXCEPTION_TABLE(16) - - NOTES /* unwind info */ .PARISC.unwind : { @@ -89,6 +86,9 @@ SECTIONS __stop___unwind = .; } + EXCEPTION_TABLE(16) + NOTES + /* Data */ RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) -- cgit v0.10.2 From e3a41d7b99e7f97d9a50bec2a8f4eb237ce1d504 Mon Sep 17 00:00:00 2001 From: Gertjan van Wingerde Date: Tue, 24 Nov 2009 06:34:03 +0100 Subject: .gitignore: Add bzip2 compressed files We can have bzip2 compressed images nowadays. Signed-off-by: Gertjan van Wingerde Signed-off-by: Linus Torvalds diff --git a/.gitignore b/.gitignore index b93fb7e..946c7ec 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ *.elf *.bin *.gz +*.bz2 *.lzma *.patch *.gcno -- cgit v0.10.2 From 4253119acf412fd686ef4bd8749b5a4d70ea3a51 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 20 Nov 2009 09:15:51 +0100 Subject: mac80211: fix two remote exploits Lennert Buytenhek noticed a remotely triggerable problem in mac80211, which is due to some code shuffling I did that ended up changing the order in which things were done -- this was in commit d75636ef9c1af224f1097941879d5a8db7cd04e5 Author: Johannes Berg Date: Tue Feb 10 21:25:53 2009 +0100 mac80211: RX aggregation: clean up stop session The problem is that the BUG_ON moved before the various checks, and as such can be triggered. As the comment indicates, the BUG_ON can be removed since the ampdu_action callback must already exist when the state is OPERATIONAL. A similar code path leads to a WARN_ON in ieee80211_stop_tx_ba_session, which can also be removed. Cc: stable@kernel.org [2.6.29+] Cc: Lennert Buytenhek Signed-off-by: Johannes Berg Signed-off-by: John W. Linville diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index bc064d7..ce8e0e7 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -85,10 +85,6 @@ void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *r struct ieee80211_local *local = sdata->local; struct sta_info *sta; - /* stop HW Rx aggregation. ampdu_action existence - * already verified in session init so we add the BUG_ON */ - BUG_ON(!local->ops->ampdu_action); - rcu_read_lock(); sta = sta_info_get(local, ra); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 206fd82..63224d1ee 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -545,7 +545,7 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, struct sta_info *sta; int ret = 0; - if (WARN_ON(!local->ops->ampdu_action)) + if (!local->ops->ampdu_action) return -EINVAL; if (tid >= STA_TID_NUM) -- cgit v0.10.2 From 827d42c9ac91ddd728e4f4a31fefb906ef2ceff7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 22 Nov 2009 12:28:41 +0100 Subject: mac80211: fix spurious delBA handling Lennert Buytenhek noticed that delBA handling in mac80211 was broken and has remotely triggerable problems, some of which are due to some code shuffling I did that ended up changing the order in which things were done -- this was commit d75636ef9c1af224f1097941879d5a8db7cd04e5 Author: Johannes Berg Date: Tue Feb 10 21:25:53 2009 +0100 mac80211: RX aggregation: clean up stop session and other parts were already present in the original commit d92684e66091c0f0101819619b315b4bb8b5bcc5 Author: Ron Rindjunsky Date: Mon Jan 28 14:07:22 2008 +0200 mac80211: A-MPDU Tx add delBA from recipient support The first problem is that I moved a BUG_ON before various checks -- thereby making it possible to hit. As the comment indicates, the BUG_ON can be removed since the ampdu_action callback must already exist when the state is != IDLE. The second problem isn't easily exploitable but there's a race condition due to unconditionally setting the state to OPERATIONAL when a delBA frame is received, even when no aggregation session was ever initiated. All the drivers accept stopping the session even then, but that opens a race window where crashes could happen before the driver accepts it. Right now, a WARN_ON may happen with non-HT drivers, while the race opens only for HT drivers. For this case, there are two things necessary to fix it: 1) don't process spurious delBA frames, and be more careful about the session state; don't drop the lock 2) HT drivers need to be prepared to handle a session stop even before the session was really started -- this is true for all drivers (that support aggregation) but iwlwifi which can be fixed easily. The other HT drivers (ath9k and ar9170) are behaving properly already. Reported-by: Lennert Buytenhek Cc: stable@kernel.org Signed-off-by: Johannes Berg Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c index fb9bcfa..b7e196e 100644 --- a/drivers/net/wireless/iwlwifi/iwl-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-tx.c @@ -1277,8 +1277,16 @@ int iwl_tx_agg_stop(struct iwl_priv *priv , const u8 *ra, u16 tid) return -ENXIO; } + if (priv->stations[sta_id].tid[tid].agg.state == + IWL_EMPTYING_HW_QUEUE_ADDBA) { + IWL_DEBUG_HT(priv, "AGG stop before setup done\n"); + ieee80211_stop_tx_ba_cb_irqsafe(priv->hw, ra, tid); + priv->stations[sta_id].tid[tid].agg.state = IWL_AGG_OFF; + return 0; + } + if (priv->stations[sta_id].tid[tid].agg.state != IWL_AGG_ON) - IWL_WARN(priv, "Stopping AGG while state not IWL_AGG_ON\n"); + IWL_WARN(priv, "Stopping AGG while state not ON or starting\n"); tid_data = &priv->stations[sta_id].tid[tid]; ssn = (tid_data->seq_number & IEEE80211_SCTL_SEQ) >> 4; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c75b960..998c30f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1283,6 +1283,12 @@ enum ieee80211_filter_flags { * * These flags are used with the ampdu_action() callback in * &struct ieee80211_ops to indicate which action is needed. + * + * Note that drivers MUST be able to deal with a TX aggregation + * session being stopped even before they OK'ed starting it by + * calling ieee80211_start_tx_ba_cb(_irqsafe), because the peer + * might receive the addBA frame and send a delBA right away! + * * @IEEE80211_AMPDU_RX_START: start Rx aggregation * @IEEE80211_AMPDU_RX_STOP: stop Rx aggregation * @IEEE80211_AMPDU_TX_START: start Tx aggregation diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 63224d1ee..89e238b 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -123,13 +123,18 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1 ieee80211_tx_skb(sdata, skb, 0); } -static int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator) +int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, + enum ieee80211_back_parties initiator) { struct ieee80211_local *local = sta->local; int ret; u8 *state; +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n", + sta->sta.addr, tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + state = &sta->ampdu_mlme.tid_state_tx[tid]; if (*state == HT_AGG_STATE_OPERATIONAL) @@ -143,7 +148,6 @@ static int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, /* HW shall not deny going back to legacy */ if (WARN_ON(ret)) { - *state = HT_AGG_STATE_OPERATIONAL; /* * We may have pending packets get stuck in this case... * Not bothering with a workaround for now. @@ -525,11 +529,6 @@ int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, goto unlock; } -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n", - sta->sta.addr, tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator); unlock: diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 48ef1a2..cdc58e6 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -141,7 +141,6 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_mgmt *mgmt, size_t len) { - struct ieee80211_local *local = sdata->local; u16 tid, params; u16 initiator; @@ -161,10 +160,9 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, WLAN_BACK_INITIATOR, 0); else { /* WLAN_BACK_RECIPIENT */ spin_lock_bh(&sta->lock); - sta->ampdu_mlme.tid_state_tx[tid] = - HT_AGG_STATE_OPERATIONAL; + if (sta->ampdu_mlme.tid_state_tx[tid] & HT_ADDBA_REQUESTED_MSK) + ___ieee80211_stop_tx_ba_session(sta, tid, + WLAN_BACK_RECIPIENT); spin_unlock_bh(&sta->lock); - ieee80211_stop_tx_ba_session(&local->hw, sta->sta.addr, tid, - WLAN_BACK_RECIPIENT); } } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a910bf1..10d316e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1091,6 +1091,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_back_parties initiator); +int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, + enum ieee80211_back_parties initiator); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, -- cgit v0.10.2 From 7924326fef03e9992c118a42aa148c4f824a4097 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 30 Nov 2009 17:37:20 -0200 Subject: staging/go7007: Fix compilation by re-adding the missing s2250-loader.h As pointed by Stefan Lippers-Hollmann , Commit: fd9a40da1db372833e1af6397d2f6c94ceff3dad broke s2250 compilation. This patch re-adds the missing s2250-loader.h Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/staging/go7007/s2250-loader.h b/drivers/staging/go7007/s2250-loader.h new file mode 100644 index 0000000..b7c301a --- /dev/null +++ b/drivers/staging/go7007/s2250-loader.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2005-2006 Micronas USA Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. + */ + +#ifndef _S2250_LOADER_H_ +#define _S2250_LOADER_H_ + +extern int s2250loader_init(void); +extern void s2250loader_cleanup(void); + +#endif -- cgit v0.10.2 From e49bd72f9cfba9f0d0204f961d036cd0c7e37f2e Mon Sep 17 00:00:00 2001 From: Pete Eberlein Date: Sun, 15 Nov 2009 08:14:14 -0300 Subject: V4L/DVB (13372): staging/go7007: fix mutex function usage for s2250 Fix mutex function usage, which was overlooked in a previous patch. Signed-off-by: Pete Eberlein Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/staging/go7007/s2250-board.c b/drivers/staging/go7007/s2250-board.c index 8c85a9c..f4a6541 100644 --- a/drivers/staging/go7007/s2250-board.c +++ b/drivers/staging/go7007/s2250-board.c @@ -261,7 +261,7 @@ static int read_reg_fp(struct i2c_client *client, u16 addr, u16 *val) memset(buf, 0xcd, 6); usb = go->hpi_context; - if (down_interruptible(&usb->i2c_lock) != 0) { + if (mutex_lock_interruptible(&usb->i2c_lock) != 0) { printk(KERN_INFO "i2c lock failed\n"); kfree(buf); return -EINTR; @@ -270,7 +270,7 @@ static int read_reg_fp(struct i2c_client *client, u16 addr, u16 *val) kfree(buf); return -EFAULT; } - up(&usb->i2c_lock); + mutex_unlock(&usb->i2c_lock); *val = (buf[0] << 8) | buf[1]; kfree(buf); -- cgit v0.10.2 From 04d8a9db89f00dee78d792d094dc573784ead643 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 30 Nov 2009 15:37:25 -0500 Subject: arch/alpha/kernel/sys_ruffian.c: Use DIV_ROUND_CLOSEST The kernel.h macro DIV_ROUND_CLOSEST performs the computation (x + d/2)/d but is perhaps more readable. The semantic patch that makes this change is as follows: (http://www.emn.fr/x-info/coccinelle/) // @haskernel@ @@ @depends on haskernel@ expression x,__divisor; @@ - (((x) + ((__divisor) / 2)) / (__divisor)) + DIV_ROUND_CLOSEST(x,__divisor) // Signed-off-by: Julia Lawall Cc: Ivan Kokshaysky Cc: Richard Henderson Signed-off-by: Matt Turner diff --git a/arch/alpha/kernel/sys_ruffian.c b/arch/alpha/kernel/sys_ruffian.c index d9f9cfe..8de1046 100644 --- a/arch/alpha/kernel/sys_ruffian.c +++ b/arch/alpha/kernel/sys_ruffian.c @@ -66,7 +66,7 @@ ruffian_init_irq(void) common_init_isa_dma(); } -#define RUFFIAN_LATCH ((PIT_TICK_RATE + HZ / 2) / HZ) +#define RUFFIAN_LATCH DIV_ROUND_CLOSEST(PIT_TICK_RATE, HZ) static void __init ruffian_init_rtc(void) -- cgit v0.10.2 From cc9a2c8301683f73b7e0d1fc2cb5159110f3469f Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 30 Nov 2009 15:38:19 -0500 Subject: arch/alpha/kernel: Add kmalloc NULL tests Check that the result of kmalloc is not NULL before passing it to other functions. The semantic match that finds this problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @@ expression *x; identifier f; constant char *C; @@ x = \(kmalloc\|kcalloc\|kzalloc\)(...); ... when != x == NULL when != x != NULL when != (x || ...) ( kfree(x) f(...,C,...,x,...) | *f(...,x,...) | *x->f ) // Signed-off-by: Julia Lawall Cc: Ivan Kokshaysky Cc: Richard Henderson Signed-off-by: Matt Turner diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index 8e059e5..53dd2f1 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -1103,6 +1103,8 @@ marvel_agp_info(void) * Allocate the info structure. */ agp = kmalloc(sizeof(*agp), GFP_KERNEL); + if (!agp) + return NULL; /* * Fill it in. diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c index 7668649..219bf27 100644 --- a/arch/alpha/kernel/core_titan.c +++ b/arch/alpha/kernel/core_titan.c @@ -757,6 +757,8 @@ titan_agp_info(void) * Allocate the info structure. */ agp = kmalloc(sizeof(*agp), GFP_KERNEL); + if (!agp) + return NULL; /* * Fill it in. -- cgit v0.10.2 From 1fdf475aa141a669af8db6ccc7015f0b725087de Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Nov 2009 12:53:30 -0800 Subject: tcp: tcp_disconnect() should clear window_clamp NFS can reuse its TCP socket after calling tcp_disconnect(). We noticed window scaling was not negotiated in SYN packet of next connection request. Fix is to clear tp->window_clamp in tcp_disconnect(). Reported-by: Krzysztof Oledzki Tested-by: Krzysztof Oledzki Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f1813bc..d7a884c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2059,6 +2059,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_cnt = 0; tp->bytes_acked = 0; + tp->window_clamp = 0; tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); inet_csk_delack_init(sk); -- cgit v0.10.2 From c69f677cc852f3f7b2342ab2f1598670a463d576 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 20 Nov 2009 20:48:31 +0100 Subject: fbdev: Migrate mailing lists to vger The fbdev mailing lists at SourceForge have been migrated to a single mailing list at kernel.org: linux-fbdev@vger.kernel.org. Signed-off-by: Geert Uytterhoeven Acked-by: Jean Delvare Signed-off-by: Linus Torvalds diff --git a/Documentation/fb/framebuffer.txt b/Documentation/fb/framebuffer.txt index b3e3a03..fe79e3c 100644 --- a/Documentation/fb/framebuffer.txt +++ b/Documentation/fb/framebuffer.txt @@ -312,10 +312,8 @@ and to the following documentation: 8. Mailing list --------------- -There are several frame buffer device related mailing lists at SourceForge: - - linux-fbdev-announce@lists.sourceforge.net, for announcements, - - linux-fbdev-user@lists.sourceforge.net, for generic user support, - - linux-fbdev-devel@lists.sourceforge.net, for project developers. +There is a frame buffer device related mailing list at kernel.org: +linux-fbdev@vger.kernel.org. Point your web browser to http://sourceforge.net/projects/linux-fbdev/ for subscription information and archive browsing. diff --git a/MAINTAINERS b/MAINTAINERS index c824b4d..5726ffc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1027,7 +1027,7 @@ F: drivers/serial/atmel_serial.c ATMEL LCDFB DRIVER M: Nicolas Ferre -L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-fbdev@vger.kernel.org S: Maintained F: drivers/video/atmel_lcdfb.c F: include/video/atmel_lcdc.h @@ -2113,7 +2113,7 @@ F: drivers/net/wan/dlci.c F: drivers/net/wan/sdla.c FRAMEBUFFER LAYER -L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-fbdev@vger.kernel.org W: http://linux-fbdev.sourceforge.net/ S: Orphan F: Documentation/fb/ @@ -2136,7 +2136,7 @@ F: drivers/i2c/busses/i2c-cpm.c FREESCALE IMX / MXC FRAMEBUFFER DRIVER M: Sascha Hauer -L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-fbdev@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/plat-mxc/include/mach/imxfb.h @@ -2635,7 +2635,7 @@ S: Supported F: security/integrity/ima/ IMS TWINTURBO FRAMEBUFFER DRIVER -L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-fbdev@vger.kernel.org S: Orphan F: drivers/video/imsttfb.c @@ -2670,14 +2670,14 @@ F: drivers/input/ INTEL FRAMEBUFFER DRIVER (excluding 810 and 815) M: Sylvain Meyer -L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-fbdev@vger.kernel.org S: Maintained F: Documentation/fb/intelfb.txt F: drivers/video/intelfb/ INTEL 810/815 FRAMEBUFFER DRIVER M: Antonino Daplas -L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-fbdev@vger.kernel.org S: Maintained F: drivers/video/i810/ @@ -3391,7 +3391,7 @@ S: Supported MATROX FRAMEBUFFER DRIVER M: Petr Vandrovec -L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-fbdev@vger.kernel.org S: Maintained F: drivers/video/matrox/matroxfb_* F: include/linux/matroxfb.h @@ -3778,7 +3778,7 @@ F: fs/ntfs/ NVIDIA (rivafb and nvidiafb) FRAMEBUFFER DRIVER M: Antonino Daplas -L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-fbdev@vger.kernel.org S: Maintained F: drivers/video/riva/ F: drivers/video/nvidia/ @@ -3813,7 +3813,7 @@ F: sound/soc/omap/ OMAP FRAMEBUFFER SUPPORT M: Imre Deak -L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-fbdev@vger.kernel.org L: linux-omap@vger.kernel.org S: Maintained F: drivers/video/omap/ @@ -4319,14 +4319,14 @@ F: include/linux/qnxtypes.h RADEON FRAMEBUFFER DISPLAY DRIVER M: Benjamin Herrenschmidt -L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-fbdev@vger.kernel.org S: Maintained F: drivers/video/aty/radeon* F: include/linux/radeonfb.h RAGE128 FRAMEBUFFER DISPLAY DRIVER M: Paul Mackerras -L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-fbdev@vger.kernel.org S: Maintained F: drivers/video/aty/aty128fb.c @@ -4465,7 +4465,7 @@ F: drivers/net/wireless/rtl818x/rtl8187* S3 SAVAGE FRAMEBUFFER DRIVER M: Antonino Daplas -L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-fbdev@vger.kernel.org S: Maintained F: drivers/video/savage/ @@ -5628,7 +5628,7 @@ S: Maintained UVESAFB DRIVER M: Michal Januszewski -L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-fbdev@vger.kernel.org W: http://dev.gentoo.org/~spock/projects/uvesafb/ S: Maintained F: Documentation/fb/uvesafb.txt @@ -5661,7 +5661,7 @@ F: drivers/mmc/host/via-sdmmc.c VIA UNICHROME(PRO)/CHROME9 FRAMEBUFFER DRIVER M: Joseph Chan M: Scott Fang -L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers) +L: linux-fbdev@vger.kernel.org S: Maintained F: drivers/video/via/ -- cgit v0.10.2 From 199bc9ff5ca5e4b3bcaff8927b2983c65f34c263 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 30 Nov 2009 09:06:40 +0000 Subject: jffs2: Fix memory corruption in jffs2_read_inode_range() In 2.6.23 kernel, commit a32ea1e1f925399e0d81ca3f7394a44a6dafa12c ("Fix read/truncate race") fixed a race in the generic code, and as a side effect, now do_generic_file_read() can ask us to readpage() past the i_size. This seems to be correctly handled by the block routines (e.g. block_read_full_page() fills the page with zeroes in case if somebody is trying to read past the last inode's block). JFFS2 doesn't handle this; it assumes that it won't be asked to read pages which don't exist -- and thus that there will be at least _one_ valid 'frag' on the page it's being asked to read. It will fill any holes with the following memset: memset(buf, 0, min(end, frag->ofs + frag->size) - offset); When the 'closest smaller match' returned by jffs2_lookup_node_frag() is actually on a previous page and ends before 'offset', that results in: memset(buf, 0, ); Hopefully, in most cases the corruption is fatal, and quickly causing random oopses, like this: root@10.0.0.4:~/ltp-fs-20090531# ./testcases/kernel/fs/ftest/ftest01 Unable to handle kernel paging request for data at address 0x00000008 Faulting instruction address: 0xc01cd980 Oops: Kernel access of bad area, sig: 11 [#1] [...] NIP [c01cd980] rb_insert_color+0x38/0x184 LR [c0043978] enqueue_hrtimer+0x88/0xc4 Call Trace: [c6c63b60] [c004f9a8] tick_sched_timer+0xa0/0xe4 (unreliable) [c6c63b80] [c0043978] enqueue_hrtimer+0x88/0xc4 [c6c63b90] [c0043a48] __run_hrtimer+0x94/0xbc [c6c63bb0] [c0044628] hrtimer_interrupt+0x140/0x2b8 [c6c63c10] [c000f8e8] timer_interrupt+0x13c/0x254 [c6c63c30] [c001352c] ret_from_except+0x0/0x14 --- Exception: 901 at memset+0x38/0x5c LR = jffs2_read_inode_range+0x144/0x17c [c6c63cf0] [00000000] (null) (unreliable) This patch fixes the issue, plus fixes all LTP tests on NAND/UBI with JFFS2 filesystem that were failing since 2.6.23 (seems like the bug above also broke the truncation). Reported-By: Anton Vorontsov Tested-By: Anton Vorontsov Signed-off-by: David Woodhouse Cc: stable@kernel.org Signed-off-by: Linus Torvalds diff --git a/fs/jffs2/read.c b/fs/jffs2/read.c index cfe05c1..3f39be1 100644 --- a/fs/jffs2/read.c +++ b/fs/jffs2/read.c @@ -164,12 +164,15 @@ int jffs2_read_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f, /* XXX FIXME: Where a single physical node actually shows up in two frags, we read it twice. Don't do that. */ - /* Now we're pointing at the first frag which overlaps our page */ + /* Now we're pointing at the first frag which overlaps our page + * (or perhaps is before it, if we've been asked to read off the + * end of the file). */ while(offset < end) { D2(printk(KERN_DEBUG "jffs2_read_inode_range: offset %d, end %d\n", offset, end)); - if (unlikely(!frag || frag->ofs > offset)) { + if (unlikely(!frag || frag->ofs > offset || + frag->ofs + frag->size <= offset)) { uint32_t holesize = end - offset; - if (frag) { + if (frag && frag->ofs > offset) { D1(printk(KERN_NOTICE "Eep. Hole in ino #%u fraglist. frag->ofs = 0x%08x, offset = 0x%08x\n", f->inocache->ino, frag->ofs, offset)); holesize = min(holesize, frag->ofs - offset); } -- cgit v0.10.2 From e8105903d78c81119754a42926951d9d17e191ba Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Mon, 23 Nov 2009 12:28:53 +0000 Subject: powerpc: Fix DEBUG_HIGHMEM build break from d4515646699 Code was added to mm/higmem.c that depends on several kmap types that powerpc does not support. We add dummy invalid definitions for KM_NMI, KM_NM_PTE, and KM_IRQ_PTE. According to list discussion, this fix should not be needed anymore starting with 2.6.33. The code is commented to this effect so hopefully we will remember to remove this. Signed-off-by: Becky Bruce Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/kmap_types.h b/arch/powerpc/include/asm/kmap_types.h index b6bac6f..9163695 100644 --- a/arch/powerpc/include/asm/kmap_types.h +++ b/arch/powerpc/include/asm/kmap_types.h @@ -29,5 +29,16 @@ enum km_type { KM_TYPE_NR }; +/* + * This is a temporary build fix that (so they say on lkml....) should no longer + * be required after 2.6.33, because of changes planned to the kmap code. + * Let's try to remove this cruft then. + */ +#ifdef CONFIG_DEBUG_HIGHMEM +#define KM_NMI (-1) +#define KM_NMI_PTE (-1) +#define KM_IRQ_PTE (-1) +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_KMAP_TYPES_H */ -- cgit v0.10.2 From 8627b96dd80dca440d91fbb1ec733be25912d0dd Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 18 Nov 2009 14:12:58 +0000 Subject: tty_port: handle the nonblocking open of a dead port corner case Some drivers allow O_NDELAY of a dead port (eg for setserial to work). In that situation we must not try to raise the carrier. Signed-off-by: Alan Cox Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c index 2e8552d..c63f3d3 100644 --- a/drivers/char/tty_port.c +++ b/drivers/char/tty_port.c @@ -219,8 +219,11 @@ int tty_port_block_til_ready(struct tty_port *port, /* if non-blocking mode is set we can pass directly to open unless the port has just hung up or is in another error state */ - if ((filp->f_flags & O_NONBLOCK) || - (tty->flags & (1 << TTY_IO_ERROR))) { + if (tty->flags & (1 << TTY_IO_ERROR)) { + port->flags |= ASYNC_NORMAL_ACTIVE; + return 0; + } + if (filp->f_flags & O_NONBLOCK) { /* Indicate we are open */ if (tty->termios->c_cflag & CBAUD) tty_port_raise_dtr_rts(port); -- cgit v0.10.2 From b037179f7a4fff7bd8279b0568a7dc663ebc9d15 Mon Sep 17 00:00:00 2001 From: Maxime Bizon Date: Sun, 15 Nov 2009 05:42:18 +0100 Subject: bcm63xx_uart: Fix serial driver compile breakage. The driver missed a small API change while sitting in Ralf's tree, this patch makes it compile again. Signed-off-by: Maxime Bizon Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/serial/bcm63xx_uart.c b/drivers/serial/bcm63xx_uart.c index beddaa6..37ad0c4 100644 --- a/drivers/serial/bcm63xx_uart.c +++ b/drivers/serial/bcm63xx_uart.c @@ -242,7 +242,7 @@ static void bcm_uart_do_rx(struct uart_port *port) * higher than fifo size anyway since we're much faster than * serial port */ max_count = 32; - tty = port->info->port.tty; + tty = port->state->port.tty; do { unsigned int iestat, c, cstat; char flag; @@ -318,7 +318,7 @@ static void bcm_uart_do_tx(struct uart_port *port) return; } - xmit = &port->info->xmit; + xmit = &port->state->xmit; if (uart_circ_empty(xmit)) goto txq_empty; -- cgit v0.10.2 From 16173c7c2d79da7eb89b41acfdebd74b130f4339 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 24 Nov 2009 10:22:41 +0000 Subject: tty/of_serial: add missing ns16550a id Many boards have a bug-free ns16550 compatible serial port, which we should register as PORT_16550A. This introduces a new value "ns16550a" for the compatible property of of_serial to let a firmware choose that model instead of using the crippled PORT_16550 mode. Reported-by: Alon Ziv Signed-off-by: Michal Simek Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/serial/of_serial.c b/drivers/serial/of_serial.c index 02406ba..cdf172e 100644 --- a/drivers/serial/of_serial.c +++ b/drivers/serial/of_serial.c @@ -161,6 +161,7 @@ static int of_platform_serial_remove(struct of_device *ofdev) static struct of_device_id __devinitdata of_platform_serial_table[] = { { .type = "serial", .compatible = "ns8250", .data = (void *)PORT_8250, }, { .type = "serial", .compatible = "ns16450", .data = (void *)PORT_16450, }, + { .type = "serial", .compatible = "ns16550a", .data = (void *)PORT_16550A, }, { .type = "serial", .compatible = "ns16550", .data = (void *)PORT_16550, }, { .type = "serial", .compatible = "ns16750", .data = (void *)PORT_16750, }, { .type = "serial", .compatible = "ns16850", .data = (void *)PORT_16850, }, -- cgit v0.10.2 From 8c960e49d8beaca7c5b3967cede225bbba36bf43 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 11 Nov 2009 16:57:03 -0500 Subject: Staging: hv: Fix argument order in incorrect memset invocations in hyperv driver. Nearly every invocation of memset in drivers/staging/hv/StorVsc.c has its arguments the wrong way around. Signed-off-by: Dave Jones Cc: Hank Janssen Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/hv/StorVsc.c b/drivers/staging/hv/StorVsc.c index 14015c9..2f7c425 100644 --- a/drivers/staging/hv/StorVsc.c +++ b/drivers/staging/hv/StorVsc.c @@ -196,7 +196,7 @@ static int StorVscChannelInit(struct hv_device *Device) * Now, initiate the vsc/vsp initialization protocol on the open * channel */ - memset(request, sizeof(struct storvsc_request_extension), 0); + memset(request, 0, sizeof(struct storvsc_request_extension)); request->WaitEvent = osd_WaitEventCreate(); vstorPacket->Operation = VStorOperationBeginInitialization; @@ -233,7 +233,7 @@ static int StorVscChannelInit(struct hv_device *Device) DPRINT_INFO(STORVSC, "QUERY_PROTOCOL_VERSION_OPERATION..."); /* reuse the packet for version range supported */ - memset(vstorPacket, sizeof(struct vstor_packet), 0); + memset(vstorPacket, 0, sizeof(struct vstor_packet)); vstorPacket->Operation = VStorOperationQueryProtocolVersion; vstorPacket->Flags = REQUEST_COMPLETION_FLAG; @@ -266,7 +266,7 @@ static int StorVscChannelInit(struct hv_device *Device) /* Query channel properties */ DPRINT_INFO(STORVSC, "QUERY_PROPERTIES_OPERATION..."); - memset(vstorPacket, sizeof(struct vstor_packet), 0); + memset(vstorPacket, 0, sizeof(struct vstor_packet)); vstorPacket->Operation = VStorOperationQueryProperties; vstorPacket->Flags = REQUEST_COMPLETION_FLAG; vstorPacket->StorageChannelProperties.PortNumber = @@ -305,7 +305,7 @@ static int StorVscChannelInit(struct hv_device *Device) DPRINT_INFO(STORVSC, "END_INITIALIZATION_OPERATION..."); - memset(vstorPacket, sizeof(struct vstor_packet), 0); + memset(vstorPacket, 0, sizeof(struct vstor_packet)); vstorPacket->Operation = VStorOperationEndInitialization; vstorPacket->Flags = REQUEST_COMPLETION_FLAG; @@ -508,7 +508,7 @@ static int StorVscConnectToVsp(struct hv_device *Device) int ret; storDriver = (struct storvsc_driver_object *)Device->Driver; - memset(&props, sizeof(struct vmstorage_channel_properties), 0); + memset(&props, 0, sizeof(struct vmstorage_channel_properties)); /* Open the channel */ ret = Device->Driver->VmbusChannelInterface.Open(Device, -- cgit v0.10.2 From 5996b3ddc422a16d53b8acf4980d0d6e8b2bf1ed Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 20 Nov 2009 16:29:17 +0000 Subject: Staging: hv: Fix vmbus event handler bug The flag ENABLE_POLLING is always enabled in original Makefile, but accidently removed during porting to mainline kernel. The patch fixes this bug which can cause stalled network communication. Credit needs to go to Eric Sesterhenn For pointing out a typo in the original code as well. Signed-off-by: Hank Janssen Signed-off-by: Haiyang Zhang Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/hv/Channel.c b/drivers/staging/hv/Channel.c index d649ee1..746370e 100644 --- a/drivers/staging/hv/Channel.c +++ b/drivers/staging/hv/Channel.c @@ -611,7 +611,7 @@ void VmbusChannelClose(struct vmbus_channel *Channel) /* Stop callback and cancel the timer asap */ Channel->OnChannelCallback = NULL; - del_timer(&Channel->poll_timer); + del_timer_sync(&Channel->poll_timer); /* Send a closing message */ info = kmalloc(sizeof(*info) + @@ -978,14 +978,10 @@ void VmbusChannelOnChannelEvent(struct vmbus_channel *Channel) { DumpVmbusChannel(Channel); ASSERT(Channel->OnChannelCallback); -#ifdef ENABLE_POLLING - del_timer(&Channel->poll_timer); - Channel->OnChannelCallback(Channel->ChannelCallbackContext); - channel->poll_timer.expires(jiffies + usecs_to_jiffies(100); - add_timer(&channel->poll_timer); -#else + Channel->OnChannelCallback(Channel->ChannelCallbackContext); -#endif + + mod_timer(&Channel->poll_timer, jiffies + usecs_to_jiffies(100)); } /** @@ -997,10 +993,6 @@ void VmbusChannelOnTimer(unsigned long data) if (channel->OnChannelCallback) { channel->OnChannelCallback(channel->ChannelCallbackContext); -#ifdef ENABLE_POLLING - channel->poll_timer.expires(jiffies + usecs_to_jiffies(100); - add_timer(&channel->poll_timer); -#endif } } diff --git a/drivers/staging/hv/ChannelMgmt.c b/drivers/staging/hv/ChannelMgmt.c index 3db62ca..ef38467 100644 --- a/drivers/staging/hv/ChannelMgmt.c +++ b/drivers/staging/hv/ChannelMgmt.c @@ -119,7 +119,7 @@ static inline void ReleaseVmbusChannel(void *context) */ void FreeVmbusChannel(struct vmbus_channel *Channel) { - del_timer(&Channel->poll_timer); + del_timer_sync(&Channel->poll_timer); /* * We have to release the channel's workqueue/thread in the vmbus's -- cgit v0.10.2 From d0e94d17ed8590d53252212414a627125825b379 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Mon, 23 Nov 2009 17:00:22 +0000 Subject: Staging: hv: Fix some missing author names Fix some missing author names. They were accidentally removed by someone within Microsoft before the files were sent for inclusion in the kernel. Signed-off-by: Hank Janssen Signed-off-by: Haiyang Zhang Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/hv/BlkVsc.c b/drivers/staging/hv/BlkVsc.c index 51aa861..a48ee3a 100644 --- a/drivers/staging/hv/BlkVsc.c +++ b/drivers/staging/hv/BlkVsc.c @@ -16,6 +16,7 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. * * Authors: + * Haiyang Zhang * Hank Janssen * */ diff --git a/drivers/staging/hv/NetVsc.c b/drivers/staging/hv/NetVsc.c index d384c0d..1c717f9 100644 --- a/drivers/staging/hv/NetVsc.c +++ b/drivers/staging/hv/NetVsc.c @@ -15,6 +15,7 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. * * Authors: + * Haiyang Zhang * Hank Janssen */ #include diff --git a/drivers/staging/hv/NetVsc.h b/drivers/staging/hv/NetVsc.h index 3e7112f..6e0e034 100644 --- a/drivers/staging/hv/NetVsc.h +++ b/drivers/staging/hv/NetVsc.h @@ -16,6 +16,7 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. * * Authors: + * Haiyang Zhang * Hank Janssen * */ diff --git a/drivers/staging/hv/blkvsc_drv.c b/drivers/staging/hv/blkvsc_drv.c index 99c4926..62b2828 100644 --- a/drivers/staging/hv/blkvsc_drv.c +++ b/drivers/staging/hv/blkvsc_drv.c @@ -15,6 +15,7 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. * * Authors: + * Haiyang Zhang * Hank Janssen */ #include diff --git a/drivers/staging/hv/netvsc_drv.c b/drivers/staging/hv/netvsc_drv.c index 3192d50..0d7459e 100644 --- a/drivers/staging/hv/netvsc_drv.c +++ b/drivers/staging/hv/netvsc_drv.c @@ -15,6 +15,7 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. * * Authors: + * Haiyang Zhang * Hank Janssen */ #include -- cgit v0.10.2 From fb08808ca252a76519d3b17fc3b6cfc6b2806e55 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 21 Oct 2009 14:42:11 +0200 Subject: Staging: update TODO files Remove my mail address. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/rtl8187se/TODO b/drivers/staging/rtl8187se/TODO index c09a916..a762e79 100644 --- a/drivers/staging/rtl8187se/TODO +++ b/drivers/staging/rtl8187se/TODO @@ -11,5 +11,4 @@ TODO: - sparse fixes - integrate with drivers/net/wireless/rtl818x -Please send any patches to Greg Kroah-Hartman and -Bartlomiej Zolnierkiewicz . +Please send any patches to Greg Kroah-Hartman . diff --git a/drivers/staging/rtl8192su/TODO b/drivers/staging/rtl8192su/TODO index b13be9e..f11eec7 100644 --- a/drivers/staging/rtl8192su/TODO +++ b/drivers/staging/rtl8192su/TODO @@ -14,5 +14,4 @@ TODO: - sparse fixes - integrate with drivers/net/wireless/rtl818x -Please send any patches to Greg Kroah-Hartman and -Bartlomiej Zolnierkiewicz . +Please send any patches to Greg Kroah-Hartman . diff --git a/drivers/staging/vt6655/TODO b/drivers/staging/vt6655/TODO index 8462cd1..cb04aaa 100644 --- a/drivers/staging/vt6655/TODO +++ b/drivers/staging/vt6655/TODO @@ -16,6 +16,5 @@ TODO: - sparse fixes - integrate with drivers/net/wireless -Please send any patches to Greg Kroah-Hartman , -Forest Bond and Bartlomiej Zolnierkiewicz -. +Please send any patches to Greg Kroah-Hartman +and Forest Bond . diff --git a/drivers/staging/vt6656/TODO b/drivers/staging/vt6656/TODO index 17cf50c..a318995 100644 --- a/drivers/staging/vt6656/TODO +++ b/drivers/staging/vt6656/TODO @@ -15,6 +15,5 @@ TODO: - sparse fixes - integrate with drivers/net/wireless -Please send any patches to Greg Kroah-Hartman , -Forest Bond and Bartlomiej Zolnierkiewicz -. +Please send any patches to Greg Kroah-Hartman +and Forest Bond . -- cgit v0.10.2 From c2f6595fbdb408d3d6850cfae590c8fa93e27399 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 18 Nov 2009 11:37:15 -0500 Subject: USB: EHCI: don't send Clear-TT-Buffer following a STALL This patch (as1304) fixes a regression in ehci-hcd. Evidently some hubs don't handle Clear-TT-Buffer requests correctly, so we should avoid sending them when they don't appear to be absolutely necessary. The reported symptom is that output on a downstream audio device cuts out because the hub stops relaying isochronous packets. The patch prevents Clear-TT-Buffer requests from being sent following a STALL handshake. In theory a STALL indicates either that the downstream device sent a STALL or that no matching TT buffer could be found. In either case, the transfer is completed and the TT buffer does not remain busy, so it doesn't need to be cleared. Also, the patch fixes a minor flaw in the code that actually sends the Clear-TT-Buffer requests. Although the pipe direction isn't really used for control transfers, it should be a Send rather than a Receive. Signed-off-by: Alan Stern Reported-by: Javier Kohen CC: David Brownell Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 5ce8391..0f857e6 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -444,7 +444,7 @@ resubmit: static inline int hub_clear_tt_buffer (struct usb_device *hdev, u16 devinfo, u16 tt) { - return usb_control_msg(hdev, usb_rcvctrlpipe(hdev, 0), + return usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), HUB_CLEAR_TT_BUFFER, USB_RT_PORT, devinfo, tt, NULL, 0, 1000); } diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c index 00ad9ce..139a2cc 100644 --- a/drivers/usb/host/ehci-q.c +++ b/drivers/usb/host/ehci-q.c @@ -487,8 +487,20 @@ halt: * we must clear the TT buffer (11.17.5). */ if (unlikely(last_status != -EINPROGRESS && - last_status != -EREMOTEIO)) - ehci_clear_tt_buffer(ehci, qh, urb, token); + last_status != -EREMOTEIO)) { + /* The TT's in some hubs malfunction when they + * receive this request following a STALL (they + * stop sending isochronous packets). Since a + * STALL can't leave the TT buffer in a busy + * state (if you believe Figures 11-48 - 11-51 + * in the USB 2.0 spec), we won't clear the TT + * buffer in this case. Strictly speaking this + * is a violation of the spec. + */ + if (last_status != -EPIPE) + ehci_clear_tt_buffer(ehci, qh, urb, + token); + } } /* if we're removing something not at the queue head, -- cgit v0.10.2 From cea83241b3a84499c4f9b12f8288f787e7aa6383 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 18 Nov 2009 22:51:18 +0300 Subject: USB: musb_gadget: fix STALL handling The driver incorrectly cancels the mass-storage device CSW request (which leads to device reset) due to giving back URB at the head of endpoint's queue after sending each STALL handshake; stop doing that and start checking for the queue being non-empty before stalling an endpoint and disallowing stall in such case in musb_gadget_set_halt() like the other gadget drivers do. Moreover, the driver starts Rx request despite of the endpoint being halted -- fix this by moving the SendStall bit check from musb_g_rx() to rxstate(). And we also sometimes get into rxstate() with DMA still active after clearing an endpoint's halt (not clear why), so bail out in this case, similarly to what txstate() does... While at it, also do the following changes : - in musb_gadget_set_halt(), remove pointless Tx FIFO flushing (the driver does not allow stalling with non-empty Tx FIFO anyway); - in rxstate(), stop pointlessly zeroing the 'csr' variable; - in musb_gadget_set_halt(), move the 'done' label to a more proper place; - in musb_g_rx(), eliminate the 'done' label completely... Signed-off-by: Sergei Shtylyov Cc: Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 8b3c4e2..74073f9 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -4,6 +4,7 @@ * Copyright 2005 Mentor Graphics Corporation * Copyright (C) 2005-2006 by Texas Instruments * Copyright (C) 2006-2007 Nokia Corporation + * Copyright (C) 2009 MontaVista Software, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -436,14 +437,6 @@ void musb_g_tx(struct musb *musb, u8 epnum) csr |= MUSB_TXCSR_P_WZC_BITS; csr &= ~MUSB_TXCSR_P_SENTSTALL; musb_writew(epio, MUSB_TXCSR, csr); - if (dma_channel_status(dma) == MUSB_DMA_STATUS_BUSY) { - dma->status = MUSB_DMA_STATUS_CORE_ABORT; - musb->dma_controller->channel_abort(dma); - } - - if (request) - musb_g_giveback(musb_ep, request, -EPIPE); - break; } @@ -582,15 +575,25 @@ void musb_g_tx(struct musb *musb, u8 epnum) */ static void rxstate(struct musb *musb, struct musb_request *req) { - u16 csr = 0; const u8 epnum = req->epnum; struct usb_request *request = &req->request; struct musb_ep *musb_ep = &musb->endpoints[epnum].ep_out; void __iomem *epio = musb->endpoints[epnum].regs; unsigned fifo_count = 0; u16 len = musb_ep->packet_sz; + u16 csr = musb_readw(epio, MUSB_RXCSR); - csr = musb_readw(epio, MUSB_RXCSR); + /* We shouldn't get here while DMA is active, but we do... */ + if (dma_channel_status(musb_ep->dma) == MUSB_DMA_STATUS_BUSY) { + DBG(4, "DMA pending...\n"); + return; + } + + if (csr & MUSB_RXCSR_P_SENDSTALL) { + DBG(5, "%s stalling, RXCSR %04x\n", + musb_ep->end_point.name, csr); + return; + } if (is_cppi_enabled() && musb_ep->dma) { struct dma_controller *c = musb->dma_controller; @@ -761,19 +764,10 @@ void musb_g_rx(struct musb *musb, u8 epnum) csr, dma ? " (dma)" : "", request); if (csr & MUSB_RXCSR_P_SENTSTALL) { - if (dma_channel_status(dma) == MUSB_DMA_STATUS_BUSY) { - dma->status = MUSB_DMA_STATUS_CORE_ABORT; - (void) musb->dma_controller->channel_abort(dma); - request->actual += musb_ep->dma->actual_len; - } - csr |= MUSB_RXCSR_P_WZC_BITS; csr &= ~MUSB_RXCSR_P_SENTSTALL; musb_writew(epio, MUSB_RXCSR, csr); - - if (request) - musb_g_giveback(musb_ep, request, -EPIPE); - goto done; + return; } if (csr & MUSB_RXCSR_P_OVERRUN) { @@ -795,7 +789,7 @@ void musb_g_rx(struct musb *musb, u8 epnum) DBG((csr & MUSB_RXCSR_DMAENAB) ? 4 : 1, "%s busy, csr %04x\n", musb_ep->end_point.name, csr); - goto done; + return; } if (dma && (csr & MUSB_RXCSR_DMAENAB)) { @@ -826,22 +820,15 @@ void musb_g_rx(struct musb *musb, u8 epnum) if ((request->actual < request->length) && (musb_ep->dma->actual_len == musb_ep->packet_sz)) - goto done; + return; #endif musb_g_giveback(musb_ep, request, 0); request = next_request(musb_ep); if (!request) - goto done; - - /* don't start more i/o till the stall clears */ - musb_ep_select(mbase, epnum); - csr = musb_readw(epio, MUSB_RXCSR); - if (csr & MUSB_RXCSR_P_SENDSTALL) - goto done; + return; } - /* analyze request if the ep is hot */ if (request) rxstate(musb, to_musb_request(request)); @@ -849,8 +836,6 @@ void musb_g_rx(struct musb *musb, u8 epnum) DBG(3, "packet waiting for %s%s request\n", musb_ep->desc ? "" : "inactive ", musb_ep->end_point.name); - -done: return; } @@ -1244,7 +1229,7 @@ int musb_gadget_set_halt(struct usb_ep *ep, int value) void __iomem *mbase; unsigned long flags; u16 csr; - struct musb_request *request = NULL; + struct musb_request *request; int status = 0; if (!ep) @@ -1260,24 +1245,29 @@ int musb_gadget_set_halt(struct usb_ep *ep, int value) musb_ep_select(mbase, epnum); - /* cannot portably stall with non-empty FIFO */ request = to_musb_request(next_request(musb_ep)); - if (value && musb_ep->is_in) { - csr = musb_readw(epio, MUSB_TXCSR); - if (csr & MUSB_TXCSR_FIFONOTEMPTY) { - DBG(3, "%s fifo busy, cannot halt\n", ep->name); - spin_unlock_irqrestore(&musb->lock, flags); - return -EAGAIN; + if (value) { + if (request) { + DBG(3, "request in progress, cannot halt %s\n", + ep->name); + status = -EAGAIN; + goto done; + } + /* Cannot portably stall with non-empty FIFO */ + if (musb_ep->is_in) { + csr = musb_readw(epio, MUSB_TXCSR); + if (csr & MUSB_TXCSR_FIFONOTEMPTY) { + DBG(3, "FIFO busy, cannot halt %s\n", ep->name); + status = -EAGAIN; + goto done; + } } - } /* set/clear the stall and toggle bits */ DBG(2, "%s: %s stall\n", ep->name, value ? "set" : "clear"); if (musb_ep->is_in) { csr = musb_readw(epio, MUSB_TXCSR); - if (csr & MUSB_TXCSR_FIFONOTEMPTY) - csr |= MUSB_TXCSR_FLUSHFIFO; csr |= MUSB_TXCSR_P_WZC_BITS | MUSB_TXCSR_CLRDATATOG; if (value) @@ -1300,14 +1290,13 @@ int musb_gadget_set_halt(struct usb_ep *ep, int value) musb_writew(epio, MUSB_RXCSR, csr); } -done: - /* maybe start the first request in the queue */ if (!musb_ep->busy && !value && request) { DBG(3, "restarting the request\n"); musb_ep_restart(musb, request); } +done: spin_unlock_irqrestore(&musb->lock, flags); return status; } -- cgit v0.10.2 From 0de6ab8b91f2e1e8e7fc66a8b5c5e8ca82ea16b7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 17 Nov 2009 19:10:48 -0800 Subject: USB: ftdi_sio: Keep going when write errors are encountered. The use of urb->actual_length to update tx_outstanding_bytes implicitly assumes that the number of bytes actually written is the same as the number of bytes we tried to write. On error that assumption is violated so just use transfer_buffer_length the number of bytes we intended to write to the device. If an error occurs we need to fall through and call usb_serial_port_softint to wake up processes waiting in tty_wait_until_sent. Signed-off-by: Eric W. Biederman Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 9c60d6d..ebcc6d0 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1937,7 +1937,7 @@ static void ftdi_write_bulk_callback(struct urb *urb) return; } /* account for transferred data */ - countback = urb->actual_length; + countback = urb->transfer_buffer_length; data_offset = priv->write_offset; if (data_offset > 0) { /* Subtract the control bytes */ @@ -1950,7 +1950,6 @@ static void ftdi_write_bulk_callback(struct urb *urb) if (status) { dbg("nonzero write bulk status received: %d", status); - return; } usb_serial_port_softint(port); -- cgit v0.10.2 From c5deb832d7a3f9618b09e6eeaa91a1a845c90c65 Mon Sep 17 00:00:00 2001 From: Thomas Dahlmann Date: Tue, 17 Nov 2009 14:18:27 -0800 Subject: usb: amd5536udc: fixed shared interrupt bug and warning oops - fixed shared interrupt bug reported by Vadim Lobanov - fixed possible warning oops on driver unload when connected - prevent interrupt flood in PIO mode ("modprobe amd5536udc use_dma=0") when using gadget ether Signed-off-by: Thomas Dahlmann Cc: Robert Richter Cc: David Brownell Cc: stable Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/gadget/amd5536udc.c b/drivers/usb/gadget/amd5536udc.c index d5b6596..731150d4 100644 --- a/drivers/usb/gadget/amd5536udc.c +++ b/drivers/usb/gadget/amd5536udc.c @@ -1213,7 +1213,12 @@ udc_queue(struct usb_ep *usbep, struct usb_request *usbreq, gfp_t gfp) tmp &= AMD_UNMASK_BIT(ep->num); writel(tmp, &dev->regs->ep_irqmsk); } - } + } else if (ep->in) { + /* enable ep irq */ + tmp = readl(&dev->regs->ep_irqmsk); + tmp &= AMD_UNMASK_BIT(ep->num); + writel(tmp, &dev->regs->ep_irqmsk); + } } else if (ep->dma) { @@ -2005,18 +2010,17 @@ __acquires(dev->lock) { int tmp; - /* empty queues and init hardware */ - udc_basic_init(dev); - for (tmp = 0; tmp < UDC_EP_NUM; tmp++) { - empty_req_queue(&dev->ep[tmp]); - } - if (dev->gadget.speed != USB_SPEED_UNKNOWN) { spin_unlock(&dev->lock); driver->disconnect(&dev->gadget); spin_lock(&dev->lock); } - /* init */ + + /* empty queues and init hardware */ + udc_basic_init(dev); + for (tmp = 0; tmp < UDC_EP_NUM; tmp++) + empty_req_queue(&dev->ep[tmp]); + udc_setup_endpoints(dev); } @@ -2472,6 +2476,13 @@ static irqreturn_t udc_data_in_isr(struct udc *dev, int ep_ix) } } + } else if (!use_dma && ep->in) { + /* disable interrupt */ + tmp = readl( + &dev->regs->ep_irqmsk); + tmp |= AMD_BIT(ep->num); + writel(tmp, + &dev->regs->ep_irqmsk); } } /* clear status bits */ @@ -3279,6 +3290,17 @@ static int udc_pci_probe( goto finished; } + spin_lock_init(&dev->lock); + /* udc csr registers base */ + dev->csr = dev->virt_addr + UDC_CSR_ADDR; + /* dev registers base */ + dev->regs = dev->virt_addr + UDC_DEVCFG_ADDR; + /* ep registers base */ + dev->ep_regs = dev->virt_addr + UDC_EPREGS_ADDR; + /* fifo's base */ + dev->rxfifo = (u32 __iomem *)(dev->virt_addr + UDC_RXFIFO_ADDR); + dev->txfifo = (u32 __iomem *)(dev->virt_addr + UDC_TXFIFO_ADDR); + if (request_irq(pdev->irq, udc_irq, IRQF_SHARED, name, dev) != 0) { dev_dbg(&dev->pdev->dev, "request_irq(%d) fail\n", pdev->irq); kfree(dev); @@ -3331,7 +3353,6 @@ static int udc_probe(struct udc *dev) udc_pollstall_timer.data = 0; /* device struct setup */ - spin_lock_init(&dev->lock); dev->gadget.ops = &udc_ops; dev_set_name(&dev->gadget.dev, "gadget"); @@ -3340,16 +3361,6 @@ static int udc_probe(struct udc *dev) dev->gadget.name = name; dev->gadget.is_dualspeed = 1; - /* udc csr registers base */ - dev->csr = dev->virt_addr + UDC_CSR_ADDR; - /* dev registers base */ - dev->regs = dev->virt_addr + UDC_DEVCFG_ADDR; - /* ep registers base */ - dev->ep_regs = dev->virt_addr + UDC_EPREGS_ADDR; - /* fifo's base */ - dev->rxfifo = (u32 __iomem *)(dev->virt_addr + UDC_RXFIFO_ADDR); - dev->txfifo = (u32 __iomem *)(dev->virt_addr + UDC_TXFIFO_ADDR); - /* init registers, interrupts, ... */ startup_registers(dev); -- cgit v0.10.2 From 1230435c258e34b47ab7adc3db572c88a284234a Mon Sep 17 00:00:00 2001 From: Ajay Kumar Gupta Date: Tue, 17 Nov 2009 15:22:54 +0530 Subject: USB: musb: Remove unwanted message in boot log Removes below unnecessary log of almost 28 lines during boot. musb_hdrc: hw_ep 0shared, max 64 musb_hdrc: hw_ep 1tx, max 512 musb_hdrc: hw_ep 1rx, max 512 ... ... musb_hdrc: hw_ep 13shared, max 4096 musb_hdrc: hw_ep 14shared, max 1024 musb_hdrc: hw_ep 15shared, max 1024 Signed-off-by: Ajay Kumar Gupta Acked-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 3a61ddb..547e0e3 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1450,7 +1450,7 @@ static int __init musb_core_init(u16 musb_type, struct musb *musb) #endif if (hw_ep->max_packet_sz_tx) { - printk(KERN_DEBUG + DBG(1, "%s: hw_ep %d%s, %smax %d\n", musb_driver_name, i, hw_ep->is_shared_fifo ? "shared" : "tx", @@ -1459,7 +1459,7 @@ static int __init musb_core_init(u16 musb_type, struct musb *musb) hw_ep->max_packet_sz_tx); } if (hw_ep->max_packet_sz_rx && !hw_ep->is_shared_fifo) { - printk(KERN_DEBUG + DBG(1, "%s: hw_ep %d%s, %smax %d\n", musb_driver_name, i, "rx", -- cgit v0.10.2 From dfeffa531ccf9c31f2f55df6d7ca86eec92142df Mon Sep 17 00:00:00 2001 From: Ajay Kumar Gupta Date: Tue, 17 Nov 2009 15:22:55 +0530 Subject: USB: musb: fix ISOC Tx programming for CPPI DMAs Isochronous Tx DMA is getting programmed but never getting started for CPPI and TUSB DMAs and thus Isochronous Tx doesn't work. Fixing it by starting DMAs using musb_h_tx_dma_start(). Signed-off-by: Swaminathan S Signed-off-by: Babu Ravi Signed-off-by: Ajay Kumar Gupta Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index cf94511..e3ab40a 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -1301,8 +1301,11 @@ void musb_host_tx(struct musb *musb, u8 epnum) return; } else if (usb_pipeisoc(pipe) && dma) { if (musb_tx_dma_program(musb->dma_controller, hw_ep, qh, urb, - offset, length)) + offset, length)) { + if (is_cppi_enabled() || tusb_dma_omap()) + musb_h_tx_dma_start(hw_ep); return; + } } else if (tx_csr & MUSB_TXCSR_DMAENAB) { DBG(1, "not complete, but DMA enabled?\n"); return; -- cgit v0.10.2 From 5542bc2ac7b52c021fc9c7a96329955491b7e763 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= Date: Tue, 17 Nov 2009 15:22:56 +0530 Subject: USB: musb: respect usb_request->zero in control requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In gadget mode the answer to a control request should be followed by a zero-length packet if the amount transferred is an exact multiple of the endpoint's packet size and the requests has its "zero" flag set. This patch prevents the request from being immediately removed from the queue when a control IN transfer ends on a full packet and "zero" is set. The next time ep0_txstate is entered, a zero-length packet is queued and the request is removed as fifo_count is 0. Signed-off-by: Daniel Glöckner Signed-off-by: Ajay Kumar Gupta Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/musb/musb_gadget_ep0.c b/drivers/usb/musb/musb_gadget_ep0.c index 7a67786..522efb3 100644 --- a/drivers/usb/musb/musb_gadget_ep0.c +++ b/drivers/usb/musb/musb_gadget_ep0.c @@ -511,7 +511,8 @@ static void ep0_txstate(struct musb *musb) /* update the flags */ if (fifo_count < MUSB_MAX_END0_PACKET - || request->actual == request->length) { + || (request->actual == request->length + && !request->zero)) { musb->ep0_state = MUSB_EP0_STAGE_STATUSOUT; csr |= MUSB_CSR0_P_DATAEND; } else -- cgit v0.10.2 From 8d6499e5bde91ad05dea4f666bdfe79e65e7cf96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= Date: Tue, 17 Nov 2009 15:22:57 +0530 Subject: USB: musb: Fix CPPI IRQs not being signaled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On tx channel abort a cppi interrupt is generated for a short time by setting the lowest bit of the TCPPICOMPPTR register. It is then reset immediately by clearing the bit. When the interrupt handler is run, it does not detect an interrupt in the TCPPIMSKSR or RCPPIMSKSR registers and thus exits early without writing the TCPPIEOIR register. It appears that this inhibits further cppi interrupts until the handler is called by chance, f.ex. from davinci_interrupt(). By moving the unmasking of the interrupt below the writes to TCPPICOMPPTR, no interrupt is generated and no write to TCPPIEOIR is necessary. Signed-off-by: Daniel Glöckner Signed-off-by: Ajay Kumar Gupta Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/musb/cppi_dma.c b/drivers/usb/musb/cppi_dma.c index c3577bb..ef2332a 100644 --- a/drivers/usb/musb/cppi_dma.c +++ b/drivers/usb/musb/cppi_dma.c @@ -1442,11 +1442,6 @@ static int cppi_channel_abort(struct dma_channel *channel) musb_writew(regs, MUSB_TXCSR, value); musb_writew(regs, MUSB_TXCSR, value); - /* re-enable interrupt */ - if (enabled) - musb_writel(tibase, DAVINCI_TXCPPI_INTENAB_REG, - (1 << cppi_ch->index)); - /* While we scrub the TX state RAM, ensure that we clean * up any interrupt that's currently asserted: * 1. Write to completion Ptr value 0x1(bit 0 set) @@ -1459,6 +1454,11 @@ static int cppi_channel_abort(struct dma_channel *channel) cppi_reset_tx(tx_ram, 1); musb_writel(&tx_ram->tx_complete, 0, 0); + /* re-enable interrupt */ + if (enabled) + musb_writel(tibase, DAVINCI_TXCPPI_INTENAB_REG, + (1 << cppi_ch->index)); + cppi_dump_tx(5, cppi_ch, " (done teardown)"); /* REVISIT tx side _should_ clean up the same way -- cgit v0.10.2 From ee4ecb8ac63a5792bec448037d4b82ec4144f94b Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Fri, 27 Nov 2009 15:17:59 +0100 Subject: USB: work around for EHCI with quirky periodic schedules a quirky chipset needs periodic schedules to run for a minimum time before they can be disabled again. This enforces the requirement with a time stamp and a calculated delay Signed-off-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 9835e07..f5f5601 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -676,6 +677,7 @@ static int ehci_run (struct usb_hcd *hcd) ehci_readl(ehci, &ehci->regs->command); /* unblock posted writes */ msleep(5); up_write(&ehci_cf_port_reset_rwsem); + ehci->last_periodic_enable = ktime_get_real(); temp = HC_VERSION(ehci_readl(ehci, &ehci->caps->hc_capbase)); ehci_info (ehci, diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index 378861b..ead5f4f 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -111,6 +111,10 @@ static int ehci_pci_setup(struct usb_hcd *hcd) switch (pdev->vendor) { case PCI_VENDOR_ID_INTEL: ehci->need_io_watchdog = 0; + if (pdev->device == 0x27cc) { + ehci->broken_periodic = 1; + ehci_info(ehci, "using broken periodic workaround\n"); + } break; case PCI_VENDOR_ID_TDI: if (pdev->device == PCI_DEVICE_ID_TDI_EHCI) { diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c index b25cdea..a5535b5 100644 --- a/drivers/usb/host/ehci-sched.c +++ b/drivers/usb/host/ehci-sched.c @@ -475,6 +475,8 @@ static int enable_periodic (struct ehci_hcd *ehci) /* make sure ehci_work scans these */ ehci->next_uframe = ehci_readl(ehci, &ehci->regs->frame_index) % (ehci->periodic_size << 3); + if (unlikely(ehci->broken_periodic)) + ehci->last_periodic_enable = ktime_get_real(); return 0; } @@ -486,6 +488,16 @@ static int disable_periodic (struct ehci_hcd *ehci) if (--ehci->periodic_sched) return 0; + if (unlikely(ehci->broken_periodic)) { + /* delay experimentally determined */ + ktime_t safe = ktime_add_us(ehci->last_periodic_enable, 1000); + ktime_t now = ktime_get_real(); + s64 delay = ktime_us_delta(safe, now); + + if (unlikely(delay > 0)) + udelay(delay); + } + /* did setting PSE not take effect yet? * takes effect only at frame boundaries... */ diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h index 064e768..2d85e21 100644 --- a/drivers/usb/host/ehci.h +++ b/drivers/usb/host/ehci.h @@ -118,6 +118,7 @@ struct ehci_hcd { /* one per controller */ unsigned stamp; unsigned random_frame; unsigned long next_statechange; + ktime_t last_periodic_enable; u32 command; /* SILICON QUIRKS */ @@ -127,6 +128,7 @@ struct ehci_hcd { /* one per controller */ unsigned big_endian_desc:1; unsigned has_amcc_usb23:1; unsigned need_io_watchdog:1; + unsigned broken_periodic:1; /* required for usb32 quirk */ #define OHCI_CTRL_HCFS (3 << 6) -- cgit v0.10.2 From 0ec8648379334f1e127ebd5e57a625890f116824 Mon Sep 17 00:00:00 2001 From: Gernot Hillier Date: Fri, 27 Nov 2009 13:49:23 +0100 Subject: USB: Add support for Mobilcom Debitel USB UMTS Surf-Stick to option driver This patch adds the vendor and device id for the Mobilcom Debitel UMTS surf stick (a.k.a. 4G Systems XSStick W14, MobiData MBD-200HU, ...). To see these ids, you need to switch the stick to modem operation first with the help of usb_modeswitch. This makes it switch from 1c9e:f000 to 1c9e:9603 and thus be recognized by the option driver. Signed-off-by: Gernot Hillier Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 319aaf97..0577e4b 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -336,6 +336,10 @@ static int option_resume(struct usb_serial *serial); #define AIRPLUS_VENDOR_ID 0x1011 #define AIRPLUS_PRODUCT_MCD650 0x3198 +/* 4G Systems products */ +#define FOUR_G_SYSTEMS_VENDOR_ID 0x1c9e +#define FOUR_G_SYSTEMS_PRODUCT_W14 0x9603 + static struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) }, @@ -599,6 +603,7 @@ static struct usb_device_id option_ids[] = { { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S) }, { USB_DEVICE(AIRPLUS_VENDOR_ID, AIRPLUS_PRODUCT_MCD650) }, { USB_DEVICE(TLAYTECH_VENDOR_ID, TLAYTECH_PRODUCT_TEU800) }, + { USB_DEVICE(FOUR_G_SYSTEMS_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W14) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); -- cgit v0.10.2 From 745dd2405e281d96c0a449103bdf6a895048f28c Mon Sep 17 00:00:00 2001 From: Michael Cree Date: Mon, 30 Nov 2009 22:44:40 -0500 Subject: Alpha: Rearrange thread info flags fixing two regressions The removal of the TIF_NOTIFY_RESUME flag, commit a583f1b54249b "remove unused TIF_NOTIFY_RESUME flag," resulted in incorrect setting of the unaligned access control flags by the prctl syscall. The re-addition of the TIF_NOTIFY_RESUME flag, commit d0420c83f39f "KEYS: Extend TIF_NOTIFY_RESUME to (almost) all architectures [try #6]" further caused problems, namely incorrect operands to assembler code as evidenced by: AS arch/alpha/kernel/entry.o arch/alpha/kernel/entry.S: Assembler messages: arch/alpha/kernel/entry.S:326: Warning: operand out of range (0x0000000000000406 is not between 0x0000000000000000 and 0x00000000000000ff) Both regressions fixed by (1) rearranging TIF_NOTIFY_RESUME flag to be in lower 8 bits of the thread info flags, and (2) making sure that ALPHA_UAC_SHIFT matches the rearrangement of the thread info flags. Signed-off-by: Michael Cree Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: David Howells , Signed-off-by: Matt Turner diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 815680b..b3e8886 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -61,21 +61,24 @@ register struct thread_info *__current_thread_info __asm__("$8"); /* * Thread information flags: * - these are process state flags and used from assembly - * - pending work-to-be-done flags come first to fit in and immediate operand. + * - pending work-to-be-done flags come first and must be assigned to be + * within bits 0 to 7 to fit in and immediate operand. + * - ALPHA_UAC_SHIFT below must be kept consistent with the unaligned + * control flags. * * TIF_SYSCALL_TRACE is known to be 0 via blbs. */ #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_SIGPENDING 1 /* signal pending */ -#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_POLLING_NRFLAG 3 /* poll_idle is polling NEED_RESCHED */ -#define TIF_DIE_IF_KERNEL 4 /* dik recursion lock */ -#define TIF_UAC_NOPRINT 5 /* see sysinfo.h */ -#define TIF_UAC_NOFIX 6 -#define TIF_UAC_SIGBUS 7 -#define TIF_MEMDIE 8 -#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ -#define TIF_NOTIFY_RESUME 10 /* callback before returning to user */ +#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_POLLING_NRFLAG 8 /* poll_idle is polling NEED_RESCHED */ +#define TIF_DIE_IF_KERNEL 9 /* dik recursion lock */ +#define TIF_UAC_NOPRINT 10 /* see sysinfo.h */ +#define TIF_UAC_NOFIX 11 +#define TIF_UAC_SIGBUS 12 +#define TIF_MEMDIE 13 +#define TIF_RESTORE_SIGMASK 14 /* restore signal mask in do_signal */ #define TIF_FREEZE 16 /* is freezing for suspend */ #define _TIF_SYSCALL_TRACE (1< Date: Mon, 30 Nov 2009 22:51:31 -0500 Subject: alpha: Fixup last users of irq_chip->typename The typename member of struct irq_chip was kept for migration purposes and is obsolete since more than 2 years. Fix up the leftovers. Signed-off-by: Thomas Gleixner Cc: Richard Henderson Cc: linux-alpha@vger.kernel.org Signed-off-by: Matt Turner diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index cc78346..c0de072 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -92,7 +92,7 @@ show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_irqs_cpu(irq, j)); #endif - seq_printf(p, " %14s", irq_desc[irq].chip->typename); + seq_printf(p, " %14s", irq_desc[irq].chip->name); seq_printf(p, " %c%s", (action->flags & IRQF_DISABLED)?'+':' ', action->name); diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index 38c805d..cfde865 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -228,7 +228,7 @@ struct irqaction timer_irqaction = { }; static struct irq_chip rtc_irq_type = { - .typename = "RTC", + .name = "RTC", .startup = rtc_startup, .shutdown = rtc_enable_disable, .enable = rtc_enable_disable, diff --git a/arch/alpha/kernel/irq_i8259.c b/arch/alpha/kernel/irq_i8259.c index 50bfec9..83a9ac2 100644 --- a/arch/alpha/kernel/irq_i8259.c +++ b/arch/alpha/kernel/irq_i8259.c @@ -84,7 +84,7 @@ i8259a_end_irq(unsigned int irq) } struct irq_chip i8259a_irq_type = { - .typename = "XT-PIC", + .name = "XT-PIC", .startup = i8259a_startup_irq, .shutdown = i8259a_disable_irq, .enable = i8259a_enable_irq, diff --git a/arch/alpha/kernel/irq_pyxis.c b/arch/alpha/kernel/irq_pyxis.c index 69199a7..989ce46 100644 --- a/arch/alpha/kernel/irq_pyxis.c +++ b/arch/alpha/kernel/irq_pyxis.c @@ -71,7 +71,7 @@ pyxis_mask_and_ack_irq(unsigned int irq) } static struct irq_chip pyxis_irq_type = { - .typename = "PYXIS", + .name = "PYXIS", .startup = pyxis_startup_irq, .shutdown = pyxis_disable_irq, .enable = pyxis_enable_irq, diff --git a/arch/alpha/kernel/irq_srm.c b/arch/alpha/kernel/irq_srm.c index 8522936..d63e93e 100644 --- a/arch/alpha/kernel/irq_srm.c +++ b/arch/alpha/kernel/irq_srm.c @@ -49,7 +49,7 @@ srm_end_irq(unsigned int irq) /* Handle interrupts from the SRM, assuming no additional weirdness. */ static struct irq_chip srm_irq_type = { - .typename = "SRM", + .name = "SRM", .startup = srm_startup_irq, .shutdown = srm_disable_irq, .enable = srm_enable_irq, diff --git a/arch/alpha/kernel/sys_alcor.c b/arch/alpha/kernel/sys_alcor.c index 382035e..20a30b8 100644 --- a/arch/alpha/kernel/sys_alcor.c +++ b/arch/alpha/kernel/sys_alcor.c @@ -90,7 +90,7 @@ alcor_end_irq(unsigned int irq) } static struct irq_chip alcor_irq_type = { - .typename = "ALCOR", + .name = "ALCOR", .startup = alcor_startup_irq, .shutdown = alcor_disable_irq, .enable = alcor_enable_irq, diff --git a/arch/alpha/kernel/sys_cabriolet.c b/arch/alpha/kernel/sys_cabriolet.c index ed34943..affd0f3 100644 --- a/arch/alpha/kernel/sys_cabriolet.c +++ b/arch/alpha/kernel/sys_cabriolet.c @@ -72,7 +72,7 @@ cabriolet_end_irq(unsigned int irq) } static struct irq_chip cabriolet_irq_type = { - .typename = "CABRIOLET", + .name = "CABRIOLET", .startup = cabriolet_startup_irq, .shutdown = cabriolet_disable_irq, .enable = cabriolet_enable_irq, diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index 46e70ec..d64e1e4 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -199,7 +199,7 @@ clipper_set_affinity(unsigned int irq, const struct cpumask *affinity) } static struct irq_chip dp264_irq_type = { - .typename = "DP264", + .name = "DP264", .startup = dp264_startup_irq, .shutdown = dp264_disable_irq, .enable = dp264_enable_irq, @@ -210,7 +210,7 @@ static struct irq_chip dp264_irq_type = { }; static struct irq_chip clipper_irq_type = { - .typename = "CLIPPER", + .name = "CLIPPER", .startup = clipper_startup_irq, .shutdown = clipper_disable_irq, .enable = clipper_enable_irq, diff --git a/arch/alpha/kernel/sys_eb64p.c b/arch/alpha/kernel/sys_eb64p.c index 660c23e..df2090c 100644 --- a/arch/alpha/kernel/sys_eb64p.c +++ b/arch/alpha/kernel/sys_eb64p.c @@ -70,7 +70,7 @@ eb64p_end_irq(unsigned int irq) } static struct irq_chip eb64p_irq_type = { - .typename = "EB64P", + .name = "EB64P", .startup = eb64p_startup_irq, .shutdown = eb64p_disable_irq, .enable = eb64p_enable_irq, diff --git a/arch/alpha/kernel/sys_eiger.c b/arch/alpha/kernel/sys_eiger.c index b99ea48..3ca1dbc 100644 --- a/arch/alpha/kernel/sys_eiger.c +++ b/arch/alpha/kernel/sys_eiger.c @@ -81,7 +81,7 @@ eiger_end_irq(unsigned int irq) } static struct irq_chip eiger_irq_type = { - .typename = "EIGER", + .name = "EIGER", .startup = eiger_startup_irq, .shutdown = eiger_disable_irq, .enable = eiger_enable_irq, diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c index ef0b83a..7a7ae36 100644 --- a/arch/alpha/kernel/sys_jensen.c +++ b/arch/alpha/kernel/sys_jensen.c @@ -119,7 +119,7 @@ jensen_local_end(unsigned int irq) } static struct irq_chip jensen_local_irq_type = { - .typename = "LOCAL", + .name = "LOCAL", .startup = jensen_local_startup, .shutdown = jensen_local_shutdown, .enable = jensen_local_enable, diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c index bbfc4f2..0bb3b5c 100644 --- a/arch/alpha/kernel/sys_marvel.c +++ b/arch/alpha/kernel/sys_marvel.c @@ -170,7 +170,7 @@ marvel_irq_noop_return(unsigned int irq) } static struct irq_chip marvel_legacy_irq_type = { - .typename = "LEGACY", + .name = "LEGACY", .startup = marvel_irq_noop_return, .shutdown = marvel_irq_noop, .enable = marvel_irq_noop, @@ -180,7 +180,7 @@ static struct irq_chip marvel_legacy_irq_type = { }; static struct irq_chip io7_lsi_irq_type = { - .typename = "LSI", + .name = "LSI", .startup = io7_startup_irq, .shutdown = io7_disable_irq, .enable = io7_enable_irq, @@ -190,7 +190,7 @@ static struct irq_chip io7_lsi_irq_type = { }; static struct irq_chip io7_msi_irq_type = { - .typename = "MSI", + .name = "MSI", .startup = io7_startup_irq, .shutdown = io7_disable_irq, .enable = io7_enable_irq, diff --git a/arch/alpha/kernel/sys_mikasa.c b/arch/alpha/kernel/sys_mikasa.c index 4e36664..ee88651 100644 --- a/arch/alpha/kernel/sys_mikasa.c +++ b/arch/alpha/kernel/sys_mikasa.c @@ -69,7 +69,7 @@ mikasa_end_irq(unsigned int irq) } static struct irq_chip mikasa_irq_type = { - .typename = "MIKASA", + .name = "MIKASA", .startup = mikasa_startup_irq, .shutdown = mikasa_disable_irq, .enable = mikasa_enable_irq, diff --git a/arch/alpha/kernel/sys_noritake.c b/arch/alpha/kernel/sys_noritake.c index 35753a1..86503fe 100644 --- a/arch/alpha/kernel/sys_noritake.c +++ b/arch/alpha/kernel/sys_noritake.c @@ -74,7 +74,7 @@ noritake_end_irq(unsigned int irq) } static struct irq_chip noritake_irq_type = { - .typename = "NORITAKE", + .name = "NORITAKE", .startup = noritake_startup_irq, .shutdown = noritake_disable_irq, .enable = noritake_enable_irq, diff --git a/arch/alpha/kernel/sys_rawhide.c b/arch/alpha/kernel/sys_rawhide.c index f3aec7e..26c322b 100644 --- a/arch/alpha/kernel/sys_rawhide.c +++ b/arch/alpha/kernel/sys_rawhide.c @@ -136,7 +136,7 @@ rawhide_end_irq(unsigned int irq) } static struct irq_chip rawhide_irq_type = { - .typename = "RAWHIDE", + .name = "RAWHIDE", .startup = rawhide_startup_irq, .shutdown = rawhide_disable_irq, .enable = rawhide_enable_irq, diff --git a/arch/alpha/kernel/sys_rx164.c b/arch/alpha/kernel/sys_rx164.c index fc92463..be16112 100644 --- a/arch/alpha/kernel/sys_rx164.c +++ b/arch/alpha/kernel/sys_rx164.c @@ -73,7 +73,7 @@ rx164_end_irq(unsigned int irq) } static struct irq_chip rx164_irq_type = { - .typename = "RX164", + .name = "RX164", .startup = rx164_startup_irq, .shutdown = rx164_disable_irq, .enable = rx164_enable_irq, diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c index 426eb69..b2abe27 100644 --- a/arch/alpha/kernel/sys_sable.c +++ b/arch/alpha/kernel/sys_sable.c @@ -502,7 +502,7 @@ sable_lynx_mask_and_ack_irq(unsigned int irq) } static struct irq_chip sable_lynx_irq_type = { - .typename = "SABLE/LYNX", + .name = "SABLE/LYNX", .startup = sable_lynx_startup_irq, .shutdown = sable_lynx_disable_irq, .enable = sable_lynx_enable_irq, diff --git a/arch/alpha/kernel/sys_takara.c b/arch/alpha/kernel/sys_takara.c index 830318c..2304648 100644 --- a/arch/alpha/kernel/sys_takara.c +++ b/arch/alpha/kernel/sys_takara.c @@ -75,7 +75,7 @@ takara_end_irq(unsigned int irq) } static struct irq_chip takara_irq_type = { - .typename = "TAKARA", + .name = "TAKARA", .startup = takara_startup_irq, .shutdown = takara_disable_irq, .enable = takara_enable_irq, diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c index 88978fc..2880533 100644 --- a/arch/alpha/kernel/sys_titan.c +++ b/arch/alpha/kernel/sys_titan.c @@ -195,7 +195,7 @@ init_titan_irqs(struct irq_chip * ops, int imin, int imax) } static struct irq_chip titan_irq_type = { - .typename = "TITAN", + .name = "TITAN", .startup = titan_startup_irq, .shutdown = titan_disable_irq, .enable = titan_enable_irq, diff --git a/arch/alpha/kernel/sys_wildfire.c b/arch/alpha/kernel/sys_wildfire.c index e91b4c3..62fd972 100644 --- a/arch/alpha/kernel/sys_wildfire.c +++ b/arch/alpha/kernel/sys_wildfire.c @@ -158,7 +158,7 @@ wildfire_end_irq(unsigned int irq) } static struct irq_chip wildfire_irq_type = { - .typename = "WILDFIRE", + .name = "WILDFIRE", .startup = wildfire_startup_irq, .shutdown = wildfire_disable_irq, .enable = wildfire_enable_irq, -- cgit v0.10.2 From 6f054164322bc6c1233402b9ed6b40d4af39a98f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Dec 2009 13:38:45 +0000 Subject: 9p: fix build breakage introduced by FS-Cache While building 2.6.32-rc8-git2 for Fedora I noticed the following thinko in commit 201a15428bd54f83eccec8b7c64a04b8f9431204 ("FS-Cache: Handle pages pending storage that get evicted under OOM conditions"): fs/9p/cache.c: In function '__v9fs_fscache_release_page': fs/9p/cache.c:346: error: 'vnode' undeclared (first use in this function) fs/9p/cache.c:346: error: (Each undeclared identifier is reported only once fs/9p/cache.c:346: error: for each function it appears in.) make[2]: *** [fs/9p/cache.o] Error 1 Fix the 9P filesystem to correctly construct the argument to fscache_maybe_release_page(). Signed-off-by: Kyle McMartin Signed-off-by: Xiaotian Feng [from identical patch] Signed-off-by: Stefan Lippers-Hollmann [from identical patch] Signed-off-by: David Howells Signed-off-by: Linus Torvalds diff --git a/fs/9p/cache.c b/fs/9p/cache.c index bcc5357..e777961 100644 --- a/fs/9p/cache.c +++ b/fs/9p/cache.c @@ -343,7 +343,7 @@ int __v9fs_fscache_release_page(struct page *page, gfp_t gfp) BUG_ON(!vcookie->fscache); - return fscache_maybe_release_page(vnode->cache, page, gfp); + return fscache_maybe_release_page(vcookie->fscache, page, gfp); } void __v9fs_fscache_invalidate_page(struct page *page) -- cgit v0.10.2 From fa1dae4906982b5d896c07613b1fe42456133b1c Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Dec 2009 13:52:08 +0000 Subject: SLOW_WORK: Fix the CONFIG_MODULES=n case Commits 3d7a641 ("SLOW_WORK: Wait for outstanding work items belonging to a module to clear") introduced some code to make sure that all of a module's slow-work items were complete before that module was removed, and commit 3bde31a ("SLOW_WORK: Allow a requeueable work item to sleep till the thread is needed") further extended that, breaking it in the process if CONFIG_MODULES=n: CC kernel/slow-work.o kernel/slow-work.c: In function 'slow_work_execute': kernel/slow-work.c:313: error: 'slow_work_thread_processing' undeclared (first use in this function) kernel/slow-work.c:313: error: (Each undeclared identifier is reported only once kernel/slow-work.c:313: error: for each function it appears in.) kernel/slow-work.c: In function 'slow_work_wait_for_items': kernel/slow-work.c:950: error: 'slow_work_unreg_sync_lock' undeclared (first use in this function) kernel/slow-work.c:951: error: 'slow_work_unreg_wq' undeclared (first use in this function) kernel/slow-work.c:961: error: 'slow_work_unreg_work_item' undeclared (first use in this function) kernel/slow-work.c:974: error: 'slow_work_unreg_module' undeclared (first use in this function) kernel/slow-work.c:977: error: 'slow_work_thread_processing' undeclared (first use in this function) make[1]: *** [kernel/slow-work.o] Error 1 Fix this by: (1) Extracting the bits of slow_work_execute() that are contingent on CONFIG_MODULES, and the bits that should be, into inline functions and placing them into the #ifdef'd section that defines the relevant variables and adding stubs for moduleless kernels. This allows the removal of some #ifdefs. (2) #ifdef'ing out the contents of slow_work_wait_for_items() in moduleless kernels. The four functions related to handling module unloading synchronisation (and their associated variables) could be offloaded into a separate .c file, but each function is only used once and three of them are tiny, so doing so would prevent them from being inlined. Reported-by: Ingo Molnar Signed-off-by: David Howells Signed-off-by: Linus Torvalds diff --git a/kernel/slow-work.c b/kernel/slow-work.c index da94f3c..b5c17f1 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -109,6 +109,30 @@ static struct module *slow_work_unreg_module; static struct slow_work *slow_work_unreg_work_item; static DECLARE_WAIT_QUEUE_HEAD(slow_work_unreg_wq); static DEFINE_MUTEX(slow_work_unreg_sync_lock); + +static void slow_work_set_thread_processing(int id, struct slow_work *work) +{ + if (work) + slow_work_thread_processing[id] = work->owner; +} +static void slow_work_done_thread_processing(int id, struct slow_work *work) +{ + struct module *module = slow_work_thread_processing[id]; + + slow_work_thread_processing[id] = NULL; + smp_mb(); + if (slow_work_unreg_work_item == work || + slow_work_unreg_module == module) + wake_up_all(&slow_work_unreg_wq); +} +static void slow_work_clear_thread_processing(int id) +{ + slow_work_thread_processing[id] = NULL; +} +#else +static void slow_work_set_thread_processing(int id, struct slow_work *work) {} +static void slow_work_done_thread_processing(int id, struct slow_work *work) {} +static void slow_work_clear_thread_processing(int id) {} #endif /* @@ -197,9 +221,6 @@ static unsigned slow_work_calc_vsmax(void) */ static noinline bool slow_work_execute(int id) { -#ifdef CONFIG_MODULES - struct module *module; -#endif struct slow_work *work = NULL; unsigned vsmax; bool very_slow; @@ -236,10 +257,7 @@ static noinline bool slow_work_execute(int id) very_slow = false; /* avoid the compiler warning */ } -#ifdef CONFIG_MODULES - if (work) - slow_work_thread_processing[id] = work->owner; -#endif + slow_work_set_thread_processing(id, work); if (work) { slow_work_mark_time(work); slow_work_begin_exec(id, work); @@ -287,15 +305,7 @@ static noinline bool slow_work_execute(int id) /* sort out the race between module unloading and put_ref() */ slow_work_put_ref(work); - -#ifdef CONFIG_MODULES - module = slow_work_thread_processing[id]; - slow_work_thread_processing[id] = NULL; - smp_mb(); - if (slow_work_unreg_work_item == work || - slow_work_unreg_module == module) - wake_up_all(&slow_work_unreg_wq); -#endif + slow_work_done_thread_processing(id, work); return true; @@ -310,7 +320,7 @@ auto_requeue: else list_add_tail(&work->link, &slow_work_queue); spin_unlock_irq(&slow_work_queue_lock); - slow_work_thread_processing[id] = NULL; + slow_work_clear_thread_processing(id); return true; } @@ -943,6 +953,7 @@ EXPORT_SYMBOL(slow_work_register_user); */ static void slow_work_wait_for_items(struct module *module) { +#ifdef CONFIG_MODULES DECLARE_WAITQUEUE(myself, current); struct slow_work *work; int loop; @@ -989,6 +1000,7 @@ static void slow_work_wait_for_items(struct module *module) remove_wait_queue(&slow_work_unreg_wq, &myself); mutex_unlock(&slow_work_unreg_sync_lock); +#endif /* CONFIG_MODULES */ } /** -- cgit v0.10.2 From 3350b2acdd39d23db52710045536b943fe38a35c Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Tue, 1 Dec 2009 14:09:24 +0000 Subject: CacheFiles: Update IMA counters when using dentry_open When IMA is active, using dentry_open without updating the IMA counters will result in free/open imbalance errors when fput is eventually called. Signed-off-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: Linus Torvalds diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 1d83325..a6c8c6f 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -11,6 +11,7 @@ #include #include +#include #include "internal.h" /* @@ -922,6 +923,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) if (IS_ERR(file)) { ret = PTR_ERR(file); } else { + ima_counts_get(file); ret = -EIO; if (file->f_op->write) { pos = (loff_t) page->index << PAGE_SHIFT; -- cgit v0.10.2 From f13a48bd798a159291ca583b95453171b88b7448 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Dec 2009 15:36:11 +0000 Subject: SLOW_WORK: Move slow_work's proc file to debugfs Move slow_work's debugging proc file to debugfs. Signed-off-by: David Howells Requested-and-acked-by: Ingo Molnar Signed-off-by: Linus Torvalds diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index 52bc314..9dbf447 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt @@ -279,9 +279,9 @@ The slow-work thread pool has a number of configurables: VIEWING EXECUTING AND QUEUED ITEMS ================================== -If CONFIG_SLOW_WORK_PROC is enabled, a proc file is made available: +If CONFIG_SLOW_WORK_DEBUG is enabled, a debugfs file is made available: - /proc/slow_work_rq + /sys/kernel/debug/slow_work/runqueue through which the list of work items being executed and the queues of items to be executed may be viewed. The owner of a work item is given the chance to diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index 5035a26..13337bf 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -20,7 +20,7 @@ #include struct slow_work; -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG struct seq_file; #endif @@ -42,8 +42,8 @@ struct slow_work_ops { /* execute a work item */ void (*execute)(struct slow_work *work); -#ifdef CONFIG_SLOW_WORK_PROC - /* describe a work item for /proc */ +#ifdef CONFIG_SLOW_WORK_DEBUG + /* describe a work item for debugfs */ void (*desc)(struct slow_work *work, struct seq_file *m); #endif }; @@ -64,7 +64,7 @@ struct slow_work { #define SLOW_WORK_DELAYED 5 /* item is struct delayed_slow_work with active timer */ const struct slow_work_ops *ops; /* operations table for this item */ struct list_head link; /* link in queue */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG struct timespec mark; /* jiffies at which queued or exec begun */ #endif }; diff --git a/init/Kconfig b/init/Kconfig index ab5c648..39923cc 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1098,12 +1098,12 @@ config SLOW_WORK See Documentation/slow-work.txt. -config SLOW_WORK_PROC - bool "Slow work debugging through /proc" +config SLOW_WORK_DEBUG + bool "Slow work debugging through debugfs" default n - depends on SLOW_WORK && PROC_FS + depends on SLOW_WORK && DEBUG_FS help - Display the contents of the slow work run queue through /proc, + Display the contents of the slow work run queue through debugfs, including items currently executing. See Documentation/slow-work.txt. diff --git a/kernel/Makefile b/kernel/Makefile index 776ffed..d7c13d2 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -94,7 +94,7 @@ obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_SLOW_WORK) += slow-work.o -obj-$(CONFIG_SLOW_WORK_PROC) += slow-work-proc.o +obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) diff --git a/kernel/slow-work-debugfs.c b/kernel/slow-work-debugfs.c new file mode 100644 index 0000000..e45c436 --- /dev/null +++ b/kernel/slow-work-debugfs.c @@ -0,0 +1,227 @@ +/* Slow work debugging + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include "slow-work.h" + +#define ITERATOR_SHIFT (BITS_PER_LONG - 4) +#define ITERATOR_SELECTOR (0xfUL << ITERATOR_SHIFT) +#define ITERATOR_COUNTER (~ITERATOR_SELECTOR) + +void slow_work_new_thread_desc(struct slow_work *work, struct seq_file *m) +{ + seq_puts(m, "Slow-work: New thread"); +} + +/* + * Render the time mark field on a work item into a 5-char time with units plus + * a space + */ +static void slow_work_print_mark(struct seq_file *m, struct slow_work *work) +{ + struct timespec now, diff; + + now = CURRENT_TIME; + diff = timespec_sub(now, work->mark); + + if (diff.tv_sec < 0) + seq_puts(m, " -ve "); + else if (diff.tv_sec == 0 && diff.tv_nsec < 1000) + seq_printf(m, "%3luns ", diff.tv_nsec); + else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000) + seq_printf(m, "%3luus ", diff.tv_nsec / 1000); + else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000000) + seq_printf(m, "%3lums ", diff.tv_nsec / 1000000); + else if (diff.tv_sec <= 1) + seq_puts(m, " 1s "); + else if (diff.tv_sec < 60) + seq_printf(m, "%4lus ", diff.tv_sec); + else if (diff.tv_sec < 60 * 60) + seq_printf(m, "%4lum ", diff.tv_sec / 60); + else if (diff.tv_sec < 60 * 60 * 24) + seq_printf(m, "%4luh ", diff.tv_sec / 3600); + else + seq_puts(m, "exces "); +} + +/* + * Describe a slow work item for debugfs + */ +static int slow_work_runqueue_show(struct seq_file *m, void *v) +{ + struct slow_work *work; + struct list_head *p = v; + unsigned long id; + + switch ((unsigned long) v) { + case 1: + seq_puts(m, "THR PID ITEM ADDR FL MARK DESC\n"); + return 0; + case 2: + seq_puts(m, "=== ===== ================ == ===== ==========\n"); + return 0; + + case 3 ... 3 + SLOW_WORK_THREAD_LIMIT - 1: + id = (unsigned long) v - 3; + + read_lock(&slow_work_execs_lock); + work = slow_work_execs[id]; + if (work) { + smp_read_barrier_depends(); + + seq_printf(m, "%3lu %5d %16p %2lx ", + id, slow_work_pids[id], work, work->flags); + slow_work_print_mark(m, work); + + if (work->ops->desc) + work->ops->desc(work, m); + seq_putc(m, '\n'); + } + read_unlock(&slow_work_execs_lock); + return 0; + + default: + work = list_entry(p, struct slow_work, link); + seq_printf(m, "%3s - %16p %2lx ", + work->flags & SLOW_WORK_VERY_SLOW ? "vsq" : "sq", + work, work->flags); + slow_work_print_mark(m, work); + + if (work->ops->desc) + work->ops->desc(work, m); + seq_putc(m, '\n'); + return 0; + } +} + +/* + * map the iterator to a work item + */ +static void *slow_work_runqueue_index(struct seq_file *m, loff_t *_pos) +{ + struct list_head *p; + unsigned long count, id; + + switch (*_pos >> ITERATOR_SHIFT) { + case 0x0: + if (*_pos == 0) + *_pos = 1; + if (*_pos < 3) + return (void *)(unsigned long) *_pos; + if (*_pos < 3 + SLOW_WORK_THREAD_LIMIT) + for (id = *_pos - 3; + id < SLOW_WORK_THREAD_LIMIT; + id++, (*_pos)++) + if (slow_work_execs[id]) + return (void *)(unsigned long) *_pos; + *_pos = 0x1UL << ITERATOR_SHIFT; + + case 0x1: + count = *_pos & ITERATOR_COUNTER; + list_for_each(p, &slow_work_queue) { + if (count == 0) + return p; + count--; + } + *_pos = 0x2UL << ITERATOR_SHIFT; + + case 0x2: + count = *_pos & ITERATOR_COUNTER; + list_for_each(p, &vslow_work_queue) { + if (count == 0) + return p; + count--; + } + *_pos = 0x3UL << ITERATOR_SHIFT; + + default: + return NULL; + } +} + +/* + * set up the iterator to start reading from the first line + */ +static void *slow_work_runqueue_start(struct seq_file *m, loff_t *_pos) +{ + spin_lock_irq(&slow_work_queue_lock); + return slow_work_runqueue_index(m, _pos); +} + +/* + * move to the next line + */ +static void *slow_work_runqueue_next(struct seq_file *m, void *v, loff_t *_pos) +{ + struct list_head *p = v; + unsigned long selector = *_pos >> ITERATOR_SHIFT; + + (*_pos)++; + switch (selector) { + case 0x0: + return slow_work_runqueue_index(m, _pos); + + case 0x1: + if (*_pos >> ITERATOR_SHIFT == 0x1) { + p = p->next; + if (p != &slow_work_queue) + return p; + } + *_pos = 0x2UL << ITERATOR_SHIFT; + p = &vslow_work_queue; + + case 0x2: + if (*_pos >> ITERATOR_SHIFT == 0x2) { + p = p->next; + if (p != &vslow_work_queue) + return p; + } + *_pos = 0x3UL << ITERATOR_SHIFT; + + default: + return NULL; + } +} + +/* + * clean up after reading + */ +static void slow_work_runqueue_stop(struct seq_file *m, void *v) +{ + spin_unlock_irq(&slow_work_queue_lock); +} + +static const struct seq_operations slow_work_runqueue_ops = { + .start = slow_work_runqueue_start, + .stop = slow_work_runqueue_stop, + .next = slow_work_runqueue_next, + .show = slow_work_runqueue_show, +}; + +/* + * open "/sys/kernel/debug/slow_work/runqueue" to list queue contents + */ +static int slow_work_runqueue_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &slow_work_runqueue_ops); +} + +const struct file_operations slow_work_runqueue_fops = { + .owner = THIS_MODULE, + .open = slow_work_runqueue_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; diff --git a/kernel/slow-work-proc.c b/kernel/slow-work-proc.c deleted file mode 100644 index 3988032..0000000 --- a/kernel/slow-work-proc.c +++ /dev/null @@ -1,227 +0,0 @@ -/* Slow work debugging - * - * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include "slow-work.h" - -#define ITERATOR_SHIFT (BITS_PER_LONG - 4) -#define ITERATOR_SELECTOR (0xfUL << ITERATOR_SHIFT) -#define ITERATOR_COUNTER (~ITERATOR_SELECTOR) - -void slow_work_new_thread_desc(struct slow_work *work, struct seq_file *m) -{ - seq_puts(m, "Slow-work: New thread"); -} - -/* - * Render the time mark field on a work item into a 5-char time with units plus - * a space - */ -static void slow_work_print_mark(struct seq_file *m, struct slow_work *work) -{ - struct timespec now, diff; - - now = CURRENT_TIME; - diff = timespec_sub(now, work->mark); - - if (diff.tv_sec < 0) - seq_puts(m, " -ve "); - else if (diff.tv_sec == 0 && diff.tv_nsec < 1000) - seq_printf(m, "%3luns ", diff.tv_nsec); - else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000) - seq_printf(m, "%3luus ", diff.tv_nsec / 1000); - else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000000) - seq_printf(m, "%3lums ", diff.tv_nsec / 1000000); - else if (diff.tv_sec <= 1) - seq_puts(m, " 1s "); - else if (diff.tv_sec < 60) - seq_printf(m, "%4lus ", diff.tv_sec); - else if (diff.tv_sec < 60 * 60) - seq_printf(m, "%4lum ", diff.tv_sec / 60); - else if (diff.tv_sec < 60 * 60 * 24) - seq_printf(m, "%4luh ", diff.tv_sec / 3600); - else - seq_puts(m, "exces "); -} - -/* - * Describe a slow work item for /proc - */ -static int slow_work_runqueue_show(struct seq_file *m, void *v) -{ - struct slow_work *work; - struct list_head *p = v; - unsigned long id; - - switch ((unsigned long) v) { - case 1: - seq_puts(m, "THR PID ITEM ADDR FL MARK DESC\n"); - return 0; - case 2: - seq_puts(m, "=== ===== ================ == ===== ==========\n"); - return 0; - - case 3 ... 3 + SLOW_WORK_THREAD_LIMIT - 1: - id = (unsigned long) v - 3; - - read_lock(&slow_work_execs_lock); - work = slow_work_execs[id]; - if (work) { - smp_read_barrier_depends(); - - seq_printf(m, "%3lu %5d %16p %2lx ", - id, slow_work_pids[id], work, work->flags); - slow_work_print_mark(m, work); - - if (work->ops->desc) - work->ops->desc(work, m); - seq_putc(m, '\n'); - } - read_unlock(&slow_work_execs_lock); - return 0; - - default: - work = list_entry(p, struct slow_work, link); - seq_printf(m, "%3s - %16p %2lx ", - work->flags & SLOW_WORK_VERY_SLOW ? "vsq" : "sq", - work, work->flags); - slow_work_print_mark(m, work); - - if (work->ops->desc) - work->ops->desc(work, m); - seq_putc(m, '\n'); - return 0; - } -} - -/* - * map the iterator to a work item - */ -static void *slow_work_runqueue_index(struct seq_file *m, loff_t *_pos) -{ - struct list_head *p; - unsigned long count, id; - - switch (*_pos >> ITERATOR_SHIFT) { - case 0x0: - if (*_pos == 0) - *_pos = 1; - if (*_pos < 3) - return (void *)(unsigned long) *_pos; - if (*_pos < 3 + SLOW_WORK_THREAD_LIMIT) - for (id = *_pos - 3; - id < SLOW_WORK_THREAD_LIMIT; - id++, (*_pos)++) - if (slow_work_execs[id]) - return (void *)(unsigned long) *_pos; - *_pos = 0x1UL << ITERATOR_SHIFT; - - case 0x1: - count = *_pos & ITERATOR_COUNTER; - list_for_each(p, &slow_work_queue) { - if (count == 0) - return p; - count--; - } - *_pos = 0x2UL << ITERATOR_SHIFT; - - case 0x2: - count = *_pos & ITERATOR_COUNTER; - list_for_each(p, &vslow_work_queue) { - if (count == 0) - return p; - count--; - } - *_pos = 0x3UL << ITERATOR_SHIFT; - - default: - return NULL; - } -} - -/* - * set up the iterator to start reading from the first line - */ -static void *slow_work_runqueue_start(struct seq_file *m, loff_t *_pos) -{ - spin_lock_irq(&slow_work_queue_lock); - return slow_work_runqueue_index(m, _pos); -} - -/* - * move to the next line - */ -static void *slow_work_runqueue_next(struct seq_file *m, void *v, loff_t *_pos) -{ - struct list_head *p = v; - unsigned long selector = *_pos >> ITERATOR_SHIFT; - - (*_pos)++; - switch (selector) { - case 0x0: - return slow_work_runqueue_index(m, _pos); - - case 0x1: - if (*_pos >> ITERATOR_SHIFT == 0x1) { - p = p->next; - if (p != &slow_work_queue) - return p; - } - *_pos = 0x2UL << ITERATOR_SHIFT; - p = &vslow_work_queue; - - case 0x2: - if (*_pos >> ITERATOR_SHIFT == 0x2) { - p = p->next; - if (p != &vslow_work_queue) - return p; - } - *_pos = 0x3UL << ITERATOR_SHIFT; - - default: - return NULL; - } -} - -/* - * clean up after reading - */ -static void slow_work_runqueue_stop(struct seq_file *m, void *v) -{ - spin_unlock_irq(&slow_work_queue_lock); -} - -static const struct seq_operations slow_work_runqueue_ops = { - .start = slow_work_runqueue_start, - .stop = slow_work_runqueue_stop, - .next = slow_work_runqueue_next, - .show = slow_work_runqueue_show, -}; - -/* - * open "/proc/slow_work_rq" to list queue contents - */ -static int slow_work_runqueue_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &slow_work_runqueue_ops); -} - -const struct file_operations slow_work_runqueue_fops = { - .owner = THIS_MODULE, - .open = slow_work_runqueue_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; diff --git a/kernel/slow-work.c b/kernel/slow-work.c index b5c17f1..00889bd 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include "slow-work.h" static void slow_work_cull_timeout(unsigned long); @@ -138,7 +138,7 @@ static void slow_work_clear_thread_processing(int id) {} /* * Data for tracking currently executing items for indication through /proc */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG struct slow_work *slow_work_execs[SLOW_WORK_THREAD_LIMIT]; pid_t slow_work_pids[SLOW_WORK_THREAD_LIMIT]; DEFINE_RWLOCK(slow_work_execs_lock); @@ -823,7 +823,7 @@ static void slow_work_new_thread_execute(struct slow_work *work) static const struct slow_work_ops slow_work_new_thread_ops = { .owner = THIS_MODULE, .execute = slow_work_new_thread_execute, -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG .desc = slow_work_new_thread_desc, #endif }; @@ -1055,9 +1055,15 @@ static int __init init_slow_work(void) if (slow_work_max_max_threads < nr_cpus * 2) slow_work_max_max_threads = nr_cpus * 2; #endif -#ifdef CONFIG_SLOW_WORK_PROC - proc_create("slow_work_rq", S_IFREG | 0400, NULL, - &slow_work_runqueue_fops); +#ifdef CONFIG_SLOW_WORK_DEBUG + { + struct dentry *dbdir; + + dbdir = debugfs_create_dir("slow_work", NULL); + if (dbdir && !IS_ERR(dbdir)) + debugfs_create_file("runqueue", S_IFREG | 0400, dbdir, + NULL, &slow_work_runqueue_fops); + } #endif return 0; } diff --git a/kernel/slow-work.h b/kernel/slow-work.h index 3c2f007..321f3c5 100644 --- a/kernel/slow-work.h +++ b/kernel/slow-work.h @@ -19,7 +19,7 @@ /* * slow-work.c */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG extern struct slow_work *slow_work_execs[]; extern pid_t slow_work_pids[]; extern rwlock_t slow_work_execs_lock; @@ -30,9 +30,9 @@ extern struct list_head vslow_work_queue; extern spinlock_t slow_work_queue_lock; /* - * slow-work-proc.c + * slow-work-debugfs.c */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG extern const struct file_operations slow_work_runqueue_fops; extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *); -- cgit v0.10.2 From 2b5e63f6b8f6566161a261a9face1de433d6608e Mon Sep 17 00:00:00 2001 From: Martin Michlmayr Date: Thu, 19 Nov 2009 16:40:09 +0000 Subject: MIPS: IP22/IP28 Disable early printk to fix boot problems on some systems. Some Debian users have reported that the kernel hangs early during boot on some IP22 systems. Thomas Bogendoerfer found that this is due to a "bad interaction between CONFIG_EARLY_PRINTK and overwritten prom memory during early boot". Since there's no fix yet, disable CONFIG_EARLY_PRINTK for now. Signed-off-by: Martin Michlmayr Cc: linux-mips@linux-mips.org Cc: Thomas Bogendoerfer Cc: Dmitri Vorobiev Patchwork: http://patchwork.linux-mips.org/patch/702/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 1aad0d9..ffdd651 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -358,7 +358,14 @@ config SGI_IP22 select SWAP_IO_SPACE select SYS_HAS_CPU_R4X00 select SYS_HAS_CPU_R5000 - select SYS_HAS_EARLY_PRINTK + # + # Disable EARLY_PRINTK for now since it leads to overwritten prom + # memory during early boot on some machines. + # + # See http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=20091119164009.GA15038%40deprecation.cyrius.com + # for a more details discussion + # + # select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN @@ -410,7 +417,14 @@ config SGI_IP28 select SGI_HAS_ZILOG select SWAP_IO_SPACE select SYS_HAS_CPU_R10000 - select SYS_HAS_EARLY_PRINTK + # + # Disable EARLY_PRINTK for now since it leads to overwritten prom + # memory during early boot on some machines. + # + # See http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=20091119164009.GA15038%40deprecation.cyrius.com + # for a more details discussion + # + # select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN help -- cgit v0.10.2 From c677189af9faa3f26fae0fcb7ac59f477048b89a Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 24 Nov 2009 13:16:02 +0000 Subject: MIPS: Fix build error if __xchg() is not getting inlined. If __xchg() is not getting inlined the outline version of the function will have a reference to __xchg_called_with_bad_pointer() which does not exist remaining. Fixed by using BUILD_BUG_ON() to check for allowable operand sizes. Signed-off-by: Ralf Baechle Patchwork: http://patchwork.linux-mips.org/patch/705/ diff --git a/arch/mips/include/asm/system.h b/arch/mips/include/asm/system.h index fcf5f98..83b5509 100644 --- a/arch/mips/include/asm/system.h +++ b/arch/mips/include/asm/system.h @@ -12,6 +12,7 @@ #ifndef _ASM_SYSTEM_H #define _ASM_SYSTEM_H +#include #include #include @@ -193,10 +194,6 @@ extern __u64 __xchg_u64_unsupported_on_32bit_kernels(volatile __u64 * m, __u64 v #define __xchg_u64 __xchg_u64_unsupported_on_32bit_kernels #endif -/* This function doesn't exist, so you'll get a linker error - if something tries to do an invalid xchg(). */ -extern void __xchg_called_with_bad_pointer(void); - static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) { switch (size) { @@ -205,11 +202,17 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz case 8: return __xchg_u64(ptr, x); } - __xchg_called_with_bad_pointer(); + return x; } -#define xchg(ptr, x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) +#define xchg(ptr, x) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) & ~0xc); \ + \ + ((__typeof__(*(ptr))) \ + __xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))); \ +}) extern void set_handler(unsigned long offset, void *addr, unsigned long len); extern void set_uncached_handler(unsigned long offset, void *addr, unsigned long len); -- cgit v0.10.2 From e1eb3a983befdb422e1aae299bdab573d04929f6 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 26 Nov 2009 18:28:42 +0000 Subject: MIPS: Add missing definition for MADV_HWPOISON. Thanks to Joseph S. Myers for reporting this. Signed-off-by: Ralf Baechle Cc: "Joseph S. Myers" Patchwork: http://patchwork.linux-mips.org/patch/723/ diff --git a/arch/mips/include/asm/mman.h b/arch/mips/include/asm/mman.h index a2250f3..c892bfb 100644 --- a/arch/mips/include/asm/mman.h +++ b/arch/mips/include/asm/mman.h @@ -75,6 +75,7 @@ #define MADV_MERGEABLE 12 /* KSM may merge identical pages */ #define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ +#define MADV_HWPOISON 100 /* poison a page for testing */ /* compatibility flags */ #define MAP_FILE 0 -- cgit v0.10.2 From 315fe625f878749a7d2b6b65a40c29bbbe6e1dc7 Mon Sep 17 00:00:00 2001 From: Wu Zhangjin Date: Tue, 1 Dec 2009 14:55:25 +0800 Subject: MIPS: Loongson: Disallow 4kB pages Currently, with PAGE_SIZE_4KB, the kernel for loongson will hang on: Kernel panic - not syncing: Attempted to kill init! The possible reason is the cache aliases problem: Loongson 2F has 64kb, 4 way L1 Cache, the way size is 16kb, which is bigger then 4kb. so, If using 4kb page size, there is cache aliases problem. To avoid this kind of problem, extra cache flushing. The 2nd possible solution is 16kb page size which avoids cache aliases without the need for extra cache flushes. So we disable 4kB pages until the aliasing issue is solved. Signed-off-by: Wu Zhangjin Patchwork: http://patchwork.linux-mips.org/patch/736/ Cc: linux-mips@linux-mips.org Cc: zhangfx@lemote.com Signed-off-by: Ralf Baechle diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index ffdd651..e232e50 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1453,6 +1453,7 @@ choice config PAGE_SIZE_4KB bool "4kB" + depends on !CPU_LOONGSON2 help This option select the standard 4kB Linux page size. On some R3000-family processors this is the only available page size. Using -- cgit v0.10.2 From f133f22dd6f413bdf71ebf7e00ce441d98ac7c87 Mon Sep 17 00:00:00 2001 From: Wu Zhangjin Date: Tue, 1 Dec 2009 14:55:42 +0800 Subject: MIPS: Loongson: Switch from flatmem to sparsemem With flatmem hibernation for Loongson will fail, and there are also some other problems such as broken files when using NFS or CIFS / Samba. The config help of sparsemem says: "This option provides some potential performance benefits, along with decreased code complexity." So to avoid the potential problems of FLATMEM, we disable FLATMEM directly and use SPARSEMEM instead. Related email thread: http://groups.google.com/group/loongson-dev/browse_thread/thread/b6b65890ec2b0f24/feb43e5aa7f55d9b?show_docid=feb43e5aa7f55d9b Reported-by: Tatu Kilappa Signed-off-by: Wu Zhangjin Patchwork: http://patchwork.linux-mips.org/patch/737/ Cc: linux-mips@linux-mips.org Cc: zhangfx@lemote.com Signed-off-by: Ralf Baechle diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index e232e50..fd7620f 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1778,7 +1778,7 @@ config SYS_SUPPORTS_SMARTMIPS config ARCH_FLATMEM_ENABLE def_bool y - depends on !NUMA + depends on !NUMA && !CPU_LOONGSON2 config ARCH_DISCONTIGMEM_ENABLE bool -- cgit v0.10.2 From 138f3c8518976953563a1316d7e0420c72d4ab96 Mon Sep 17 00:00:00 2001 From: Li Yewang Date: Tue, 1 Dec 2009 15:35:05 -0800 Subject: ipsec: can not add camellia cipher algorithm when using "ip xfrm state" command can not add camellia cipher algorithm when using "ip xfrm state" command. Signed-off-by: Li Yewang Signed-off-by: David S. Miller diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index faf54c6..348196d 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -365,6 +365,7 @@ static struct xfrm_algo_desc ealg_list[] = { }, { .name = "cbc(camellia)", + .compat = "camellia", .uinfo = { .encr = { -- cgit v0.10.2 From b2722b1c3a893ec6021508da15b32282ec79f4da Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 1 Dec 2009 15:53:57 -0800 Subject: ip_fragment: also adjust skb->truesize for packets not owned by a socket When a large packet gets reassembled by ip_defrag(), the head skb accounts for all the fragments in skb->truesize. If this packet is refragmented again, skb->truesize is not re-adjusted to reflect only the head size since its not owned by a socket. If the head fragment then gets recycled and reused for another received fragment, it might exceed the defragmentation limits due to its large truesize value. skb_recycle_check() explicitly checks for linear skbs, so any recycled skb should reflect its true size in skb->truesize. Change ip_fragment() to also adjust the truesize value of skbs not owned by a socket. Reported-and-tested-by: Ben Menchaca Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f989518..4d50daa 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -501,8 +501,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (skb->sk) { frag->sk = skb->sk; frag->destructor = sock_wfree; - truesizes += frag->truesize; } + truesizes += frag->truesize; } /* Everything is OK. Generate! */ -- cgit v0.10.2