From 910a9f5636f5c128c02bf9ccd71ac03325700b57 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dagenais Date: Wed, 12 Feb 2014 15:14:28 -0800 Subject: Input: adp5588-keys - get value from data out when dir is out As discussed here: http://ez.analog.com/message/35852, the 5587 revC and 5588 revB spec sheets contain a mistake in the GPIO_DAT_STATx register description. According to R.Shnell at ADI, as well as my own observations, it should read: "GPIO data status (shows GPIO state when read for inputs)". This commit changes the get value function accordingly. Signed-off-by: Jean-Francois Dagenais Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c index bb3b57b..5ef7fcf 100644 --- a/drivers/input/keyboard/adp5588-keys.c +++ b/drivers/input/keyboard/adp5588-keys.c @@ -76,8 +76,18 @@ static int adp5588_gpio_get_value(struct gpio_chip *chip, unsigned off) struct adp5588_kpad *kpad = container_of(chip, struct adp5588_kpad, gc); unsigned int bank = ADP5588_BANK(kpad->gpiomap[off]); unsigned int bit = ADP5588_BIT(kpad->gpiomap[off]); + int val; - return !!(adp5588_read(kpad->client, GPIO_DAT_STAT1 + bank) & bit); + mutex_lock(&kpad->gpio_lock); + + if (kpad->dir[bank] & bit) + val = kpad->dat_out[bank]; + else + val = adp5588_read(kpad->client, GPIO_DAT_STAT1 + bank); + + mutex_unlock(&kpad->gpio_lock); + + return !!(val & bit); } static void adp5588_gpio_set_value(struct gpio_chip *chip, -- cgit v0.10.2 From 70b0052425ffd549bb27fb08649a4d30daaf40e4 Mon Sep 17 00:00:00 2001 From: Anthony Olech Date: Mon, 17 Feb 2014 11:23:39 -0800 Subject: Input: da9052_onkey - use correct register bit for key status The wrong register bit of the DA9052/3 PMIC registers was used to determine the status on the ONKEY. Also a failure in reading the status register will no longer result in the work queue being rescheduled as that would result in a (potentially) endless retry. 
Signed-off-by: Anthony Olech Acked-by: David Dajun Chen Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/misc/da9052_onkey.c b/drivers/input/misc/da9052_onkey.c index 1f695f2..184c8f2 100644 --- a/drivers/input/misc/da9052_onkey.c +++ b/drivers/input/misc/da9052_onkey.c @@ -27,29 +27,32 @@ struct da9052_onkey { static void da9052_onkey_query(struct da9052_onkey *onkey) { - int key_stat; + int ret; - key_stat = da9052_reg_read(onkey->da9052, DA9052_EVENT_B_REG); - if (key_stat < 0) { + ret = da9052_reg_read(onkey->da9052, DA9052_STATUS_A_REG); + if (ret < 0) { dev_err(onkey->da9052->dev, - "Failed to read onkey event %d\n", key_stat); + "Failed to read onkey event err=%d\n", ret); } else { /* * Since interrupt for deassertion of ONKEY pin is not * generated, onkey event state determines the onkey * button state. */ - key_stat &= DA9052_EVENTB_ENONKEY; - input_report_key(onkey->input, KEY_POWER, key_stat); + bool pressed = !(ret & DA9052_STATUSA_NONKEY); + + input_report_key(onkey->input, KEY_POWER, pressed); input_sync(onkey->input); - } - /* - * Interrupt is generated only when the ONKEY pin is asserted. - * Hence the deassertion of the pin is simulated through work queue. - */ - if (key_stat) - schedule_delayed_work(&onkey->work, msecs_to_jiffies(50)); + /* + * Interrupt is generated only when the ONKEY pin + * is asserted. Hence the deassertion of the pin + * is simulated through work queue. + */ + if (pressed) + schedule_delayed_work(&onkey->work, + msecs_to_jiffies(50)); + } } static void da9052_onkey_work(struct work_struct *work) -- cgit v0.10.2 From 1406b916f4a29d5f9660264a28ce609c8c77e7ae Mon Sep 17 00:00:00 2001 From: "J. R. Okajima" Date: Wed, 19 Feb 2014 00:27:53 +0900 Subject: nfsd: fix lost nfserrno() call in nfsd_setattr() There is a regression in 208d0ac 2014-01-07 nfsd4: break only delegations when appropriate which deletes an nfserrno() call in nfsd_setattr() (by accident, probably), and NFSD becomes ignoring an error from VFS. 
Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 017d3cb..6d7be3f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -449,6 +449,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, fh_lock(fhp); host_err = notify_change(dentry, iap, NULL); fh_unlock(fhp); + err = nfserrno(host_err); out_put_write_access: if (size_change) -- cgit v0.10.2 From d7a15f8d0777955986a2ab00ab181795cab14b01 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 16 Mar 2014 14:24:08 -0500 Subject: vfs: atomic f_pos access in llseek() Commit 9c225f2655e36a4 ("vfs: atomic f_pos accesses as per POSIX") changed several system calls to use fdget_pos() instead of fdget(), but missed sys_llseek(). Fix it. Signed-off-by: Eric Biggers Signed-off-by: Al Viro diff --git a/fs/read_write.c b/fs/read_write.c index 54e19b9..28cc9c8 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -307,7 +307,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, unsigned int, whence) { int retval; - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); loff_t offset; if (!f.file) @@ -327,7 +327,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, retval = 0; } out_putf: - fdput(f); + fdput_pos(f); return retval; } #endif -- cgit v0.10.2 From 99aea68134f3c2a27b4d463c91cfa298c3efaccf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 16 Mar 2014 15:47:48 -0500 Subject: vfs: Don't let __fdget_pos() get FMODE_PATH files Commit bd2a31d522344 ("get rid of fget_light()") introduced the __fdget_pos() function, which returns the resulting file pointer and fdput flags combined in an 'unsigned long'. However, it also changed the behavior to return files with FMODE_PATH set, which shouldn't happen because read(), write(), lseek(), etc. aren't allowed on such files. This commit restores the old behavior. 
This regression actually had no effect on read() and write() since FMODE_READ and FMODE_WRITE are not set on file descriptors opened with O_PATH, but it did cause lseek() on a file descriptor opened with O_PATH to fail with ESPIPE rather than EBADF. Signed-off-by: Eric Biggers Signed-off-by: Al Viro diff --git a/fs/file.c b/fs/file.c index 60a45e9..eb56a13 100644 --- a/fs/file.c +++ b/fs/file.c @@ -713,27 +713,16 @@ unsigned long __fdget_raw(unsigned int fd) unsigned long __fdget_pos(unsigned int fd) { - struct files_struct *files = current->files; - struct file *file; - unsigned long v; - - if (atomic_read(&files->count) == 1) { - file = __fcheck_files(files, fd); - v = 0; - } else { - file = __fget(fd, 0); - v = FDPUT_FPUT; - } - if (!file) - return 0; + unsigned long v = __fdget(fd); + struct file *file = (struct file *)(v & ~3); - if (file->f_mode & FMODE_ATOMIC_POS) { + if (file && (file->f_mode & FMODE_ATOMIC_POS)) { if (file_count(file) > 1) { v |= FDPUT_POS_UNLOCK; mutex_lock(&file->f_pos_lock); } } - return v | (unsigned long)file; + return v; } /* -- cgit v0.10.2 From e825196d48d2b89a6ec3a8eff280098d2a78207e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 23 Mar 2014 00:28:40 -0400 Subject: make prepend_name() work correctly when called with negative *buflen In all callchains leading to prepend_name(), the value left in *buflen is eventually discarded unused if prepend_name() has returned a negative. So we are free to do what prepend() does, and subtract from *buflen *before* checking for underflow (which turns into checking the sign of subtraction result, of course). 
Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index 265e0ce..ca02c13 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2833,9 +2833,9 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) u32 dlen = ACCESS_ONCE(name->len); char *p; - if (*buflen < dlen + 1) - return -ENAMETOOLONG; *buflen -= dlen + 1; + if (*buflen < 0) + return -ENAMETOOLONG; p = *buffer -= dlen + 1; *p++ = '/'; while (dlen--) { -- cgit v0.10.2 From b37199e626b31e1175fb06764c5d1d687723aac2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 20 Mar 2014 15:18:22 -0400 Subject: rcuwalk: recheck mount_lock after mountpoint crossing attempts We can get false negative from __lookup_mnt() if an unrelated vfsmount gets moved. In that case legitimize_mnt() is guaranteed to fail, and we will fall back to non-RCU walk... unless we end up running into a hard error on a filesystem object we wouldn't have reached if not for that false negative. IOW, delaying that check until the end of pathname resolution is wrong - we should recheck right after we attempt to cross the mountpoint. We don't need to recheck unless we see d_mountpoint() being true - in that case even if we have just raced with mount/umount, we can simply go on as if we'd come at the moment when the sucker wasn't a mountpoint; if we run into a hard error as the result, it was a legitimate outcome. __lookup_mnt() returning NULL is different in that respect, since it might've happened due to operation on completely unrelated mountpoint. 
Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 2f730ef..4b491b4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1109,7 +1109,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, return false; if (!d_mountpoint(path->dentry)) - break; + return true; mounted = __lookup_mnt(path->mnt, path->dentry); if (!mounted) @@ -1125,20 +1125,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, */ *inode = path->dentry->d_inode; } - return true; -} - -static void follow_mount_rcu(struct nameidata *nd) -{ - while (d_mountpoint(nd->path.dentry)) { - struct mount *mounted; - mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); - if (!mounted) - break; - nd->path.mnt = &mounted->mnt; - nd->path.dentry = mounted->mnt.mnt_root; - nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); - } + return read_seqretry(&mount_lock, nd->m_seq); } static int follow_dotdot_rcu(struct nameidata *nd) @@ -1166,7 +1153,17 @@ static int follow_dotdot_rcu(struct nameidata *nd) break; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); } - follow_mount_rcu(nd); + while (d_mountpoint(nd->path.dentry)) { + struct mount *mounted; + mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); + if (!mounted) + break; + nd->path.mnt = &mounted->mnt; + nd->path.dentry = mounted->mnt.mnt_root; + nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); + if (!read_seqretry(&mount_lock, nd->m_seq)) + goto failed; + } nd->inode = nd->path.dentry->d_inode; return 0; -- cgit v0.10.2 From 5f12c5eca6e6b7aeb4b2028d579f614b4fe7a81f Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 18 Mar 2014 16:10:24 -0500 Subject: i2c: cpm: Fix build by adding of_address.h and of_irq.h Fixes a build break due to the undeclared use of irq_of_parse_and_map() and of_iomap(). 
This build break was apparently introduced while the driver was unbuildable due to the bug fixed by 62c19c9d29e65086e5ae76df371ed2e6b23f00cd ("i2c: Remove usage of orphaned symbol OF_I2C"). When 62c19c was added in v3.14-rc7, the driver was enabled again, breaking the powerpc mpc85xx_defconfig and mpc85xx_smp_defconfig. 62c19c is marked for stable, so this should go there as well. Reported-by: Geert Uytterhoeven Signed-off-by: Scott Wood Signed-off-by: Wolfram Sang Cc: stable@kernel.org diff --git a/drivers/i2c/busses/i2c-cpm.c b/drivers/i2c/busses/i2c-cpm.c index be7f0a2..f3b89a4 100644 --- a/drivers/i2c/busses/i2c-cpm.c +++ b/drivers/i2c/busses/i2c-cpm.c @@ -39,7 +39,9 @@ #include #include #include +#include #include +#include #include #include #include -- cgit v0.10.2 From 09ed3d5ba06137913960f9c9385f71fc384193ab Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Sat, 15 Mar 2014 16:11:47 +0000 Subject: xen/balloon: flush persistent kmaps in correct position Xen balloon driver will update ballooned out pages' P2M entries to point to scratch page for PV guests. In 24f69373e2 ("xen/balloon: don't alloc page while non-preemptible", kmap_flush_unused was moved after updating P2M table. In that case for 32 bit PV guest we might end up with P2M X -----> S (S is mfn of balloon scratch page) M2P Y -----> X (Y is mfn in persistent kmap entry) kmap_flush_unused() iterates through all the PTEs in the kmap address space, using pte_to_page() to obtain the page. If the p2m and the m2p are inconsistent the incorrect page is returned. This will clear page->address on the wrong page which may cause subsequent oopses if that page is currently kmap'ed. Move the flush back between get_page and __set_phys_to_machine to fix this. 
Signed-off-by: Wei Liu Signed-off-by: David Vrabel Cc: stable@vger.kernel.org # 3.12+ diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 37d06ea..61a6ac8 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -399,11 +399,25 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) state = BP_EAGAIN; break; } + scrub_page(page); - pfn = page_to_pfn(page); - frame_list[i] = pfn_to_mfn(pfn); + frame_list[i] = page_to_pfn(page); + } - scrub_page(page); + /* + * Ensure that ballooned highmem pages don't have kmaps. + * + * Do this before changing the p2m as kmap_flush_unused() + * reads PTEs to obtain pages (and hence needs the original + * p2m entry). + */ + kmap_flush_unused(); + + /* Update direct mapping, invalidate P2M, and add to balloon. */ + for (i = 0; i < nr_pages; i++) { + pfn = frame_list[i]; + frame_list[i] = pfn_to_mfn(pfn); + page = pfn_to_page(pfn); #ifdef CONFIG_XEN_HAVE_PVMMU /* @@ -429,11 +443,9 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) } #endif - balloon_append(pfn_to_page(pfn)); + balloon_append(page); } - /* Ensure that ballooned highmem pages don't have kmaps. */ - kmap_flush_unused(); flush_tlb_all(); set_xen_guest_handle(reservation.extent_start, frame_list); -- cgit v0.10.2 From 5926f87fdaad4be3ed10cec563bf357915e55a86 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 25 Mar 2014 10:38:37 +0000 Subject: Revert "xen: properly account for _PAGE_NUMA during xen pte translations" This reverts commit a9c8e4beeeb64c22b84c803747487857fe424b68. PTEs in Xen PV guests must contain machine addresses if _PAGE_PRESENT is set and pseudo-physical addresses if _PAGE_PRESENT is clear. This is because during a domain save/restore (migration) the page table entries are "canonicalised" and "uncanonicalised". i.e., MFNs are converted to PFNs during domain save so that on a restore the page table entries may be rewritten with the new MFNs on the destination.
This canonicalisation is only done for PTEs that are present. This change resulted in writing PTEs with MFNs if _PAGE_PROTNONE (or _PAGE_NUMA) was set but _PAGE_PRESENT was clear. These PTEs would be migrated as-is which would result in unexpected behaviour in the destination domain. Either a) the MFN would be translated to the wrong PFN/page; b) setting the _PAGE_PRESENT bit would clear the PTE because the MFN is no longer owned by the domain; or c) the present bit would not get set. Symptoms include "Bad page" reports when munmapping after migrating a domain. Signed-off-by: David Vrabel Acked-by: Konrad Rzeszutek Wilk Cc: [3.12+] diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5ad38ad..bbc8b12 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -445,20 +445,10 @@ static inline int pte_same(pte_t a, pte_t b) return a.pte == b.pte; } -static inline int pteval_present(pteval_t pteval) -{ - /* - * Yes Linus, _PAGE_PROTNONE == _PAGE_NUMA. Expressing it this - * way clearly states that the intent is that protnone and numa - * hinting ptes are considered present for the purposes of - * pagetable operations like zapping, protection changes, gup etc. - */ - return pteval & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_NUMA); -} - static inline int pte_present(pte_t a) { - return pteval_present(pte_flags(a)); + return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE | + _PAGE_NUMA); } #define pte_accessible pte_accessible diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 256282e..2423ef0 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -365,7 +365,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, /* Assume pteval_t is equivalent to all the other *val_t types. 
*/ static pteval_t pte_mfn_to_pfn(pteval_t val) { - if (pteval_present(val)) { + if (val & _PAGE_PRESENT) { unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; unsigned long pfn = mfn_to_pfn(mfn); @@ -381,7 +381,7 @@ static pteval_t pte_mfn_to_pfn(pteval_t val) static pteval_t pte_pfn_to_mfn(pteval_t val) { - if (pteval_present(val)) { + if (val & _PAGE_PRESENT) { unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; pteval_t flags = val & PTE_FLAGS_MASK; unsigned long mfn; -- cgit v0.10.2 From d6f2589ad561aa5fa39f347eca6942668b7560a1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 25 Mar 2014 21:37:09 +0100 Subject: fs: Avoid userspace mounting anon_inodefs filesystem anon_inodefs filesystem is a kernel internal filesystem userspace shouldn't mess with. Remove registration of it so userspace cannot even try to mount it (which would fail anyway because the filesystem is MS_NOUSER). This fixes an oops triggered by trinity when it tried mounting anon_inodefs which overwrote anon_inode_inode pointer while other CPU has been in anon_inode_getfile() between ihold() and d_instantiate(). Thus effectively creating dentry pointing to an inode without holding a reference to it. 
Reported-by: Sasha Levin Signed-off-by: Jan Kara Signed-off-by: Linus Torvalds diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 2408473..4b4543b 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -177,9 +177,6 @@ static int __init anon_inode_init(void) { int error; - error = register_filesystem(&anon_inode_fs_type); - if (error) - goto err_exit; anon_inode_mnt = kern_mount(&anon_inode_fs_type); if (IS_ERR(anon_inode_mnt)) { error = PTR_ERR(anon_inode_mnt); -- cgit v0.10.2 From fce7fc79c8f7188dfc5eafa1b937bcc3c5a4c2f5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 25 Mar 2014 17:43:34 -0700 Subject: fs: remove now stale label in anon_inode_init() The previous commit removed the register_filesystem() call and the associated error handling, but left the label for the error path that no longer exists. Remove that too. Signed-off-by: Linus Torvalds diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 4b4543b..42fcc46 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -186,7 +186,6 @@ static int __init anon_inode_init(void) err_unregister_filesystem: unregister_filesystem(&anon_inode_fs_type); -err_exit: panic(KERN_ERR "anon_inode_init() failed (%d)\n", error); } -- cgit v0.10.2 From 2c4a33aba5f9ea3a28f2e40351f078d95f00786b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 25 Mar 2014 23:39:41 -0400 Subject: tracing: Fix traceon trigger condition to actually turn tracing on While working on my tutorial for 2014 Linux Collaboration Summit I found that the traceon trigger did not work when conditions were used. The other triggers worked fine though. Looking into it, it is because of the way the triggers use the ring buffer to store the fields it will use for the condition. But if tracing is off, nothing is stored in the buffer, and the tracepoint exits before calling the trigger to test the condition. 
This is fine for all the triggers that only work when tracing is on, but for traceon trigger that is to work when tracing is off, nothing happens. The fix is simple, just use a temp ring buffer to record the event if tracing is off and the event has a trace event conditional trigger enabled. The rest of the tracepoint code will work just fine, but the tracepoint won't be recorded in the other buffers. Cc: Tom Zanussi Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 815c878..24c1f23 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1600,15 +1600,31 @@ void trace_buffer_unlock_commit(struct ring_buffer *buffer, } EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit); +static struct ring_buffer *temp_buffer; + struct ring_buffer_event * trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, struct ftrace_event_file *ftrace_file, int type, unsigned long len, unsigned long flags, int pc) { + struct ring_buffer_event *entry; + *current_rb = ftrace_file->tr->trace_buffer.buffer; - return trace_buffer_lock_reserve(*current_rb, + entry = trace_buffer_lock_reserve(*current_rb, type, len, flags, pc); + /* + * If tracing is off, but we have triggers enabled + * we still need to look at the event data. Use the temp_buffer + * to store the trace event for the trigger to use. It's recursive + * safe and will not be recorded anywhere.
+ */ + if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) { + *current_rb = temp_buffer; + entry = trace_buffer_lock_reserve(*current_rb, + type, len, flags, pc); + } + return entry; } EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); @@ -6494,11 +6510,16 @@ __init static int tracer_alloc_buffers(void) raw_spin_lock_init(&global_trace.start_lock); + /* Used for event triggers */ + temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE); + if (!temp_buffer) + goto out_free_cpumask; + /* TODO: make the number of buffers hot pluggable with CPUS */ if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) { printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); WARN_ON(1); - goto out_free_cpumask; + goto out_free_temp_buffer; } if (global_trace.buffer_disabled) @@ -6540,6 +6561,8 @@ __init static int tracer_alloc_buffers(void) return 0; +out_free_temp_buffer: + ring_buffer_free(temp_buffer); out_free_cpumask: free_percpu(global_trace.trace_buffer.data); #ifdef CONFIG_TRACER_MAX_TRACE -- cgit v0.10.2 From fbd02dd405d0724a0f25897ed4a6813297c9b96f Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Sun, 23 Mar 2014 22:06:36 -0700 Subject: ip_tunnel: Fix dst ref-count. Commit 10ddceb22ba (ip_tunnel:multicast process cause panic due to skb->_skb_refdst NULL pointer) removed dst-drop call from ip-tunnel-recv. Following commit reintroduce dst-drop and fix the original bug by checking loopback packet before releasing dst. Original bug: https://bugzilla.kernel.org/show_bug.cgi?id=70681 CC: Xin Long Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 1863422f..250be74 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -182,6 +182,14 @@ static int gre_cisco_rcv(struct sk_buff *skb) int i; bool csum_err = false; +#ifdef CONFIG_NET_IPGRE_BROADCAST + if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { + /* Looped back packet, drop it! 
*/ + if (rt_is_output_route(skb_rtable(skb))) + goto drop; + } +#endif + if (parse_gre_header(skb, &tpi, &csum_err) < 0) goto drop; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 78a89e6..a82a22d 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -416,9 +416,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { - /* Looped back packet, drop it! */ - if (rt_is_output_route(skb_rtable(skb))) - goto drop; tunnel->dev->stats.multicast++; skb->pkt_type = PACKET_BROADCAST; } diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 6f847dd..8d69626 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -108,6 +108,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) nf_reset(skb); secpath_reset(skb); skb_clear_hash_if_not_l4(skb); + skb_dst_drop(skb); skb->vlan_tci = 0; skb_set_queue_mapping(skb, 0); skb->pkt_type = PACKET_HOST; -- cgit v0.10.2 From 51dfe7b944998eaeb2b34d314f3a6b16a5fd621b Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 24 Mar 2014 17:52:12 -0400 Subject: tg3: Do not include vlan acceleration features in vlan_features Including hardware acceleration features in vlan_features breaks stacked vlans (Q-in-Q) by marking the bottom vlan interface as capable of acceleration. This causes one of the tags to be lost and the packets are sent with a single vlan header. CC: Nithin Nayak Sujir CC: Michael Chan Signed-off-by: Vlad Yasevich Signed-off-by: David S.
Miller diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 3b6d0ba..70a225c8 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -17649,8 +17649,6 @@ static int tg3_init_one(struct pci_dev *pdev, tg3_init_bufmgr_config(tp); - features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; - /* 5700 B0 chips do not support checksumming correctly due * to hardware bugs. */ @@ -17682,7 +17680,8 @@ static int tg3_init_one(struct pci_dev *pdev, features |= NETIF_F_TSO_ECN; } - dev->features |= features; + dev->features |= features | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; dev->vlan_features |= features; /* -- cgit v0.10.2 From 6797b39e6f6f34c74177736e146406e894b9482b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 26 Mar 2014 13:30:52 -0700 Subject: Input: cypress_ps2 - don't report as a button pads The cypress PS/2 trackpad models supported by the cypress_ps2 driver emulate BTN_RIGHT events in firmware based on the finger position, as part of this no motion events are sent when the finger is in the button area. The INPUT_PROP_BUTTONPAD property is there to indicate to userspace that BTN_RIGHT events should be emulated in userspace, which is not necessary in this case. When INPUT_PROP_BUTTONPAD is advertised userspace will wait for a motion event before propagating the button event higher up the stack, as it needs current abs x + y data for its BTN_RIGHT emulation. 
Since in the cypress_ps2 pads don't report motion events in the button area, this means that clicks in the button area end up being ignored, so INPUT_PROP_BUTTONPAD actually causes problems for these touchpads, and removing it fixes: https://bugs.freedesktop.org/show_bug.cgi?id=76341 Reported-by: Adam Williamson Tested-by: Adam Williamson Reviewed-by: Peter Hutterer Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/mouse/cypress_ps2.c b/drivers/input/mouse/cypress_ps2.c index 87095e2..8af34ff 100644 --- a/drivers/input/mouse/cypress_ps2.c +++ b/drivers/input/mouse/cypress_ps2.c @@ -409,7 +409,6 @@ static int cypress_set_input_params(struct input_dev *input, __clear_bit(REL_X, input->relbit); __clear_bit(REL_Y, input->relbit); - __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); __set_bit(EV_KEY, input->evbit); __set_bit(BTN_LEFT, input->keybit); __set_bit(BTN_RIGHT, input->keybit); -- cgit v0.10.2 From a79121d3b57e7ad61f0b5d23eae05214054f3ccd Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 26 Mar 2014 00:25:41 +0100 Subject: net: mvneta: rename MVNETA_GMAC2_PSC_ENABLE to MVNETA_GMAC2_PCS_ENABLE Bit 3 of the MVNETA_GMAC_CTRL_2 is actually used to enable the PCS, not the PSC: there was a typo in the name of the define, which this commit fixes. Cc: stable@vger.kernel.org Signed-off-by: Thomas Petazzoni Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index f418f4f..d6b04d0 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -161,7 +161,7 @@ #define MVNETA_GMAC_MAX_RX_SIZE_MASK 0x7ffc #define MVNETA_GMAC0_PORT_ENABLE BIT(0) #define MVNETA_GMAC_CTRL_2 0x2c08 -#define MVNETA_GMAC2_PSC_ENABLE BIT(3) +#define MVNETA_GMAC2_PCS_ENABLE BIT(3) #define MVNETA_GMAC2_PORT_RGMII BIT(4) #define MVNETA_GMAC2_PORT_RESET BIT(6) #define MVNETA_GMAC_STATUS 0x2c10 @@ -733,7 +733,7 @@ static void mvneta_port_sgmii_config(struct mvneta_port *pp) u32 val; val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); - val |= MVNETA_GMAC2_PSC_ENABLE; + val |= MVNETA_GMAC2_PCS_ENABLE; mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); mvreg_write(pp, MVNETA_SGMII_SERDES_CFG, MVNETA_SGMII_SERDES_PROTO); -- cgit v0.10.2 From e3a8786c10e75903f1269474e21fe8cb49c3a670 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 26 Mar 2014 00:25:42 +0100 Subject: net: mvneta: fix usage as a module on RGMII configurations Commit 5445eaf309ff ('mvneta: Try to fix mvneta when compiled as module') fixed the mvneta driver to make it work properly when loaded as a module in SGMII configuration, which was tested successful by the author on the Armada XP OpenBlocks AX3, which uses SGMII. However, it turns out that the Armada XP GP, which uses RGMII, is affected by a similar problem: its SERDES configuration is lost when mvneta is loaded as a module, because this configuration is set by the bootloader, and then lost because the clock is gated by the clock framework until the mvneta driver is loaded again and the clock is re-enabled. However, it turns out that for the RGMII case, setting the SERDES configuration is not sufficient: the PCS enable bit in the MVNETA_GMAC_CTRL_2 register must also be set, like in the SGMII configuration. 
Therefore, this commit reworks the SGMII/RGMII initialization: the only difference between the two now is a different SERDES configuration, all the rest is identical. In detail, to achieve this, the commit: * Renames MVNETA_SGMII_SERDES_CFG to MVNETA_SERDES_CFG because it is not specific to SGMII, but also used on RGMII configurations. * Adds a MVNETA_RGMII_SERDES_PROTO definition, that must be used as the MVNETA_SERDES_CFG value in RGMII configurations. * Removes the mvneta_gmac_rgmii_set() and mvneta_port_sgmii_config() functions, and instead directly does the SGMII/RGMII configuration in mvneta_port_power_up(), from where those functions were called. It is worth mentioning that mvneta_gmac_rgmii_set() had an 'enable' parameter that was always passed as '1', so it was pretty useless. * Reworks the mvneta_port_power_up() function to set the MVNETA_SERDES_CFG register to the appropriate value depending on the RGMII vs. SGMII configuration. It also unconditionally sets the PCS_ENABLE bit (was already done for SGMII, but is now also needed for RGMII), and sets the PORT_RGMII bit (which was already done for both SGMII and RGMII). This commit was successfully tested with mvneta compiled as a module, on both the OpenBlocks AX3 (SGMII configuration) and the Armada XP GP (RGMII configuration). Reported-by: Steve McIntyre Cc: stable@vger.kernel.org # 3.11.x: 5445eaf309ff mvneta: Try to fix mvneta when compiled as module Signed-off-by: Thomas Petazzoni Signed-off-by: David S.
Miller diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index d6b04d0..c9c2faa 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -88,8 +88,9 @@ #define MVNETA_TX_IN_PRGRS BIT(1) #define MVNETA_TX_FIFO_EMPTY BIT(8) #define MVNETA_RX_MIN_FRAME_SIZE 0x247c -#define MVNETA_SGMII_SERDES_CFG 0x24A0 +#define MVNETA_SERDES_CFG 0x24A0 #define MVNETA_SGMII_SERDES_PROTO 0x0cc7 +#define MVNETA_RGMII_SERDES_PROTO 0x0667 #define MVNETA_TYPE_PRIO 0x24bc #define MVNETA_FORCE_UNI BIT(21) #define MVNETA_TXQ_CMD_1 0x24e4 @@ -710,35 +711,6 @@ static void mvneta_rxq_bm_disable(struct mvneta_port *pp, mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val); } - - -/* Sets the RGMII Enable bit (RGMIIEn) in port MAC control register */ -static void mvneta_gmac_rgmii_set(struct mvneta_port *pp, int enable) -{ - u32 val; - - val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); - - if (enable) - val |= MVNETA_GMAC2_PORT_RGMII; - else - val &= ~MVNETA_GMAC2_PORT_RGMII; - - mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); -} - -/* Config SGMII port */ -static void mvneta_port_sgmii_config(struct mvneta_port *pp) -{ - u32 val; - - val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); - val |= MVNETA_GMAC2_PCS_ENABLE; - mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); - - mvreg_write(pp, MVNETA_SGMII_SERDES_CFG, MVNETA_SGMII_SERDES_PROTO); -} - /* Start the Ethernet port RX and TX activity */ static void mvneta_port_up(struct mvneta_port *pp) { @@ -2756,12 +2728,15 @@ static void mvneta_port_power_up(struct mvneta_port *pp, int phy_mode) mvreg_write(pp, MVNETA_UNIT_INTR_CAUSE, 0); if (phy_mode == PHY_INTERFACE_MODE_SGMII) - mvneta_port_sgmii_config(pp); + mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_SGMII_SERDES_PROTO); + else + mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_RGMII_SERDES_PROTO); - mvneta_gmac_rgmii_set(pp, 1); + val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); + + val |= MVNETA_GMAC2_PCS_ENABLE | MVNETA_GMAC2_PORT_RGMII; /* Cancel Port Reset */ 
- val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); val &= ~MVNETA_GMAC2_PORT_RESET; mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); -- cgit v0.10.2 From b5f3b75d9d3b5526d973fc0bfee5680bdc6acf2a Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 26 Mar 2014 00:26:55 +0100 Subject: net: mvneta: use devm_ioremap_resource() instead of of_iomap() The mvneta driver currently uses of_iomap(), which has two drawbacks: it doesn't request the resource, and it isn't devm-style so some error handling is needed. This commit switches to use devm_ioremap_resource() instead, which automatically requests the resource (so the I/O registers region shows up properly in /proc/iomem), and also is devm-style, which allows to get rid of some error handling to unmap the I/O registers region. Signed-off-by: Thomas Petazzoni Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index c9c2faa..8d76fca 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -2749,6 +2750,7 @@ static void mvneta_port_power_up(struct mvneta_port *pp, int phy_mode) static int mvneta_probe(struct platform_device *pdev) { const struct mbus_dram_target_info *dram_target_info; + struct resource *res; struct device_node *dn = pdev->dev.of_node; struct device_node *phy_node; u32 phy_addr; @@ -2813,9 +2815,15 @@ static int mvneta_probe(struct platform_device *pdev) clk_prepare_enable(pp->clk); - pp->base = of_iomap(dn, 0); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + err = -ENODEV; + goto err_clk; + } + + pp->base = devm_ioremap_resource(&pdev->dev, res); if (pp->base == NULL) { - err = -ENOMEM; + err = PTR_ERR(pp->base); goto err_clk; } @@ -2823,7 +2831,7 @@ static int mvneta_probe(struct platform_device *pdev) pp->stats = alloc_percpu(struct mvneta_pcpu_stats); if (!pp->stats) { err = -ENOMEM; - goto 
err_unmap; + goto err_clk; } for_each_possible_cpu(cpu) { @@ -2888,8 +2896,6 @@ err_deinit: mvneta_deinit(pp); err_free_stats: free_percpu(pp->stats); -err_unmap: - iounmap(pp->base); err_clk: clk_disable_unprepare(pp->clk); err_free_irq: @@ -2909,7 +2915,6 @@ static int mvneta_remove(struct platform_device *pdev) mvneta_deinit(pp); clk_disable_unprepare(pp->clk); free_percpu(pp->stats); - iounmap(pp->base); irq_dispose_mapping(dev->irq); free_netdev(dev); -- cgit v0.10.2 From de1443916791d75fdd26becb116898277bb0273f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Mar 2014 18:42:27 -0700 Subject: net: unix: non blocking recvmsg() should not return -EINTR Some applications didn't expect recvmsg() on a non blocking socket could return -EINTR. This possibility was added as a side effect of commit b3ca9b02b00704 ("net: fix multithreaded signal handling in unix recv routines"). To hit this bug, you need to be a bit unlucky, as the u->readlock mutex is usually held for very small periods. Fixes: b3ca9b02b00704 ("net: fix multithreaded signal handling in unix recv routines") Signed-off-by: Eric Dumazet Cc: Rainer Weikusat Signed-off-by: David S. Miller diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ce6ec6c..94404f1 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1787,8 +1787,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; err = mutex_lock_interruptible(&u->readlock); - if (err) { - err = sock_intr_errno(sock_rcvtimeo(sk, noblock)); + if (unlikely(err)) { + /* recvmsg() in non blocking mode is supposed to return -EAGAIN + * sk_rcvtimeo is not honored by mutex_lock_interruptible() + */ + err = noblock ? 
-EAGAIN : -ERESTARTSYS; goto out; } @@ -1913,6 +1916,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct unix_sock *u = unix_sk(sk); DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); int copied = 0; + int noblock = flags & MSG_DONTWAIT; int check_creds = 0; int target; int err = 0; @@ -1928,7 +1932,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); - timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); + timeo = sock_rcvtimeo(sk, noblock); /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg @@ -1940,8 +1944,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, } err = mutex_lock_interruptible(&u->readlock); - if (err) { - err = sock_intr_errno(timeo); + if (unlikely(err)) { + /* recvmsg() in non blocking mode is supposed to return -EAGAIN + * sk_rcvtimeo is not honored by mutex_lock_interruptible() + */ + err = noblock ? -EAGAIN : -ERESTARTSYS; goto out; } -- cgit v0.10.2 From 347cf10aed1657a2b385a95f92763a67062c5ad3 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 25 Mar 2014 14:38:44 +1000 Subject: drm/udl: take reference to device struct for dma-bufs this stops the device from being deleted before all the dma-bufs on it are freed, this fixes an oops when you unplug a udl device while it has imported a buffer from another device. 
Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c index 8d67b94..0394811 100644 --- a/drivers/gpu/drm/udl/udl_gem.c +++ b/drivers/gpu/drm/udl/udl_gem.c @@ -177,8 +177,10 @@ void udl_gem_free_object(struct drm_gem_object *gem_obj) if (obj->vmapping) udl_gem_vunmap(obj); - if (gem_obj->import_attach) + if (gem_obj->import_attach) { drm_prime_gem_destroy(gem_obj, obj->sg); + put_device(gem_obj->dev->dev); + } if (obj->pages) udl_gem_put_pages(obj); @@ -256,9 +258,12 @@ struct drm_gem_object *udl_gem_prime_import(struct drm_device *dev, int ret; /* need to attach */ + get_device(dev->dev); attach = dma_buf_attach(dma_buf, dev->dev); - if (IS_ERR(attach)) + if (IS_ERR(attach)) { + put_device(dev->dev); return ERR_CAST(attach); + } get_dma_buf(dma_buf); @@ -282,6 +287,6 @@ fail_unmap: fail_detach: dma_buf_detach(dma_buf, attach); dma_buf_put(dma_buf); - + put_device(dev->dev); return ERR_PTR(ret); } -- cgit v0.10.2 From adbbdbac04f093c0abf946b1e93e4e5291808491 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 26 Mar 2014 14:09:37 +1000 Subject: drm/nouveau: fail runtime pm properly. If we were on a non-optimus device, we'd return -EINVAL, this would lead to the over engineered runtime pm system to go into an error state, subsequent get_sync's would fail, so we'd never be able to open the device again. (like really get_sync shouldn't fail if the device isn't powered down). 
Signed-off-by: Dave Airlie Reviewed-by: Alex Deucher diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 89c484d..4ee702a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -866,13 +866,16 @@ static int nouveau_pmops_runtime_suspend(struct device *dev) struct drm_device *drm_dev = pci_get_drvdata(pdev); int ret; - if (nouveau_runtime_pm == 0) - return -EINVAL; + if (nouveau_runtime_pm == 0) { + pm_runtime_forbid(dev); + return -EBUSY; + } /* are we optimus enabled? */ if (nouveau_runtime_pm == -1 && !nouveau_is_optimus() && !nouveau_is_v1_dsm()) { DRM_DEBUG_DRIVER("failing to power off - not optimus\n"); - return -EINVAL; + pm_runtime_forbid(dev); + return -EBUSY; } nv_debug_level(SILENT); @@ -923,12 +926,15 @@ static int nouveau_pmops_runtime_idle(struct device *dev) struct nouveau_drm *drm = nouveau_drm(drm_dev); struct drm_crtc *crtc; - if (nouveau_runtime_pm == 0) + if (nouveau_runtime_pm == 0) { + pm_runtime_forbid(dev); return -EBUSY; + } /* are we optimus enabled? */ if (nouveau_runtime_pm == -1 && !nouveau_is_optimus() && !nouveau_is_v1_dsm()) { DRM_DEBUG_DRIVER("failing to power off - not optimus\n"); + pm_runtime_forbid(dev); return -EBUSY; } -- cgit v0.10.2 From e2681a1bf5ae053426a6c5c1daaed17b2f95efe6 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 27 Mar 2014 19:06:59 +0800 Subject: ASoC: fsl_sai: Add isr to deal with error flag It's quite crucial to clear error flags because SAI might hang if getting FIFO underrun during playback (I haven't confirmed the same issue on Rx overflow though). So this patch enables those irq and adds isr() to clear the flags so as to keep playback entirely safe. 
Signed-off-by: Nicolin Chen Signed-off-by: Mark Brown diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index c4a4231..0bc98bb 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -23,6 +23,71 @@ #include "fsl_sai.h" +#define FSL_SAI_FLAGS (FSL_SAI_CSR_SEIE |\ + FSL_SAI_CSR_FEIE) + +static irqreturn_t fsl_sai_isr(int irq, void *devid) +{ + struct fsl_sai *sai = (struct fsl_sai *)devid; + struct device *dev = &sai->pdev->dev; + u32 xcsr, mask; + + /* Only handle those what we enabled */ + mask = (FSL_SAI_FLAGS >> FSL_SAI_CSR_xIE_SHIFT) << FSL_SAI_CSR_xF_SHIFT; + + /* Tx IRQ */ + regmap_read(sai->regmap, FSL_SAI_TCSR, &xcsr); + xcsr &= mask; + + if (xcsr & FSL_SAI_CSR_WSF) + dev_dbg(dev, "isr: Start of Tx word detected\n"); + + if (xcsr & FSL_SAI_CSR_SEF) + dev_warn(dev, "isr: Tx Frame sync error detected\n"); + + if (xcsr & FSL_SAI_CSR_FEF) { + dev_warn(dev, "isr: Transmit underrun detected\n"); + /* FIFO reset for safety */ + xcsr |= FSL_SAI_CSR_FR; + } + + if (xcsr & FSL_SAI_CSR_FWF) + dev_dbg(dev, "isr: Enabled transmit FIFO is empty\n"); + + if (xcsr & FSL_SAI_CSR_FRF) + dev_dbg(dev, "isr: Transmit FIFO watermark has been reached\n"); + + regmap_update_bits(sai->regmap, FSL_SAI_TCSR, + FSL_SAI_CSR_xF_W_MASK | FSL_SAI_CSR_FR, xcsr); + + /* Rx IRQ */ + regmap_read(sai->regmap, FSL_SAI_RCSR, &xcsr); + xcsr &= mask; + + if (xcsr & FSL_SAI_CSR_WSF) + dev_dbg(dev, "isr: Start of Rx word detected\n"); + + if (xcsr & FSL_SAI_CSR_SEF) + dev_warn(dev, "isr: Rx Frame sync error detected\n"); + + if (xcsr & FSL_SAI_CSR_FEF) { + dev_warn(dev, "isr: Receive overflow detected\n"); + /* FIFO reset for safety */ + xcsr |= FSL_SAI_CSR_FR; + } + + if (xcsr & FSL_SAI_CSR_FWF) + dev_dbg(dev, "isr: Enabled receive FIFO is full\n"); + + if (xcsr & FSL_SAI_CSR_FRF) + dev_dbg(dev, "isr: Receive FIFO watermark has been reached\n"); + + regmap_update_bits(sai->regmap, FSL_SAI_RCSR, + FSL_SAI_CSR_xF_W_MASK | FSL_SAI_CSR_FR, xcsr); + + return IRQ_HANDLED; +} + 
static int fsl_sai_set_dai_sysclk_tr(struct snd_soc_dai *cpu_dai, int clk_id, unsigned int freq, int fsl_dir) { @@ -373,8 +438,8 @@ static int fsl_sai_dai_probe(struct snd_soc_dai *cpu_dai) { struct fsl_sai *sai = dev_get_drvdata(cpu_dai->dev); - regmap_update_bits(sai->regmap, FSL_SAI_TCSR, 0xffffffff, 0x0); - regmap_update_bits(sai->regmap, FSL_SAI_RCSR, 0xffffffff, 0x0); + regmap_update_bits(sai->regmap, FSL_SAI_TCSR, 0xffffffff, FSL_SAI_FLAGS); + regmap_update_bits(sai->regmap, FSL_SAI_RCSR, 0xffffffff, FSL_SAI_FLAGS); regmap_update_bits(sai->regmap, FSL_SAI_TCR1, FSL_SAI_CR1_RFW_MASK, FSL_SAI_MAXBURST_TX * 2); regmap_update_bits(sai->regmap, FSL_SAI_RCR1, FSL_SAI_CR1_RFW_MASK, @@ -490,12 +555,14 @@ static int fsl_sai_probe(struct platform_device *pdev) struct fsl_sai *sai; struct resource *res; void __iomem *base; - int ret; + int irq, ret; sai = devm_kzalloc(&pdev->dev, sizeof(*sai), GFP_KERNEL); if (!sai) return -ENOMEM; + sai->pdev = pdev; + sai->big_endian_regs = of_property_read_bool(np, "big-endian-regs"); if (sai->big_endian_regs) fsl_sai_regmap_config.val_format_endian = REGMAP_ENDIAN_BIG; @@ -514,6 +581,18 @@ static int fsl_sai_probe(struct platform_device *pdev) return PTR_ERR(sai->regmap); } + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "no irq for node %s\n", np->full_name); + return irq; + } + + ret = devm_request_irq(&pdev->dev, irq, fsl_sai_isr, 0, np->name, sai); + if (ret) { + dev_err(&pdev->dev, "failed to claim irq %u\n", irq); + return ret; + } + sai->dma_params_rx.addr = res->start + FSL_SAI_RDR; sai->dma_params_tx.addr = res->start + FSL_SAI_TDR; sai->dma_params_rx.maxburst = FSL_SAI_MAXBURST_RX; diff --git a/sound/soc/fsl/fsl_sai.h b/sound/soc/fsl/fsl_sai.h index e432260..a264185 100644 --- a/sound/soc/fsl/fsl_sai.h +++ b/sound/soc/fsl/fsl_sai.h @@ -37,7 +37,21 @@ /* SAI Transmit/Recieve Control Register */ #define FSL_SAI_CSR_TERE BIT(31) +#define FSL_SAI_CSR_FR BIT(25) +#define FSL_SAI_CSR_xF_SHIFT 16 
+#define FSL_SAI_CSR_xF_W_SHIFT 18 +#define FSL_SAI_CSR_xF_MASK (0x1f << FSL_SAI_CSR_xF_SHIFT) +#define FSL_SAI_CSR_xF_W_MASK (0x7 << FSL_SAI_CSR_xF_W_SHIFT) +#define FSL_SAI_CSR_WSF BIT(20) +#define FSL_SAI_CSR_SEF BIT(19) +#define FSL_SAI_CSR_FEF BIT(18) #define FSL_SAI_CSR_FWF BIT(17) +#define FSL_SAI_CSR_FRF BIT(16) +#define FSL_SAI_CSR_xIE_SHIFT 8 +#define FSL_SAI_CSR_WSIE BIT(12) +#define FSL_SAI_CSR_SEIE BIT(11) +#define FSL_SAI_CSR_FEIE BIT(10) +#define FSL_SAI_CSR_FWIE BIT(9) #define FSL_SAI_CSR_FRIE BIT(8) #define FSL_SAI_CSR_FRDE BIT(0) @@ -99,6 +113,7 @@ #define FSL_SAI_MAXBURST_RX 6 struct fsl_sai { + struct platform_device *pdev; struct regmap *regmap; bool big_endian_regs; -- cgit v0.10.2 From 75c5a52da3fc2a06abb6c6192bdf5d680e56d37d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 26 Mar 2014 06:20:14 +0100 Subject: vfs: Allocate anon_inode_inode in anon_inode_init() Currently we allocated anon_inode_inode in anon_inodefs_mount. This is somewhat fragile as if that function ever gets called again, it will overwrite anon_inode_inode pointer. So move the initialization of anon_inode_inode to anon_inode_init(). 
Signed-off-by: Jan Kara [ Further simplified on suggestion from Dave Jones ] Signed-off-by: Linus Torvalds diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 42fcc46..80ef38c 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -41,19 +41,8 @@ static const struct dentry_operations anon_inodefs_dentry_operations = { static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - struct dentry *root; - root = mount_pseudo(fs_type, "anon_inode:", NULL, + return mount_pseudo(fs_type, "anon_inode:", NULL, &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC); - if (!IS_ERR(root)) { - struct super_block *s = root->d_sb; - anon_inode_inode = alloc_anon_inode(s); - if (IS_ERR(anon_inode_inode)) { - dput(root); - deactivate_locked_super(s); - root = ERR_CAST(anon_inode_inode); - } - } - return root; } static struct file_system_type anon_inode_fs_type = { @@ -175,18 +164,15 @@ EXPORT_SYMBOL_GPL(anon_inode_getfd); static int __init anon_inode_init(void) { - int error; - anon_inode_mnt = kern_mount(&anon_inode_fs_type); - if (IS_ERR(anon_inode_mnt)) { - error = PTR_ERR(anon_inode_mnt); - goto err_unregister_filesystem; - } - return 0; + if (IS_ERR(anon_inode_mnt)) + panic("anon_inode_init() kernel mount failed (%ld)\n", PTR_ERR(anon_inode_mnt)); -err_unregister_filesystem: - unregister_filesystem(&anon_inode_fs_type); - panic(KERN_ERR "anon_inode_init() failed (%d)\n", error); + anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb); + if (IS_ERR(anon_inode_inode)) + panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode)); + + return 0; } fs_initcall(anon_inode_init); -- cgit v0.10.2 From 681daee2443291419c57cccb0671f5f94a839005 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 26 Mar 2014 13:03:00 +0800 Subject: virtio-net: correct error handling of virtqueue_kick() Current error handling of virtqueue_kick() was wrong in two places: - The skb were freed immediately when 
virtqueue_kick() fails during xmit. This may lead to a double free since the skb was not detached from the virtqueue. - try_fill_recv() returns false when virtqueue_kick() fails. This will lead to unnecessary rescheduling of refill work. Actually, it's safe to just ignore the kick failure in those two places. So this patch fixes this by partially reverting commit 67975901183799af8e93ec60e322f9e2a1940b9b. Fixes 67975901183799af8e93ec60e322f9e2a1940b9b (virtio_net: verify if virtqueue_kick() succeeded). Cc: Heinz Graalfs Cc: Rusty Russell Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 5632a99..841b608 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -671,8 +671,7 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp) if (err) break; } while (rq->vq->num_free); - if (unlikely(!virtqueue_kick(rq->vq))) - return false; + virtqueue_kick(rq->vq); return !oom; } @@ -877,7 +876,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) err = xmit_skb(sq, skb); /* This should not happen! */ - if (unlikely(err) || unlikely(!virtqueue_kick(sq->vq))) { + if (unlikely(err)) { dev->stats.tx_fifo_errors++; if (net_ratelimit()) dev_warn(&dev->dev, @@ -886,6 +885,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) kfree_skb(skb); return NETDEV_TX_OK; } + virtqueue_kick(sq->vq); /* Don't wait up for transmitted skbs to be freed. */ skb_orphan(skb); -- cgit v0.10.2 From 14a0d635d18d0fb552dcc979d6d25106e6541f2e Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 26 Mar 2014 14:32:51 +0100 Subject: usbnet: include wait queue head in device structure This fixes a race which happens by freeing an object on the stack. 
Quoting Julius: > The issue is > that it calls usbnet_terminate_urbs() before that, which temporarily > installs a waitqueue in dev->wait in order to be able to wait on the > tasklet to run and finish up some queues. The waiting itself looks > okay, but the access to 'dev->wait' is totally unprotected and can > race arbitrarily. I think in this case usbnet_bh() managed to succeed > it's dev->wait check just before usbnet_terminate_urbs() sets it back > to NULL. The latter then finishes and the waitqueue_t structure on its > stack gets overwritten by other functions halfway through the > wake_up() call in usbnet_bh(). The fix is to just not allocate the data structure on the stack. As dev->wait is abused as a flag it also takes a runtime PM change to fix this bug. Signed-off-by: Oliver Neukum Reported-by: Grant Grundler Tested-by: Grant Grundler Signed-off-by: David S. Miller diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index dd10d58..f9e96c4 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -752,14 +752,12 @@ EXPORT_SYMBOL_GPL(usbnet_unlink_rx_urbs); // precondition: never called in_interrupt static void usbnet_terminate_urbs(struct usbnet *dev) { - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(unlink_wakeup); DECLARE_WAITQUEUE(wait, current); int temp; /* ensure there are no more active urbs */ - add_wait_queue(&unlink_wakeup, &wait); + add_wait_queue(&dev->wait, &wait); set_current_state(TASK_UNINTERRUPTIBLE); - dev->wait = &unlink_wakeup; temp = unlink_urbs(dev, &dev->txq) + unlink_urbs(dev, &dev->rxq); @@ -773,15 +771,14 @@ static void usbnet_terminate_urbs(struct usbnet *dev) "waited for %d urb completions\n", temp); } set_current_state(TASK_RUNNING); - dev->wait = NULL; - remove_wait_queue(&unlink_wakeup, &wait); + remove_wait_queue(&dev->wait, &wait); } int usbnet_stop (struct net_device *net) { struct usbnet *dev = netdev_priv(net); struct driver_info *info = dev->driver_info; - int retval; + int retval, pm; 
clear_bit(EVENT_DEV_OPEN, &dev->flags); netif_stop_queue (net); @@ -791,6 +788,8 @@ int usbnet_stop (struct net_device *net) net->stats.rx_packets, net->stats.tx_packets, net->stats.rx_errors, net->stats.tx_errors); + /* to not race resume */ + pm = usb_autopm_get_interface(dev->intf); /* allow minidriver to stop correctly (wireless devices to turn off * radio etc) */ if (info->stop) { @@ -817,6 +816,9 @@ int usbnet_stop (struct net_device *net) dev->flags = 0; del_timer_sync (&dev->delay); tasklet_kill (&dev->bh); + if (!pm) + usb_autopm_put_interface(dev->intf); + if (info->manage_power && !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags)) info->manage_power(dev, 0); @@ -1437,11 +1439,12 @@ static void usbnet_bh (unsigned long param) /* restart RX again after disabling due to high error rate */ clear_bit(EVENT_RX_KILL, &dev->flags); - // waiting for all pending urbs to complete? - if (dev->wait) { - if ((dev->txq.qlen + dev->rxq.qlen + dev->done.qlen) == 0) { - wake_up (dev->wait); - } + /* waiting for all pending urbs to complete? + * only then can we forgo submitting anew + */ + if (waitqueue_active(&dev->wait)) { + if (dev->txq.qlen + dev->rxq.qlen + dev->done.qlen == 0) + wake_up_all(&dev->wait); // or are we maybe short a few urbs? 
} else if (netif_running (dev->net) && @@ -1580,6 +1583,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) dev->driver_name = name; dev->msg_enable = netif_msg_init (msg_level, NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK); + init_waitqueue_head(&dev->wait); skb_queue_head_init (&dev->rxq); skb_queue_head_init (&dev->txq); skb_queue_head_init (&dev->done); @@ -1791,9 +1795,10 @@ int usbnet_resume (struct usb_interface *intf) spin_unlock_irq(&dev->txq.lock); if (test_bit(EVENT_DEV_OPEN, &dev->flags)) { - /* handle remote wakeup ASAP */ - if (!dev->wait && - netif_device_present(dev->net) && + /* handle remote wakeup ASAP + * we cannot race against stop + */ + if (netif_device_present(dev->net) && !timer_pending(&dev->delay) && !test_bit(EVENT_RX_HALT, &dev->flags)) rx_alloc_submit(dev, GFP_NOIO); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index e303eef..0662e98 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -30,7 +30,7 @@ struct usbnet { struct driver_info *driver_info; const char *driver_name; void *driver_priv; - wait_queue_head_t *wait; + wait_queue_head_t wait; struct mutex phy_mutex; unsigned char suspend_count; unsigned char pkt_cnt, pkt_err; -- cgit v0.10.2 From fc0d48b8fb449ca007b2057328abf736cb516168 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 26 Mar 2014 11:47:56 -0400 Subject: vlan: Set hard_header_len according to available acceleration Currently, if the card supports CTAG acceleration we do not account for the vlan header even if we are configuring an 8021AD vlan. This may not be best since we'll do software tagging for 8021AD which will cause data copy on skb head expansion Configure the length based on available hw offload capabilities and vlan protocol. CC: Patrick McHardy Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index ec99099..175273f 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -307,9 +307,11 @@ static void vlan_sync_address(struct net_device *dev, static void vlan_transfer_features(struct net_device *dev, struct net_device *vlandev) { + struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); + vlandev->gso_max_size = dev->gso_max_size; - if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) + if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto)) vlandev->hard_header_len = dev->hard_header_len; else vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 4b65aa4..a9591ff 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -592,7 +592,8 @@ static int vlan_dev_init(struct net_device *dev) #endif dev->needed_headroom = real_dev->needed_headroom; - if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) { + if (vlan_hw_offload_capable(real_dev->features, + vlan_dev_priv(dev)->vlan_proto)) { dev->header_ops = &vlan_passthru_header_ops; dev->hard_header_len = real_dev->hard_header_len; } else { -- cgit v0.10.2 From 36d5fe6a000790f56039afe26834265db0a3ad4c Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Wed, 26 Mar 2014 22:37:45 +0000 Subject: core, nfqueue, openvswitch: Orphan frags in skb_zerocopy and handle errors skb_zerocopy can copy elements of the frags array between skbs, but it doesn't orphan them. Also, it doesn't handle errors, so this patch takes care of that as well, and modify the callers accordingly. skb_tx_error() is also added to the callers so they will signal the failed delivery towards the creator of the skb. Signed-off-by: Zoltan Kiss Signed-off-by: David S. 
Miller diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5e1e6f2..15ede6a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2451,8 +2451,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, unsigned int flags); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); -void skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, - int len, int hlen); +int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, + int len, int hlen); void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 869c7af..97e5a2c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2127,25 +2127,31 @@ EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); * * The `hlen` as calculated by skb_zerocopy_headlen() specifies the * headroom in the `to` buffer. 
+ * + * Return value: + * 0: everything is OK + * -ENOMEM: couldn't orphan frags of @from due to lack of memory + * -EFAULT: skb_copy_bits() found some problem with skb geometry */ -void -skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) +int +skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) { int i, j = 0; int plen = 0; /* length of skb->head fragment */ + int ret; struct page *page; unsigned int offset; BUG_ON(!from->head_frag && !hlen); /* dont bother with small payloads */ - if (len <= skb_tailroom(to)) { - skb_copy_bits(from, 0, skb_put(to, len), len); - return; - } + if (len <= skb_tailroom(to)) + return skb_copy_bits(from, 0, skb_put(to, len), len); if (hlen) { - skb_copy_bits(from, 0, skb_put(to, hlen), hlen); + ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen); + if (unlikely(ret)) + return ret; len -= hlen; } else { plen = min_t(int, skb_headlen(from), len); @@ -2163,6 +2169,11 @@ skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) to->len += len + plen; to->data_len += len + plen; + if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) { + skb_tx_error(from); + return -ENOMEM; + } + for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { if (!len) break; @@ -2173,6 +2184,8 @@ skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) j++; } skb_shinfo(to)->nr_frags = j; + + return 0; } EXPORT_SYMBOL_GPL(skb_zerocopy); diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index f072fe8..108120f 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -354,13 +354,16 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, skb = nfnetlink_alloc_skb(net, size, queue->peer_portid, GFP_ATOMIC); - if (!skb) + if (!skb) { + skb_tx_error(entskb); return NULL; + } nlh = nlmsg_put(skb, 0, 0, NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, sizeof(struct nfgenmsg), 0); if (!nlh) { 
+ skb_tx_error(entskb); kfree_skb(skb); return NULL; } @@ -488,13 +491,15 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, nla->nla_type = NFQA_PAYLOAD; nla->nla_len = nla_attr_size(data_len); - skb_zerocopy(skb, entskb, data_len, hlen); + if (skb_zerocopy(skb, entskb, data_len, hlen)) + goto nla_put_failure; } nlh->nlmsg_len = skb->len; return skb; nla_put_failure: + skb_tx_error(entskb); kfree_skb(skb); net_err_ratelimited("nf_queue: error creating packet message\n"); return NULL; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 8601b32..270b77d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -464,7 +464,9 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, } nla->nla_len = nla_attr_size(skb->len); - skb_zerocopy(user_skb, skb, skb->len, hlen); + err = skb_zerocopy(user_skb, skb, skb->len, hlen); + if (err) + goto out; /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */ if (!(dp->user_features & OVS_DP_F_UNALIGNED)) { @@ -478,6 +480,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid); out: + if (err) + skb_tx_error(skb); kfree_skb(nskb); return err; } -- cgit v0.10.2 From 97a5221f56bad2e1c7e8ab55da4ac4748ef59c64 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 27 Mar 2014 09:28:31 +0800 Subject: net/mlx4_core: pass pci_device_id.driver_data to __mlx4_init_one during reset The second parameter of __mlx4_init_one() is used to identify whether the pci_dev is a PF or VF. Currently, when it is invoked in mlx4_pci_slot_reset() this information is missing. This patch matches the pci_dev with mlx4_pci_table and passes the pci_device_id.driver_data to __mlx4_init_one() in mlx4_pci_slot_reset(). Signed-off-by: Wei Yang Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 936c153..d413e60 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2681,7 +2681,11 @@ static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) { - int ret = __mlx4_init_one(pdev, 0); + const struct pci_device_id *id; + int ret; + + id = pci_match_id(mlx4_pci_table, pdev); + ret = __mlx4_init_one(pdev, id->driver_data); return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } -- cgit v0.10.2 From c53c6beddc0ebb6feb5d64488c2c0cd23dd7f43d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 27 Mar 2014 02:31:08 +0000 Subject: drm/radeon: fix runtime suspend breaking secondary GPUs Same fix as for nouveau, when we fail with EINVAL, subsequent gets fail hard, causing the device not to open. Signed-off-by: Dave Airlie Reviewed-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 84a1bbb7..f633c27 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -403,11 +403,15 @@ static int radeon_pmops_runtime_suspend(struct device *dev) struct drm_device *drm_dev = pci_get_drvdata(pdev); int ret; - if (radeon_runtime_pm == 0) - return -EINVAL; + if (radeon_runtime_pm == 0) { + pm_runtime_forbid(dev); + return -EBUSY; + } - if (radeon_runtime_pm == -1 && !radeon_is_px()) - return -EINVAL; + if (radeon_runtime_pm == -1 && !radeon_is_px()) { + pm_runtime_forbid(dev); + return -EBUSY; + } drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; drm_kms_helper_poll_disable(drm_dev); @@ -456,12 +460,15 @@ static int radeon_pmops_runtime_idle(struct device *dev) struct drm_device *drm_dev = pci_get_drvdata(pdev); struct drm_crtc *crtc; - if (radeon_runtime_pm == 0) + if (radeon_runtime_pm == 0) { + pm_runtime_forbid(dev); return -EBUSY; + 
} /* are we PX enabled? */ if (radeon_runtime_pm == -1 && !radeon_is_px()) { DRM_DEBUG_DRIVER("failing to power off - not px\n"); + pm_runtime_forbid(dev); return -EBUSY; } -- cgit v0.10.2 From 8ee661b505613ef2747b350ca2871a31b3781bee Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 26 Mar 2014 20:10:09 +0100 Subject: drm/i915: Undo gtt scratch pte unmapping again It apparently blows up on some machines. This functionally reverts commit 828c79087cec61eaf4c76bb32c222fbe35ac3930 Author: Ben Widawsky Date: Wed Oct 16 09:21:30 2013 -0700 drm/i915: Disable GGTT PTEs on GEN6+ suspend Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=64841 Reported-and-Tested-by: Brad Jackson Cc: stable@vger.kernel.org Cc: Takashi Iwai Cc: Paulo Zanoni Cc: Todd Previte Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 40a2b36..d278be1 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -842,7 +842,7 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev) dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, dev_priv->gtt.base.start / PAGE_SIZE, dev_priv->gtt.base.total / PAGE_SIZE, - false); + true); } void i915_gem_restore_gtt_mappings(struct drm_device *dev) -- cgit v0.10.2 From cab5e127eef040399902caa8e1510795583fa03a Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 27 Mar 2014 16:30:49 -0700 Subject: time: Revert to calling clock_was_set_delayed() while in irq context In commit 47a1b796306356f35 ("tick/timekeeping: Call update_wall_time outside the jiffies lock"), we moved to calling clock_was_set() due to the fact that we were no longer holding the timekeeping or jiffies lock. However, there is still the problem that clock_was_set() triggers an IPI, which cannot be done from the timer's hard irq context, and will generate WARN_ON warnings. 
Apparently in my earlier testing, I'm guessing I didn't bump the dmesg log level, so I somehow missed the WARN_ONs. Thus we need to revert back to calling clock_was_set_delayed(). Signed-off-by: John Stultz Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1395963049-11923-1-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 0aa4ce81..5b40279 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1435,7 +1435,8 @@ void update_wall_time(void) out: raw_spin_unlock_irqrestore(&timekeeper_lock, flags); if (clock_set) - clock_was_set(); + /* Have to call _delayed version, since in irq context*/ + clock_was_set_delayed(); } /** -- cgit v0.10.2 From 421e08c41fda1f0c2ff6af81a67b491389b653a5 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 28 Mar 2014 00:43:00 -0700 Subject: Input: synaptics - add manual min/max quirk The new Lenovo Haswell series (-40's) contains a new Synaptics touchpad. However, these new Synaptics devices report bad axis ranges. Under Windows, it is not a problem because the Windows driver uses RMI4 over SMBus to talk to the device. Under Linux, we are using the PS/2 fallback interface and it occurs the reported ranges are wrong. Of course, it would be too easy to have only one range for the whole series, each touchpad seems to be calibrated in a different way. We can not use SMBus to get the actual range because I suspect the firmware will switch into the SMBus mode and stop talking through PS/2 (this is the case for hybrid HID over I2C / PS/2 Synaptics touchpads). So as a temporary solution (until RMI4 land into upstream), start a new list of quirks with the min/max manually set. 
Signed-off-by: Benjamin Tissoires CC: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 26386f9..ef148f9 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -265,11 +265,22 @@ static int synaptics_identify(struct psmouse *psmouse) * Read touchpad resolution and maximum reported coordinates * Resolution is left zero if touchpad does not support the query */ + +static const int *quirk_min_max; + static int synaptics_resolution(struct psmouse *psmouse) { struct synaptics_data *priv = psmouse->private; unsigned char resp[3]; + if (quirk_min_max) { + priv->x_min = quirk_min_max[0]; + priv->x_max = quirk_min_max[1]; + priv->y_min = quirk_min_max[2]; + priv->y_max = quirk_min_max[3]; + return 0; + } + if (SYN_ID_MAJOR(priv->identity) < 4) return 0; @@ -1485,10 +1496,46 @@ static const struct dmi_system_id olpc_dmi_table[] __initconst = { { } }; +static const struct dmi_system_id min_max_dmi_table[] __initconst = { +#if defined(CONFIG_DMI) + { + /* Lenovo ThinkPad Helix */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad Helix"), + }, + .driver_data = (int []){1024, 5052, 2258, 4832}, + }, + { + /* Lenovo ThinkPad T440s */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T440"), + }, + .driver_data = (int []){1024, 5112, 2024, 4832}, + }, + { + /* Lenovo ThinkPad T540p */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T540"), + }, + .driver_data = (int []){1024, 5056, 2058, 4832}, + }, +#endif + { } +}; + void __init synaptics_module_init(void) { + const struct dmi_system_id *min_max_dmi; + impaired_toshiba_kbc = dmi_check_system(toshiba_dmi_table); broken_olpc_ec = dmi_check_system(olpc_dmi_table); + + min_max_dmi = dmi_first_match(min_max_dmi_table); + if (min_max_dmi) + quirk_min_max = 
min_max_dmi->driver_data; } static int __synaptics_init(struct psmouse *psmouse, bool absolute_mode) -- cgit v0.10.2 From 8a0435d958fb36d93b8df610124a0e91e5675c82 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 28 Mar 2014 01:01:38 -0700 Subject: Input: synaptics - add manual min/max quirk for ThinkPad X240 This extends Benjamin Tissoires manual min/max quirk table with support for the ThinkPad X240. Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index ef148f9..d8d49d1 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -1507,6 +1507,14 @@ static const struct dmi_system_id min_max_dmi_table[] __initconst = { .driver_data = (int []){1024, 5052, 2258, 4832}, }, { + /* Lenovo ThinkPad X240 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X240"), + }, + .driver_data = (int []){1232, 5710, 1156, 4696}, + }, + { /* Lenovo ThinkPad T440s */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -- cgit v0.10.2 From 335a67d2ad481b03607bf30a38c28178fa1ad61a Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 27 Mar 2014 02:01:34 -0400 Subject: random32: assign to network folks in MAINTAINERS lib/random32.c was split out of the network code and is de-facto still maintained by the almighty net/ gods. Make it a bit more official so that people who aren't aware of that know where to send their patches. Signed-off-by: Sasha Levin Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index 82640e6..0b3c40f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6006,6 +6006,7 @@ F: include/uapi/linux/net.h F: include/uapi/linux/netdevice.h F: tools/net/ F: tools/testing/selftests/net/ +F: lib/random32.c NETWORKING [IPv4/IPv6] M: "David S. 
Miller" -- cgit v0.10.2 From 05efa8c943b1d5d90fa8c8147571837573338bb6 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Fri, 28 Mar 2014 17:38:42 +0100 Subject: random32: avoid attempt to late reseed if in the middle of seeding Commit 4af712e8df ("random32: add prandom_reseed_late() and call when nonblocking pool becomes initialized") has added a late reseed stage that happens as soon as the nonblocking pool is marked as initialized. This fails in the case that the nonblocking pool gets initialized during __prandom_reseed()'s call to get_random_bytes(). In that case we'd double back into __prandom_reseed() in an attempt to do a late reseed - deadlocking on 'lock' early on in the boot process. Instead, just avoid even waiting to do a reseed if a reseed is already occurring. Fixes: 4af712e8df99 ("random32: add prandom_reseed_late() and call when nonblocking pool becomes initialized") Signed-off-by: Sasha Levin Acked-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller diff --git a/lib/random32.c b/lib/random32.c index 1e5b2df..6148967 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -244,8 +244,19 @@ static void __prandom_reseed(bool late) static bool latch = false; static DEFINE_SPINLOCK(lock); + /* Asking for random bytes might result in bytes getting + * moved into the nonblocking pool and thus marking it + * as initialized. In this case we would double back into + * this function and attempt to do a late reseed. + * Ignore the pointless attempt to reseed again if we're + * already waiting for bytes when the nonblocking pool + * got initialized. + */ + /* only allow initial seeding (late == false) once */ - spin_lock_irqsave(&lock, flags); + if (!spin_trylock_irqsave(&lock, flags)) + return; + if (latch && !late) goto out; latch = true; -- cgit v0.10.2 From d8316f3991d207fe32881a9ac20241be8fa2bad0 Mon Sep 17 00:00:00 2001 From: "Michael S. 
Tsirkin" Date: Thu, 27 Mar 2014 12:00:26 +0200 Subject: vhost: fix total length when packets are too short When mergeable buffers are disabled, and the incoming packet is too large for the rx buffer, get_rx_bufs returns success. This was intentional in order to make recvmsg truncate the packet and then handle_rx would detect err != sock_len and drop it. Unfortunately we pass the original sock_len to recvmsg - which means we use parts of iov not fully validated. Fix this up by detecting this overrun and doing packet drop immediately. CVE-2014-0077 Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index a0fa5de..026be58 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -532,6 +532,12 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, *iovcount = seg; if (unlikely(log)) *log_num = nlogs; + + /* Detect overrun */ + if (unlikely(datalen > 0)) { + r = UIO_MAXIOV + 1; + goto err; + } return headcount; err: vhost_discard_vq_desc(vq, headcount); @@ -587,6 +593,14 @@ static void handle_rx(struct vhost_net *net) /* On error, stop handling until the next kick. */ if (unlikely(headcount < 0)) break; + /* On overrun, truncate and discard */ + if (unlikely(headcount > UIO_MAXIOV)) { + msg.msg_iovlen = 1; + err = sock->ops->recvmsg(NULL, sock, &msg, + 1, MSG_DONTWAIT | MSG_TRUNC); + pr_debug("Discarded rx packet: len %zd\n", sock_len); + continue; + } /* OK, now we need to know about added descriptors. */ if (!headcount) { if (unlikely(vhost_enable_notify(&net->dev, vq))) { -- cgit v0.10.2 From a39ee449f96a2cd44ce056d8a0a112211a9b1a1f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 27 Mar 2014 12:53:37 +0200 Subject: vhost: validate vhost_get_vq_desc return value vhost fails to validate negative error code from vhost_get_vq_desc causing a crash: we are using -EFAULT which is 0xfffffff2 as vector size, which exceeds the allocated size. 
The code in question was introduced in commit 8dd014adfea6f173c1ef6378f7e5e7924866c923 vhost-net: mergeable buffers support CVE-2014-0055 Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 026be58..e1e22e0 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -505,9 +505,13 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, r = -ENOBUFS; goto err; } - d = vhost_get_vq_desc(vq->dev, vq, vq->iov + seg, + r = vhost_get_vq_desc(vq->dev, vq, vq->iov + seg, ARRAY_SIZE(vq->iov) - seg, &out, &in, log, log_num); + if (unlikely(r < 0)) + goto err; + + d = r; if (d == vq->num) { r = 0; goto err; -- cgit v0.10.2 From 12464bb8de021a01fa7ec9299c273c247df7f198 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Thu, 27 Mar 2014 21:46:55 +0900 Subject: bridge: Fix inability to retrieve vlan tags when tx offload is disabled Bridge vlan code (br_vlan_get_tag()) assumes that all frames have vlan_tci if they are tagged, but if vlan tx offload is manually disabled on bridge device and frames are sent from vlan device on the bridge device, the tags are embedded in skb->data and they break this assumption. Extract embedded vlan tags and move them to vlan_tci at ingress. Signed-off-by: Toshiaki Makita Acked-by: Vlad Yasevich Signed-off-by: David S. 
Miller diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 63f0455..8fe8b71 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -49,14 +49,14 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) brstats->tx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); - if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid)) - goto out; - BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); + if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid)) + goto out; + if (is_broadcast_ether_addr(dest)) br_flood_deliver(br, skb, false); else if (is_multicast_ether_addr(dest)) { diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 8249ca76..44f31af 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -174,6 +174,18 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, if (!v) return false; + /* If vlan tx offload is disabled on bridge device and frame was + * sent from vlan device on the bridge device, it does not have + * HW accelerated vlan tag. + */ + if (unlikely(!vlan_tx_tag_present(skb) && + (skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD)))) { + skb = vlan_untag(skb); + if (unlikely(!skb)) + return false; + } + err = br_vlan_get_tag(skb, vid); if (!*vid) { u16 pvid = br_get_pvid(v); -- cgit v0.10.2 From 99b192da9c99284ad3374132e56f66995cadc6b4 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Thu, 27 Mar 2014 21:46:56 +0900 Subject: bridge: Fix handling stacked vlan tags If a bridge with vlan_filtering enabled receives frames with stacked vlan tags, i.e., they have two vlan tags, br_vlan_untag() strips not only the outer tag but also the inner tag. br_vlan_untag() is called only from br_handle_vlan(), and in this case, it is enough to set skb->vlan_tci to 0 here, because vlan_tci has already been set before calling br_handle_vlan(). 
Signed-off-by: Toshiaki Makita Acked-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 44f31af..c77eed5 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -119,22 +119,6 @@ static void __vlan_flush(struct net_port_vlans *v) kfree_rcu(v, rcu); } -/* Strip the tag from the packet. Will return skb with tci set 0. */ -static struct sk_buff *br_vlan_untag(struct sk_buff *skb) -{ - if (skb->protocol != htons(ETH_P_8021Q)) { - skb->vlan_tci = 0; - return skb; - } - - skb->vlan_tci = 0; - skb = vlan_untag(skb); - if (skb) - skb->vlan_tci = 0; - - return skb; -} - struct sk_buff *br_handle_vlan(struct net_bridge *br, const struct net_port_vlans *pv, struct sk_buff *skb) @@ -150,7 +134,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, */ br_vlan_get_tag(skb, &vid); if (test_bit(vid, pv->untagged_bitmap)) - skb = br_vlan_untag(skb); + skb->vlan_tci = 0; out: return skb; -- cgit v0.10.2 From 4f647e0a3c37b8d5086214128614a136064110c3 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Thu, 27 Mar 2014 11:05:34 -0300 Subject: openvswitch: fix a possible deadlock and lockdep warning There are two problematic situations. A deadlock can happen when is_percpu is false because it can get interrupted while holding the spinlock. Then it executes ovs_flow_stats_update() in softirq context which tries to get the same lock. The second situation is that when is_percpu is true, the code correctly disables BH but only for the local CPU, so the following can happen when locking the remote CPU without disabling BH: CPU#0 CPU#1 ovs_flow_stats_get() stats_read() +->spin_lock remote CPU#1 ovs_flow_stats_get() | stats_read() | ... +--> spin_lock remote CPU#0 | | | ovs_flow_stats_update() | ... | spin_lock local CPU#0 <--+ ovs_flow_stats_update() +---------------------------------- spin_lock local CPU#1 This patch disables BH for both cases fixing the deadlocks. 
Acked-by: Jesse Gross ================================= [ INFO: inconsistent lock state ] 3.14.0-rc8-00007-g632b06a #1 Tainted: G I --------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. swapper/0/0 [HC0[0]:SC1[5]:HE1:SE0] takes: (&(&cpu_stats->lock)->rlock){+.?...}, at: [] ovs_flow_stats_update+0x51/0xd0 [openvswitch] {SOFTIRQ-ON-W} state was registered at: [] __lock_acquire+0x68f/0x1c40 [] lock_acquire+0xa2/0x1d0 [] _raw_spin_lock+0x3e/0x80 [] ovs_flow_stats_get+0xc4/0x1e0 [openvswitch] [] ovs_flow_cmd_fill_info+0x185/0x360 [openvswitch] [] ovs_flow_cmd_build_info.constprop.27+0x55/0x90 [openvswitch] [] ovs_flow_cmd_new_or_set+0x4dd/0x570 [openvswitch] [] genl_family_rcv_msg+0x1cd/0x3f0 [] genl_rcv_msg+0x8e/0xd0 [] netlink_rcv_skb+0xa9/0xc0 [] genl_rcv+0x28/0x40 [] netlink_unicast+0x100/0x1e0 [] netlink_sendmsg+0x347/0x770 [] sock_sendmsg+0x9c/0xe0 [] ___sys_sendmsg+0x3a9/0x3c0 [] __sys_sendmsg+0x51/0x90 [] SyS_sendmsg+0x12/0x20 [] system_call_fastpath+0x16/0x1b irq event stamp: 1740726 hardirqs last enabled at (1740726): [] ip6_finish_output2+0x4f0/0x840 hardirqs last disabled at (1740725): [] ip6_finish_output2+0x4ab/0x840 softirqs last enabled at (1740674): [] _local_bh_enable+0x22/0x50 softirqs last disabled at (1740675): [] irq_exit+0xc5/0xd0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&cpu_stats->lock)->rlock); lock(&(&cpu_stats->lock)->rlock); *** DEADLOCK *** 5 locks held by swapper/0/0: #0: (((&ifa->dad_timer))){+.-...}, at: [] call_timer_fn+0x5/0x320 #1: (rcu_read_lock){.+.+..}, at: [] mld_sendpack+0x5/0x4a0 #2: (rcu_read_lock_bh){.+....}, at: [] ip6_finish_output2+0x59/0x840 #3: (rcu_read_lock_bh){.+....}, at: [] __dev_queue_xmit+0x5/0x9b0 #4: (rcu_read_lock){.+.+..}, at: [] internal_dev_xmit+0x5/0x110 [openvswitch] stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Tainted: G I 3.14.0-rc8-00007-g632b06a #1 Hardware name: /DX58SO, BIOS SOX5810J.86A.5599.2012.0529.2218 
05/29/2012 0000000000000000 0fcf20709903df0c ffff88042d603808 ffffffff817cfe3c ffffffff81c134c0 ffff88042d603858 ffffffff817cb6da 0000000000000005 ffffffff00000001 ffff880400000000 0000000000000006 ffffffff81c134c0 Call Trace: [] dump_stack+0x4d/0x66 [] print_usage_bug+0x1f4/0x205 [] ? check_usage_backwards+0x180/0x180 [] mark_lock+0x223/0x2b0 [] __lock_acquire+0x623/0x1c40 [] ? __lock_is_held+0x57/0x80 [] ? masked_flow_lookup+0x236/0x250 [openvswitch] [] lock_acquire+0xa2/0x1d0 [] ? ovs_flow_stats_update+0x51/0xd0 [openvswitch] [] _raw_spin_lock+0x3e/0x80 [] ? ovs_flow_stats_update+0x51/0xd0 [openvswitch] [] ovs_flow_stats_update+0x51/0xd0 [openvswitch] [] ovs_dp_process_received_packet+0x84/0x120 [openvswitch] [] ? __lock_acquire+0x347/0x1c40 [] ovs_vport_receive+0x2a/0x30 [openvswitch] [] internal_dev_xmit+0x68/0x110 [openvswitch] [] ? internal_dev_xmit+0x5/0x110 [openvswitch] [] dev_hard_start_xmit+0x2e6/0x8b0 [] __dev_queue_xmit+0x417/0x9b0 [] ? __dev_queue_xmit+0x5/0x9b0 [] ? ip6_finish_output2+0x4f0/0x840 [] dev_queue_xmit+0x10/0x20 [] ip6_finish_output2+0x551/0x840 [] ? ip6_finish_output+0x9a/0x220 [] ip6_finish_output+0x9a/0x220 [] ip6_output+0x4f/0x1f0 [] mld_sendpack+0x1d9/0x4a0 [] mld_send_initial_cr.part.32+0x88/0xa0 [] ? addrconf_dad_completed+0x220/0x220 [] ipv6_mc_dad_complete+0x31/0x50 [] addrconf_dad_completed+0x147/0x220 [] ? addrconf_dad_completed+0x220/0x220 [] addrconf_dad_timer+0x19f/0x1c0 [] call_timer_fn+0x99/0x320 [] ? call_timer_fn+0x5/0x320 [] ? addrconf_dad_completed+0x220/0x220 [] run_timer_softirq+0x254/0x3b0 [] __do_softirq+0x12d/0x480 Signed-off-by: Flavio Leitner Signed-off-by: David S. 
Miller diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index dda451f..2998989 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -103,30 +103,24 @@ static void stats_read(struct flow_stats *stats, void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { - int cpu, cur_cpu; + int cpu; *used = 0; *tcp_flags = 0; memset(ovs_stats, 0, sizeof(*ovs_stats)); + local_bh_disable(); if (!flow->stats.is_percpu) { stats_read(flow->stats.stat, ovs_stats, used, tcp_flags); } else { - cur_cpu = get_cpu(); for_each_possible_cpu(cpu) { struct flow_stats *stats; - if (cpu == cur_cpu) - local_bh_disable(); - stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); stats_read(stats, ovs_stats, used, tcp_flags); - - if (cpu == cur_cpu) - local_bh_enable(); } - put_cpu(); } + local_bh_enable(); } static void stats_reset(struct flow_stats *stats) @@ -141,25 +135,17 @@ static void stats_reset(struct flow_stats *stats) void ovs_flow_stats_clear(struct sw_flow *flow) { - int cpu, cur_cpu; + int cpu; + local_bh_disable(); if (!flow->stats.is_percpu) { stats_reset(flow->stats.stat); } else { - cur_cpu = get_cpu(); - for_each_possible_cpu(cpu) { - - if (cpu == cur_cpu) - local_bh_disable(); - stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu)); - - if (cpu == cur_cpu) - local_bh_enable(); } - put_cpu(); } + local_bh_enable(); } static int check_header(struct sk_buff *skb, int len) -- cgit v0.10.2 From e2a1d3e47bb904082b758dec9d07edf241c45d05 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Mar 2014 07:19:19 -0700 Subject: tcp: fix get_timewait4_sock() delay computation on 64bit It seems I missed one change in get_timewait4_sock() to compute the remaining time before deletion of IPV4 timewait socket. This could result in wrong output in /proc/net/tcp for tm->when field. Fixes: 96f817fedec4 ("tcp: shrink tcp6_timewait_sock by one cache line") Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3cf9765..1e4eac7 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2628,7 +2628,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, { __be32 dest, src; __u16 destp, srcp; - long delta = tw->tw_ttd - jiffies; + s32 delta = tw->tw_ttd - inet_tw_time_stamp(); dest = tw->tw_daddr; src = tw->tw_rcv_saddr; -- cgit v0.10.2 From c15b1ccadb323ea50023e8f1cca2954129a62b51 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 27 Mar 2014 18:28:07 +0100 Subject: ipv6: move DAD and addrconf_verify processing to workqueue addrconf_join_solict and addrconf_join_anycast may cause actions which need rtnl locked, especially on first address creation. A new DAD state is introduced which defers processing of the initial DAD processing into a workqueue. To get rtnl lock we need to push the code paths which depend on those calls up to workqueues, specifically addrconf_verify and the DAD processing. (v2) addrconf_dad_failure needs to be queued up to the workqueue, too. This patch introduces a new DAD state and stop the DAD processing in the workqueue (this is because of the possible ipv6_del_addr processing which removes the solicited multicast address from the device). addrconf_verify_lock is removed, too. After the transition it is not needed any more. As we are not processing in bottom half anymore we need to be a bit more careful about disabling bottom half out when we lock spin_locks which are also used in bh. 
Relevant backtrace: [ 541.030090] RTNL: assertion failed at net/core/dev.c (4496) [ 541.031143] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G O 3.10.33-1-amd64-vyatta #1 [ 541.031145] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 541.031146] ffffffff8148a9f0 000000000000002f ffffffff813c98c1 ffff88007c4451f8 [ 541.031148] 0000000000000000 0000000000000000 ffffffff813d3540 ffff88007fc03d18 [ 541.031150] 0000880000000006 ffff88007c445000 ffffffffa0194160 0000000000000000 [ 541.031152] Call Trace: [ 541.031153] [] ? dump_stack+0xd/0x17 [ 541.031180] [] ? __dev_set_promiscuity+0x101/0x180 [ 541.031183] [] ? __hw_addr_create_ex+0x60/0xc0 [ 541.031185] [] ? __dev_set_rx_mode+0xaa/0xc0 [ 541.031189] [] ? __dev_mc_add+0x61/0x90 [ 541.031198] [] ? igmp6_group_added+0xfc/0x1a0 [ipv6] [ 541.031208] [] ? kmem_cache_alloc+0xcb/0xd0 [ 541.031212] [] ? ipv6_dev_mc_inc+0x267/0x300 [ipv6] [ 541.031216] [] ? addrconf_join_solict+0x2e/0x40 [ipv6] [ 541.031219] [] ? ipv6_dev_ac_inc+0x159/0x1f0 [ipv6] [ 541.031223] [] ? addrconf_join_anycast+0x92/0xa0 [ipv6] [ 541.031226] [] ? __ipv6_ifa_notify+0x11e/0x1e0 [ipv6] [ 541.031229] [] ? ipv6_ifa_notify+0x33/0x50 [ipv6] [ 541.031233] [] ? addrconf_dad_completed+0x28/0x100 [ipv6] [ 541.031241] [] ? task_cputime+0x2d/0x50 [ 541.031244] [] ? addrconf_dad_timer+0x136/0x150 [ipv6] [ 541.031247] [] ? addrconf_dad_completed+0x100/0x100 [ipv6] [ 541.031255] [] ? call_timer_fn.isra.22+0x2a/0x90 [ 541.031258] [] ? addrconf_dad_completed+0x100/0x100 [ipv6] Hunks and backtrace stolen from a patch by Stephen Hemminger. Reported-by: Stephen Hemminger Signed-off-by: Stephen Hemminger Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 9650a3f..b4956a5 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -31,8 +31,10 @@ #define IF_PREFIX_AUTOCONF 0x02 enum { + INET6_IFADDR_STATE_PREDAD, INET6_IFADDR_STATE_DAD, INET6_IFADDR_STATE_POSTDAD, + INET6_IFADDR_STATE_ERRDAD, INET6_IFADDR_STATE_UP, INET6_IFADDR_STATE_DEAD, }; @@ -58,7 +60,7 @@ struct inet6_ifaddr { unsigned long cstamp; /* created timestamp */ unsigned long tstamp; /* updated timestamp */ - struct timer_list dad_timer; + struct delayed_work dad_work; struct inet6_dev *idev; struct rt6_info *rt; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 344e972..6c7fa08 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -133,10 +133,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev); static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE]; static DEFINE_SPINLOCK(addrconf_hash_lock); -static void addrconf_verify(unsigned long); +static void addrconf_verify(void); +static void addrconf_verify_rtnl(void); +static void addrconf_verify_work(struct work_struct *); -static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); -static DEFINE_SPINLOCK(addrconf_verify_lock); +static struct workqueue_struct *addrconf_wq; +static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work); static void addrconf_join_anycast(struct inet6_ifaddr *ifp); static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); @@ -151,7 +153,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, u32 flags, u32 noflags); static void addrconf_dad_start(struct inet6_ifaddr *ifp); -static void addrconf_dad_timer(unsigned long data); +static void addrconf_dad_work(struct work_struct *w); static void addrconf_dad_completed(struct inet6_ifaddr *ifp); static void addrconf_dad_run(struct inet6_dev *idev); static void addrconf_rs_timer(unsigned long data); @@ -247,9 +249,9 @@ static void addrconf_del_rs_timer(struct inet6_dev *idev) 
__in6_dev_put(idev); } -static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp) +static void addrconf_del_dad_work(struct inet6_ifaddr *ifp) { - if (del_timer(&ifp->dad_timer)) + if (cancel_delayed_work(&ifp->dad_work)) __in6_ifa_put(ifp); } @@ -261,12 +263,12 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev, mod_timer(&idev->rs_timer, jiffies + when); } -static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp, - unsigned long when) +static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp, + unsigned long delay) { - if (!timer_pending(&ifp->dad_timer)) + if (!delayed_work_pending(&ifp->dad_work)) in6_ifa_hold(ifp); - mod_timer(&ifp->dad_timer, jiffies + when); + mod_delayed_work(addrconf_wq, &ifp->dad_work, delay); } static int snmp6_alloc_dev(struct inet6_dev *idev) @@ -751,8 +753,9 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) in6_dev_put(ifp->idev); - if (del_timer(&ifp->dad_timer)) - pr_notice("Timer is still running, when freeing ifa=%p\n", ifp); + if (cancel_delayed_work(&ifp->dad_work)) + pr_notice("delayed DAD work was pending while freeing ifa=%p\n", + ifp); if (ifp->state != INET6_IFADDR_STATE_DEAD) { pr_warn("Freeing alive inet6 address %p\n", ifp); @@ -849,8 +852,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, spin_lock_init(&ifa->lock); spin_lock_init(&ifa->state_lock); - setup_timer(&ifa->dad_timer, addrconf_dad_timer, - (unsigned long)ifa); + INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work); INIT_HLIST_NODE(&ifa->addr_lst); ifa->scope = scope; ifa->prefix_len = pfxlen; @@ -990,6 +992,8 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP; unsigned long expires; + ASSERT_RTNL(); + spin_lock_bh(&ifp->state_lock); state = ifp->state; ifp->state = INET6_IFADDR_STATE_DEAD; @@ -1021,7 +1025,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) write_unlock_bh(&ifp->idev->lock); - addrconf_del_dad_timer(ifp); + 
addrconf_del_dad_work(ifp); ipv6_ifa_notify(RTM_DELADDR, ifp); @@ -1604,7 +1608,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) { if (ifp->flags&IFA_F_PERMANENT) { spin_lock_bh(&ifp->lock); - addrconf_del_dad_timer(ifp); + addrconf_del_dad_work(ifp); ifp->flags |= IFA_F_TENTATIVE; if (dad_failed) ifp->flags |= IFA_F_DADFAILED; @@ -1625,20 +1629,21 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) spin_unlock_bh(&ifp->lock); } ipv6_del_addr(ifp); - } else + } else { ipv6_del_addr(ifp); + } } static int addrconf_dad_end(struct inet6_ifaddr *ifp) { int err = -ENOENT; - spin_lock(&ifp->state_lock); + spin_lock_bh(&ifp->state_lock); if (ifp->state == INET6_IFADDR_STATE_DAD) { ifp->state = INET6_IFADDR_STATE_POSTDAD; err = 0; } - spin_unlock(&ifp->state_lock); + spin_unlock_bh(&ifp->state_lock); return err; } @@ -1671,7 +1676,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) } } - addrconf_dad_stop(ifp, 1); + spin_lock_bh(&ifp->state_lock); + /* transition from _POSTDAD to _ERRDAD */ + ifp->state = INET6_IFADDR_STATE_ERRDAD; + spin_unlock_bh(&ifp->state_lock); + + addrconf_mod_dad_work(ifp, 0); } /* Join to solicited addr multicast group. 
*/ @@ -1680,6 +1690,8 @@ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr) { struct in6_addr maddr; + ASSERT_RTNL(); + if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) return; @@ -1691,6 +1703,8 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) { struct in6_addr maddr; + ASSERT_RTNL(); + if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP)) return; @@ -1701,6 +1715,9 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) static void addrconf_join_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; + + ASSERT_RTNL(); + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); @@ -1712,6 +1729,9 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; + + ASSERT_RTNL(); + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); @@ -2271,11 +2291,13 @@ ok: return; } - ifp->flags |= IFA_F_MANAGETEMPADDR; update_lft = 0; create = 1; + spin_lock_bh(&ifp->lock); + ifp->flags |= IFA_F_MANAGETEMPADDR; ifp->cstamp = jiffies; ifp->tokenized = tokenized; + spin_unlock_bh(&ifp->lock); addrconf_dad_start(ifp); } @@ -2326,7 +2348,7 @@ ok: create, now); in6_ifa_put(ifp); - addrconf_verify(0); + addrconf_verify(); } } inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo); @@ -2475,7 +2497,7 @@ static int inet6_addr_add(struct net *net, int ifindex, manage_tempaddrs(idev, ifp, valid_lft, prefered_lft, true, jiffies); in6_ifa_put(ifp); - addrconf_verify(0); + addrconf_verify_rtnl(); return 0; } @@ -3011,7 +3033,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { hlist_del_init_rcu(&ifa->addr_lst); - addrconf_del_dad_timer(ifa); + addrconf_del_dad_work(ifa); goto restart; } } @@ -3049,7 +3071,7 @@ static int addrconf_ifdown(struct net_device 
*dev, int how) while (!list_empty(&idev->addr_list)) { ifa = list_first_entry(&idev->addr_list, struct inet6_ifaddr, if_list); - addrconf_del_dad_timer(ifa); + addrconf_del_dad_work(ifa); list_del(&ifa->if_list); @@ -3148,10 +3170,10 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp) rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1); ifp->dad_probes = idev->cnf.dad_transmits; - addrconf_mod_dad_timer(ifp, rand_num); + addrconf_mod_dad_work(ifp, rand_num); } -static void addrconf_dad_start(struct inet6_ifaddr *ifp) +static void addrconf_dad_begin(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; @@ -3203,25 +3225,68 @@ out: read_unlock_bh(&idev->lock); } -static void addrconf_dad_timer(unsigned long data) +static void addrconf_dad_start(struct inet6_ifaddr *ifp) { - struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; + bool begin_dad = false; + + spin_lock_bh(&ifp->state_lock); + if (ifp->state != INET6_IFADDR_STATE_DEAD) { + ifp->state = INET6_IFADDR_STATE_PREDAD; + begin_dad = true; + } + spin_unlock_bh(&ifp->state_lock); + + if (begin_dad) + addrconf_mod_dad_work(ifp, 0); +} + +static void addrconf_dad_work(struct work_struct *w) +{ + struct inet6_ifaddr *ifp = container_of(to_delayed_work(w), + struct inet6_ifaddr, + dad_work); struct inet6_dev *idev = ifp->idev; struct in6_addr mcaddr; + enum { + DAD_PROCESS, + DAD_BEGIN, + DAD_ABORT, + } action = DAD_PROCESS; + + rtnl_lock(); + + spin_lock_bh(&ifp->state_lock); + if (ifp->state == INET6_IFADDR_STATE_PREDAD) { + action = DAD_BEGIN; + ifp->state = INET6_IFADDR_STATE_DAD; + } else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) { + action = DAD_ABORT; + ifp->state = INET6_IFADDR_STATE_POSTDAD; + } + spin_unlock_bh(&ifp->state_lock); + + if (action == DAD_BEGIN) { + addrconf_dad_begin(ifp); + goto out; + } else if (action == DAD_ABORT) { + addrconf_dad_stop(ifp, 1); + goto out; + } + if (!ifp->dad_probes && addrconf_dad_end(ifp)) goto out; - 
write_lock(&idev->lock); + write_lock_bh(&idev->lock); if (idev->dead || !(idev->if_flags & IF_READY)) { - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); goto out; } spin_lock(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_DEAD) { spin_unlock(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); goto out; } @@ -3232,7 +3297,7 @@ static void addrconf_dad_timer(unsigned long data) ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); addrconf_dad_completed(ifp); @@ -3240,16 +3305,17 @@ static void addrconf_dad_timer(unsigned long data) } ifp->dad_probes--; - addrconf_mod_dad_timer(ifp, - NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME)); + addrconf_mod_dad_work(ifp, + NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME)); spin_unlock(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any); out: in6_ifa_put(ifp); + rtnl_unlock(); } /* ifp->idev must be at least read locked */ @@ -3276,7 +3342,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) struct in6_addr lladdr; bool send_rs, send_mld; - addrconf_del_dad_timer(ifp); + addrconf_del_dad_work(ifp); /* * Configure the address for reception. Now it is valid. 
@@ -3517,23 +3583,23 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) * Periodic address status verification */ -static void addrconf_verify(unsigned long foo) +static void addrconf_verify_rtnl(void) { unsigned long now, next, next_sec, next_sched; struct inet6_ifaddr *ifp; int i; + ASSERT_RTNL(); + rcu_read_lock_bh(); - spin_lock(&addrconf_verify_lock); now = jiffies; next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); - del_timer(&addr_chk_timer); + cancel_delayed_work(&addr_chk_work); for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: - hlist_for_each_entry_rcu_bh(ifp, - &inet6_addr_lst[i], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) { unsigned long age; /* When setting preferred_lft to a value not zero or @@ -3628,13 +3694,22 @@ restart: ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n", now, next, next_sec, next_sched); - - addr_chk_timer.expires = next_sched; - add_timer(&addr_chk_timer); - spin_unlock(&addrconf_verify_lock); + mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now); rcu_read_unlock_bh(); } +static void addrconf_verify_work(struct work_struct *w) +{ + rtnl_lock(); + addrconf_verify_rtnl(); + rtnl_unlock(); +} + +static void addrconf_verify(void) +{ + mod_delayed_work(addrconf_wq, &addr_chk_work, 0); +} + static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local, struct in6_addr **peer_pfx) { @@ -3691,6 +3766,8 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, bool was_managetempaddr; bool had_prefixroute; + ASSERT_RTNL(); + if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; @@ -3756,7 +3833,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, !was_managetempaddr, jiffies); } - addrconf_verify(0); + addrconf_verify_rtnl(); return 0; } @@ -4386,6 +4463,8 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) bool update_rs = false; struct in6_addr ll_addr; 
+ ASSERT_RTNL(); + if (token == NULL) return -EINVAL; if (ipv6_addr_any(token)) @@ -4434,7 +4513,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) } write_unlock_bh(&idev->lock); - addrconf_verify(0); + addrconf_verify_rtnl(); return 0; } @@ -4636,6 +4715,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { struct net *net = dev_net(ifp->idev->dev); + if (event) + ASSERT_RTNL(); + inet6_ifa_notify(event ? : RTM_NEWADDR, ifp); switch (event) { @@ -5244,6 +5326,12 @@ int __init addrconf_init(void) if (err < 0) goto out_addrlabel; + addrconf_wq = create_workqueue("ipv6_addrconf"); + if (!addrconf_wq) { + err = -ENOMEM; + goto out_nowq; + } + /* The addrconf netdev notifier requires that loopback_dev * has it's ipv6 private information allocated and setup * before it can bring up and give link-local addresses @@ -5274,7 +5362,7 @@ int __init addrconf_init(void) register_netdevice_notifier(&ipv6_dev_notf); - addrconf_verify(0); + addrconf_verify(); rtnl_af_register(&inet6_ops); @@ -5302,6 +5390,8 @@ errout: rtnl_af_unregister(&inet6_ops); unregister_netdevice_notifier(&ipv6_dev_notf); errlo: + destroy_workqueue(addrconf_wq); +out_nowq: unregister_pernet_subsys(&addrconf_ops); out_addrlabel: ipv6_addr_label_cleanup(); @@ -5337,7 +5427,8 @@ void addrconf_cleanup(void) for (i = 0; i < IN6_ADDR_HSIZE; i++) WARN_ON(!hlist_empty(&inet6_addr_lst[i])); spin_unlock_bh(&addrconf_hash_lock); - - del_timer(&addr_chk_timer); + cancel_delayed_work(&addr_chk_work); rtnl_unlock(); + + destroy_workqueue(addrconf_wq); } -- cgit v0.10.2 From d9060742fbf630fe31951dfc10b798deb2813f01 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Fri, 28 Mar 2014 13:33:38 -0700 Subject: ocfs2: check if cluster name exists before deref Commit c74a3bdd9b52 ("ocfs2: add clustername to cluster connection") is trying to strlcpy a string which was explicitly passed as NULL in the very same patch, triggering a NULL ptr deref. 
BUG: unable to handle kernel NULL pointer dereference at (null) IP: strlcpy (lib/string.c:388 lib/string.c:151) CPU: 19 PID: 19426 Comm: trinity-c19 Tainted: G W 3.14.0-rc7-next-20140325-sasha-00014-g9476368-dirty #274 RIP: strlcpy (lib/string.c:388 lib/string.c:151) Call Trace: ocfs2_cluster_connect (fs/ocfs2/stackglue.c:350) ocfs2_cluster_connect_agnostic (fs/ocfs2/stackglue.c:396) user_dlm_register (fs/ocfs2/dlmfs/userdlm.c:679) dlmfs_mkdir (fs/ocfs2/dlmfs/dlmfs.c:503) vfs_mkdir (fs/namei.c:3467) SyS_mkdirat (fs/namei.c:3488 fs/namei.c:3472) tracesys (arch/x86/kernel/entry_64.S:749) akpm: this patch probably disables the feature. A temporary thing to avoid trivial oopses. Signed-off-by: Sasha Levin Cc: Goldwyn Rodrigues Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 1324e66..ca5ce14 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -346,7 +346,9 @@ int ocfs2_cluster_connect(const char *stack_name, strlcpy(new_conn->cc_name, group, GROUP_NAME_MAX + 1); new_conn->cc_namelen = grouplen; - strlcpy(new_conn->cc_cluster_name, cluster_name, CLUSTER_NAME_MAX + 1); + if (cluster_name_len) + strlcpy(new_conn->cc_cluster_name, cluster_name, + CLUSTER_NAME_MAX + 1); new_conn->cc_cluster_name_len = cluster_name_len; new_conn->cc_recovery_handler = recovery_handler; new_conn->cc_recovery_data = recovery_data; -- cgit v0.10.2 From 825600c0f20e595daaa7a6dd8970f84fa2a2ee57 Mon Sep 17 00:00:00 2001 From: Artem Fetishev Date: Fri, 28 Mar 2014 13:33:39 -0700 Subject: x86: fix boot on uniprocessor systems On x86 uniprocessor systems topology_physical_package_id() returns -1 which causes rapl_cpu_prepare() to leave rapl_pmu variable uninitialized which leads to GPF in rapl_pmu_init(). See arch/x86/kernel/cpu/perf_event_intel_rapl.c. It turns out that physical_package_id and core_id can actually be retrieved for uniprocessor systems too. 
Enabling them also fixes rapl_pmu code. Signed-off-by: Artem Fetishev Cc: Stephane Eranian Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index d35f24e..1306d11 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -119,9 +119,10 @@ static inline void setup_node_to_cpumask_map(void) { } extern const struct cpumask *cpu_coregroup_mask(int cpu); -#ifdef ENABLE_TOPO_DEFINES #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) + +#ifdef ENABLE_TOPO_DEFINES #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) #endif -- cgit v0.10.2 From 79b30750d99a7f0fb4eaaa1c69139a7821bf6771 Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Thu, 27 Mar 2014 10:33:44 -0700 Subject: MAINTAINERS: bonding: change email address Update my email address. Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index 0b3c40f..acac797 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1832,7 +1832,7 @@ F: net/bluetooth/ F: include/net/bluetooth/ BONDING DRIVER -M: Jay Vosburgh +M: Jay Vosburgh M: Veaceslav Falico M: Andy Gospodarek L: netdev@vger.kernel.org -- cgit v0.10.2 From 898602a049504dea256e696ee3152dc0b788a393 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Thu, 27 Mar 2014 18:43:50 +0100 Subject: MAINTAINERS: bonding: change email address Signed-off-by: Veaceslav Falico Signed-off-by: David S. 
Miller diff --git a/MAINTAINERS b/MAINTAINERS index acac797..a80c84e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1833,7 +1833,7 @@ F: include/net/bluetooth/ BONDING DRIVER M: Jay Vosburgh -M: Veaceslav Falico +M: Veaceslav Falico M: Andy Gospodarek L: netdev@vger.kernel.org W: http://sourceforge.net/projects/bonding/ -- cgit v0.10.2 From 53d6471cef17262d3ad1c7ce8982a234244f68ec Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 17:26:18 -0400 Subject: net: Account for all vlan headers in skb_mac_gso_segment skb_network_protocol() already accounts for multiple vlan headers that may be present in the skb. However, skb_mac_gso_segment() doesn't know anything about it and assumes that skb->mac_len is set correctly to skip all mac headers. That may not always be the case. If we are simply forwarding the packet (via bridge or macvtap), all vlan headers may not be accounted for. A simple solution is to allow skb_network_protocol to return the vlan depth it has calculated. This way skb_mac_gso_segment will correctly skip all mac headers. Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e8eeebd..daafd95 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3014,7 +3014,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { return __skb_gso_segment(skb, features, true); } -__be16 skb_network_protocol(struct sk_buff *skb); +__be16 skb_network_protocol(struct sk_buff *skb, int *depth); static inline bool can_checksum_protocol(netdev_features_t features, __be16 protocol) diff --git a/net/core/dev.c b/net/core/dev.c index b1b0c8d..45fa2f1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2286,7 +2286,7 @@ out: } EXPORT_SYMBOL(skb_checksum_help); -__be16 skb_network_protocol(struct sk_buff *skb) +__be16 skb_network_protocol(struct sk_buff *skb, int *depth) { __be16 type = skb->protocol; int vlan_depth = ETH_HLEN; @@ -2313,6 +2313,8 @@ __be16 skb_network_protocol(struct sk_buff *skb) vlan_depth += VLAN_HLEN; } + *depth = vlan_depth; + return type; } @@ -2326,12 +2328,13 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_offload *ptype; - __be16 type = skb_network_protocol(skb); + int vlan_depth = skb->mac_len; + __be16 type = skb_network_protocol(skb, &vlan_depth); if (unlikely(!type)) return ERR_PTR(-EINVAL); - __skb_pull(skb, skb->mac_len); + __skb_pull(skb, vlan_depth); rcu_read_lock(); list_for_each_entry_rcu(ptype, &offload_base, list) { @@ -2498,8 +2501,10 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, const struct net_device *dev, netdev_features_t features) { + int tmp; + if (skb->ip_summed != CHECKSUM_NONE && - !can_checksum_protocol(features, skb_network_protocol(skb))) { + !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) { features &= ~NETIF_F_ALL_CSUM; } else if (illegal_highdma(dev, skb)) { features &= ~NETIF_F_SG; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 97e5a2c..90b96a1 100644 
--- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2879,8 +2879,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, int err = -ENOMEM; int i = 0; int pos; + int dummy; - proto = skb_network_protocol(head_skb); + proto = skb_network_protocol(head_skb, &dummy); if (unlikely(!proto)) return ERR_PTR(-EINVAL); -- cgit v0.10.2 From fc92f745f8d0d3736ce5afb00a905d7cc61f9c46 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 21:51:18 -0400 Subject: bridge: Fix crash with vlan filtering and tcpdump When the vlan filtering is enabled on the bridge, but the filter is not configured on the bridge device itself, running tcpdump on the bridge device will result in an Oops with NULL pointer dereference. The reason is that br_pass_frame_up() will bypass the vlan check because promisc flag is set. It will then try to get the table pointer and process the packet based on the table. Since the table pointer is NULL, we oops. Catch this special condition in br_handle_vlan(). Reported-by: Toshiaki Makita CC: Toshiaki Makita Signed-off-by: Vlad Yasevich Acked-by: Toshiaki Makita Signed-off-by: David S. Miller diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 28d5446..d0cca3c 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -29,6 +29,7 @@ static int br_pass_frame_up(struct sk_buff *skb) struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; struct net_bridge *br = netdev_priv(brdev); struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); + struct net_port_vlans *pv; u64_stats_update_begin(&brstats->syncp); brstats->rx_packets++; @@ -39,18 +40,18 @@ static int br_pass_frame_up(struct sk_buff *skb) * packet is allowed except in promisc modue when someone * may be running packet capture. 
*/ + pv = br_get_vlan_info(br); if (!(brdev->flags & IFF_PROMISC) && - !br_allowed_egress(br, br_get_vlan_info(br), skb)) { + !br_allowed_egress(br, pv, skb)) { kfree_skb(skb); return NET_RX_DROP; } - skb = br_handle_vlan(br, br_get_vlan_info(br), skb); - if (!skb) - return NET_RX_DROP; - indev = skb->dev; skb->dev = brdev; + skb = br_handle_vlan(br, pv, skb); + if (!skb) + return NET_RX_DROP; return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, netif_receive_skb); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index c77eed5..f23c74b 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -128,6 +128,20 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, if (!br->vlan_enabled) goto out; + /* Vlan filter table must be configured at this point. The + * only exception is the bridge is set in promisc mode and the + * packet is destined for the bridge device. In this case + * pass the packet as is. + */ + if (!pv) { + if ((br->dev->flags & IFF_PROMISC) && skb->dev == br->dev) { + goto out; + } else { + kfree_skb(skb); + return NULL; + } + } + /* At this point, we know that the frame was filtered and contains * a valid vlan id. If the vlan id is set in the untagged bitmap, * send untagged; otherwise, send tagged. -- cgit v0.10.2 From f6d1ac4b5f15f57929fe0fa283b3a45dfec717a0 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 22:14:46 -0400 Subject: qlge: Do not propagate vlan tag offloads to vlans qlge driver turns off NETIF_F_HW_CTAG_FILTER, but forgets to turn off HW_CTAG_TX and HW_CTAG_RX on vlan devices. With the current settings, q-in-q will only generate a single vlan header. Remember to mask off CTAG_TX and CTAG_RX features in vlan_features. CC: Shahed Shaikh CC: Jitendra Kalsaria CC: Ron Mercer Signed-off-by: Vlad Yasevich Acked-by: Jitendra Kalsaria Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index ce2cfdd..656c65d 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -4765,7 +4765,9 @@ static int qlge_probe(struct pci_dev *pdev, ndev->features = ndev->hw_features; ndev->vlan_features = ndev->hw_features; /* vlan gets same features (except vlan filter) */ - ndev->vlan_features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + ndev->vlan_features &= ~(NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX); if (test_bit(QL_DMA64, &qdev->flags)) ndev->features |= NETIF_F_HIGHDMA; -- cgit v0.10.2 From 8dd6e147b0c29723ec10d0e836c7f3466d61a19b Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 22:14:47 -0400 Subject: ifb: Remove vlan acceleration from vlan_features Do not include vlan acceleration features in vlan_features as that precludes correct Q-in-Q operation. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index c14d39b..d7b2e94 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -180,7 +180,8 @@ static void ifb_setup(struct net_device *dev) dev->tx_queue_len = TX_Q_LIMIT; dev->features |= IFB_FEATURES; - dev->vlan_features |= IFB_FEATURES; + dev->vlan_features |= IFB_FEATURES & ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); dev->flags |= IFF_NOARP; dev->flags &= ~IFF_MULTICAST; -- cgit v0.10.2 From 3f8c707b9a83cd956af65796081b6c8cb8716089 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 22:14:48 -0400 Subject: veth: Turn off vlan rx acceleration in vlan_features For completeness, turn off vlan rx acceleration in vlan_features so that it doesn't show up on q-in-q setups. Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 5b37437..c0e7c64 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -286,7 +286,10 @@ static void veth_setup(struct net_device *dev) dev->features |= NETIF_F_LLTX; dev->features |= VETH_FEATURES; dev->vlan_features = dev->features & - ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); + ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_RX); dev->destructor = veth_dev_free; dev->hw_features = VETH_FEATURES; -- cgit v0.10.2 From 2adb956b084d6d49f519541a4b5f9947e96f8ef7 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 22:14:49 -0400 Subject: vlan: Warn the user if lowerdev has bad vlan features. Some drivers incorrectly assign vlan acceleration features to vlan_features thus causing issues for Q-in-Q vlan configurations. Warn the user of such cases. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 1005ebf..5a09a48 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -163,4 +163,11 @@ enum { /* changeable features with no special hardware requirements */ #define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO) +#define NETIF_F_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_FILTER | \ + NETIF_F_HW_VLAN_CTAG_RX | \ + NETIF_F_HW_VLAN_CTAG_TX | \ + NETIF_F_HW_VLAN_STAG_FILTER | \ + NETIF_F_HW_VLAN_STAG_RX | \ + NETIF_F_HW_VLAN_STAG_TX) + #endif /* _LINUX_NETDEV_FEATURES_H */ diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index a9591ff..27bfe2f 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -578,6 +578,9 @@ static int vlan_dev_init(struct net_device *dev) dev->features |= real_dev->vlan_features | NETIF_F_LLTX; dev->gso_max_size = real_dev->gso_max_size; + if (dev->features & NETIF_F_VLAN_FEATURES) + netdev_warn(real_dev, "VLAN features are set incorrectly. 
Q-in-Q configurations may not work correctly.\n"); + /* ipv6 shared card related stuff */ dev->dev_id = real_dev->dev_id; -- cgit v0.10.2 From 638c323c4d1f8eaf25224946e21ce8818f1bcee1 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 25 Mar 2014 15:36:02 +0200 Subject: rbd: drop an unsafe assertion Olivier Bonvalet reported having repeated crashes due to a failed assertion he was hitting in rbd_img_obj_callback(): Assertion failure in rbd_img_obj_callback() at line 2165: rbd_assert(which >= img_request->next_completion); With a lot of help from Olivier with reproducing the problem we were able to determine the object and image requests had already been completed (and often freed) at the point the assertion failed. There was a great deal of discussion on the ceph-devel mailing list about this. The problem only arose when there were two (or more) object requests in an image request, and the problem was always seen when the second request was being completed. The problem is due to a race in the window between setting the "done" flag on an object request and checking the image request's next completion value. When the first object request completes, it checks to see if its successor request is marked "done", and if so, that request is also completed. In the process, the image request's next_completion value is updated to reflect that both the first and second requests are completed. By the time the second request is able to check the next_completion value, it has been set to a value *greater* than its own "which" value, which caused an assertion to fail. Fix this problem by skipping over any completion processing unless the completing object request is the next one expected. Test only for inequality (not >=), and eliminate the bad assertion. 
Tested-by: Olivier Bonvalet Signed-off-by: Alex Elder Reviewed-by: Sage Weil Reviewed-by: Ilya Dryomov diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index b365e0d..34898d5 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2109,7 +2109,6 @@ static void rbd_img_obj_callback(struct rbd_obj_request *obj_request) rbd_assert(img_request->obj_request_count > 0); rbd_assert(which != BAD_WHICH); rbd_assert(which < img_request->obj_request_count); - rbd_assert(which >= img_request->next_completion); spin_lock_irq(&img_request->completion_lock); if (which != img_request->next_completion) -- cgit v0.10.2 From fc7392aa1b20debc7f398acc39ffc817630f11e6 Mon Sep 17 00:00:00 2001 From: Elias Vanderstuyft Date: Sat, 29 Mar 2014 12:08:45 -0700 Subject: Input: don't modify the id of ioctl-provided ff effect on upload failure If a new (id == -1) ff effect was uploaded from userspace, ff-core.c::input_ff_upload() will have assigned a positive number to the new effect id. Currently, evdev.c::evdev_do_ioctl() will save this new id to userspace, regardless of whether the upload succeeded or not. On upload failure, this can be confusing because the dev->ff->effects[] array will not contain an element at the index of that new effect id. This patch fixes this by leaving the id unchanged after upload fails. Note: Unfortunately applications should still expect changed effect id for quite some time. 
This has been discussed on: http://www.mail-archive.com/linux-input@vger.kernel.org/msg08513.html ("ff-core effect id handling in case of a failed effect upload") Suggested-by: Dmitry Torokhov Signed-off-by: Elias Vanderstuyft Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index a06e125..ce953d8 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -954,11 +954,13 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, return -EFAULT; error = input_ff_upload(dev, &effect, file); + if (error) + return error; if (put_user(effect.id, &(((struct ff_effect __user *)p)->id))) return -EFAULT; - return error; + return 0; } /* Multi-number variable-length handlers */ -- cgit v0.10.2 From e4dbedc7eac7da9db363a36f2bd4366962eeefcc Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 6 Mar 2014 12:57:24 -0800 Subject: Input: mousedev - fix race when creating mixed device We should not be using static variable mousedev_mix in methods that can be called before that singleton gets assigned. While at it let's add open and close methods to mousedev structure so that we do not need to test if we are dealing with multiplexor or normal device and simply call appropriate method directly. 
This fixes: https://bugzilla.kernel.org/show_bug.cgi?id=71551 Reported-by: GiulioDP Tested-by: GiulioDP Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/mousedev.c b/drivers/input/mousedev.c index 4c842c3..b604564 100644 --- a/drivers/input/mousedev.c +++ b/drivers/input/mousedev.c @@ -67,7 +67,6 @@ struct mousedev { struct device dev; struct cdev cdev; bool exist; - bool is_mixdev; struct list_head mixdev_node; bool opened_by_mixdev; @@ -77,6 +76,9 @@ struct mousedev { int old_x[4], old_y[4]; int frac_dx, frac_dy; unsigned long touch; + + int (*open_device)(struct mousedev *mousedev); + void (*close_device)(struct mousedev *mousedev); }; enum mousedev_emul { @@ -116,9 +118,6 @@ static unsigned char mousedev_imex_seq[] = { 0xf3, 200, 0xf3, 200, 0xf3, 80 }; static struct mousedev *mousedev_mix; static LIST_HEAD(mousedev_mix_list); -static void mixdev_open_devices(void); -static void mixdev_close_devices(void); - #define fx(i) (mousedev->old_x[(mousedev->pkt_count - (i)) & 03]) #define fy(i) (mousedev->old_y[(mousedev->pkt_count - (i)) & 03]) @@ -428,9 +427,7 @@ static int mousedev_open_device(struct mousedev *mousedev) if (retval) return retval; - if (mousedev->is_mixdev) - mixdev_open_devices(); - else if (!mousedev->exist) + if (!mousedev->exist) retval = -ENODEV; else if (!mousedev->open++) { retval = input_open_device(&mousedev->handle); @@ -446,9 +443,7 @@ static void mousedev_close_device(struct mousedev *mousedev) { mutex_lock(&mousedev->mutex); - if (mousedev->is_mixdev) - mixdev_close_devices(); - else if (mousedev->exist && !--mousedev->open) + if (mousedev->exist && !--mousedev->open) input_close_device(&mousedev->handle); mutex_unlock(&mousedev->mutex); @@ -459,21 +454,29 @@ static void mousedev_close_device(struct mousedev *mousedev) * stream. Note that this function is called with mousedev_mix->mutex * held. 
*/ -static void mixdev_open_devices(void) +static int mixdev_open_devices(struct mousedev *mixdev) { - struct mousedev *mousedev; + int error; + + error = mutex_lock_interruptible(&mixdev->mutex); + if (error) + return error; - if (mousedev_mix->open++) - return; + if (!mixdev->open++) { + struct mousedev *mousedev; - list_for_each_entry(mousedev, &mousedev_mix_list, mixdev_node) { - if (!mousedev->opened_by_mixdev) { - if (mousedev_open_device(mousedev)) - continue; + list_for_each_entry(mousedev, &mousedev_mix_list, mixdev_node) { + if (!mousedev->opened_by_mixdev) { + if (mousedev_open_device(mousedev)) + continue; - mousedev->opened_by_mixdev = true; + mousedev->opened_by_mixdev = true; + } } } + + mutex_unlock(&mixdev->mutex); + return 0; } /* @@ -481,19 +484,22 @@ static void mixdev_open_devices(void) * device. Note that this function is called with mousedev_mix->mutex * held. */ -static void mixdev_close_devices(void) +static void mixdev_close_devices(struct mousedev *mixdev) { - struct mousedev *mousedev; + mutex_lock(&mixdev->mutex); - if (--mousedev_mix->open) - return; + if (!--mixdev->open) { + struct mousedev *mousedev; - list_for_each_entry(mousedev, &mousedev_mix_list, mixdev_node) { - if (mousedev->opened_by_mixdev) { - mousedev->opened_by_mixdev = false; - mousedev_close_device(mousedev); + list_for_each_entry(mousedev, &mousedev_mix_list, mixdev_node) { + if (mousedev->opened_by_mixdev) { + mousedev->opened_by_mixdev = false; + mousedev_close_device(mousedev); + } } } + + mutex_unlock(&mixdev->mutex); } @@ -522,7 +528,7 @@ static int mousedev_release(struct inode *inode, struct file *file) mousedev_detach_client(mousedev, client); kfree(client); - mousedev_close_device(mousedev); + mousedev->close_device(mousedev); return 0; } @@ -550,7 +556,7 @@ static int mousedev_open(struct inode *inode, struct file *file) client->mousedev = mousedev; mousedev_attach_client(mousedev, client); - error = mousedev_open_device(mousedev); + error = 
mousedev->open_device(mousedev); if (error) goto err_free_client; @@ -861,16 +867,21 @@ static struct mousedev *mousedev_create(struct input_dev *dev, if (mixdev) { dev_set_name(&mousedev->dev, "mice"); + + mousedev->open_device = mixdev_open_devices; + mousedev->close_device = mixdev_close_devices; } else { int dev_no = minor; /* Normalize device number if it falls into legacy range */ if (dev_no < MOUSEDEV_MINOR_BASE + MOUSEDEV_MINORS) dev_no -= MOUSEDEV_MINOR_BASE; dev_set_name(&mousedev->dev, "mouse%d", dev_no); + + mousedev->open_device = mousedev_open_device; + mousedev->close_device = mousedev_close_device; } mousedev->exist = true; - mousedev->is_mixdev = mixdev; mousedev->handle.dev = input_get_device(dev); mousedev->handle.name = dev_name(&mousedev->dev); mousedev->handle.handler = handler; @@ -919,7 +930,7 @@ static void mousedev_destroy(struct mousedev *mousedev) device_del(&mousedev->dev); mousedev_cleanup(mousedev); input_free_minor(MINOR(mousedev->dev.devt)); - if (!mousedev->is_mixdev) + if (mousedev != mousedev_mix) input_unregister_handle(&mousedev->handle); put_device(&mousedev->dev); } -- cgit v0.10.2 From 0818bf27c05b2de56c5b2bd08cfae2a939bd5f52 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Feb 2014 13:46:44 -0500 Subject: resizable namespace.c hashes * switch allocation to alloc_large_system_hash() * make sizes overridable by boot parameters (mhash_entries=, mphash_entries=) * switch mountpoint_hashtable from list_head to hlist_head Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/fs/mount.h b/fs/mount.h index a17458c..acdb428 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -19,7 +19,7 @@ struct mnt_pcp { }; struct mountpoint { - struct list_head m_hash; + struct hlist_node m_hash; struct dentry *m_dentry; int m_count; }; diff --git a/fs/namespace.c b/fs/namespace.c index 22e5367..3b648da 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -23,11 +23,34 @@ #include #include #include +#include #include "pnode.h" #include 
"internal.h" -#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) -#define HASH_SIZE (1UL << HASH_SHIFT) +static unsigned int m_hash_mask __read_mostly; +static unsigned int m_hash_shift __read_mostly; +static unsigned int mp_hash_mask __read_mostly; +static unsigned int mp_hash_shift __read_mostly; + +static __initdata unsigned long mhash_entries; +static int __init set_mhash_entries(char *str) +{ + if (!str) + return 0; + mhash_entries = simple_strtoul(str, &str, 0); + return 1; +} +__setup("mhash_entries=", set_mhash_entries); + +static __initdata unsigned long mphash_entries; +static int __init set_mphash_entries(char *str) +{ + if (!str) + return 0; + mphash_entries = simple_strtoul(str, &str, 0); + return 1; +} +__setup("mphash_entries=", set_mphash_entries); static int event; static DEFINE_IDA(mnt_id_ida); @@ -37,7 +60,7 @@ static int mnt_id_start = 0; static int mnt_group_start = 1; static struct list_head *mount_hashtable __read_mostly; -static struct list_head *mountpoint_hashtable __read_mostly; +static struct hlist_head *mountpoint_hashtable __read_mostly; static struct kmem_cache *mnt_cache __read_mostly; static DECLARE_RWSEM(namespace_sem); @@ -55,12 +78,19 @@ EXPORT_SYMBOL_GPL(fs_kobj); */ __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); -static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) +static inline struct list_head *m_hash(struct vfsmount *mnt, struct dentry *dentry) { unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); tmp += ((unsigned long)dentry / L1_CACHE_BYTES); - tmp = tmp + (tmp >> HASH_SHIFT); - return tmp & (HASH_SIZE - 1); + tmp = tmp + (tmp >> m_hash_shift); + return &mount_hashtable[tmp & m_hash_mask]; +} + +static inline struct hlist_head *mp_hash(struct dentry *dentry) +{ + unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES); + tmp = tmp + (tmp >> mp_hash_shift); + return &mountpoint_hashtable[tmp & mp_hash_mask]; } /* @@ -575,7 +605,7 @@ bool legitimize_mnt(struct 
vfsmount *bastard, unsigned seq) */ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) { - struct list_head *head = mount_hashtable + hash(mnt, dentry); + struct list_head *head = m_hash(mnt, dentry); struct mount *p; list_for_each_entry_rcu(p, head, mnt_hash) @@ -590,7 +620,7 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) */ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) { - struct list_head *head = mount_hashtable + hash(mnt, dentry); + struct list_head *head = m_hash(mnt, dentry); struct mount *p; list_for_each_entry_reverse(p, head, mnt_hash) @@ -633,11 +663,11 @@ struct vfsmount *lookup_mnt(struct path *path) static struct mountpoint *new_mountpoint(struct dentry *dentry) { - struct list_head *chain = mountpoint_hashtable + hash(NULL, dentry); + struct hlist_head *chain = mp_hash(dentry); struct mountpoint *mp; int ret; - list_for_each_entry(mp, chain, m_hash) { + hlist_for_each_entry(mp, chain, m_hash) { if (mp->m_dentry == dentry) { /* might be worth a WARN_ON() */ if (d_unlinked(dentry)) @@ -659,7 +689,7 @@ static struct mountpoint *new_mountpoint(struct dentry *dentry) mp->m_dentry = dentry; mp->m_count = 1; - list_add(&mp->m_hash, chain); + hlist_add_head(&mp->m_hash, chain); return mp; } @@ -670,7 +700,7 @@ static void put_mountpoint(struct mountpoint *mp) spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_MOUNTED; spin_unlock(&dentry->d_lock); - list_del(&mp->m_hash); + hlist_del(&mp->m_hash); kfree(mp); } } @@ -739,8 +769,7 @@ static void attach_mnt(struct mount *mnt, struct mountpoint *mp) { mnt_set_mountpoint(parent, mp, mnt); - list_add_tail(&mnt->mnt_hash, mount_hashtable + - hash(&parent->mnt, mp->m_dentry)); + list_add_tail(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); } @@ -762,8 +791,8 @@ static void commit_tree(struct mount *mnt) list_splice(&head, n->list.prev); - list_add_tail(&mnt->mnt_hash, 
mount_hashtable + - hash(&parent->mnt, mnt->mnt_mountpoint)); + list_add_tail(&mnt->mnt_hash, + m_hash(&parent->mnt, mnt->mnt_mountpoint)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); touch_mnt_namespace(n); } @@ -2777,18 +2806,24 @@ void __init mnt_init(void) mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); - mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); - mountpoint_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); + mount_hashtable = alloc_large_system_hash("Mount-cache", + sizeof(struct list_head), + mhash_entries, 19, + 0, + &m_hash_shift, &m_hash_mask, 0, 0); + mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache", + sizeof(struct hlist_head), + mphash_entries, 19, + 0, + &mp_hash_shift, &mp_hash_mask, 0, 0); if (!mount_hashtable || !mountpoint_hashtable) panic("Failed to allocate mount hash table\n"); - printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE); - - for (u = 0; u < HASH_SIZE; u++) + for (u = 0; u <= m_hash_mask; u++) INIT_LIST_HEAD(&mount_hashtable[u]); - for (u = 0; u < HASH_SIZE; u++) - INIT_LIST_HEAD(&mountpoint_hashtable[u]); + for (u = 0; u <= mp_hash_mask; u++) + INIT_HLIST_HEAD(&mountpoint_hashtable[u]); kernfs_init(); -- cgit v0.10.2 From 1d6a32acd70ab18499829c0a9a5dbe2bace72a13 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 20 Mar 2014 20:34:43 -0400 Subject: keep shadowed vfsmounts together preparation to switching mnt_hash to hlist Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/fs/namespace.c b/fs/namespace.c index 3b648da..9db3ce3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -621,12 +621,20 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) { struct list_head *head = m_hash(mnt, dentry); - struct mount *p; + struct mount *p, *res = NULL; - list_for_each_entry_reverse(p, head, mnt_hash) 
+ list_for_each_entry(p, head, mnt_hash) if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) - return p; - return NULL; + goto found; + return res; +found: + res = p; + list_for_each_entry_continue(p, head, mnt_hash) { + if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry) + break; + res = p; + } + return res; } /* @@ -769,14 +777,14 @@ static void attach_mnt(struct mount *mnt, struct mountpoint *mp) { mnt_set_mountpoint(parent, mp, mnt); - list_add_tail(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry)); + list_add(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); } /* * vfsmount lock must be held for write */ -static void commit_tree(struct mount *mnt) +static void commit_tree(struct mount *mnt, struct mount *shadows) { struct mount *parent = mnt->mnt_parent; struct mount *m; @@ -791,7 +799,10 @@ static void commit_tree(struct mount *mnt) list_splice(&head, n->list.prev); - list_add_tail(&mnt->mnt_hash, + if (shadows) + list_add(&mnt->mnt_hash, &shadows->mnt_hash); + else + list_add(&mnt->mnt_hash, m_hash(&parent->mnt, mnt->mnt_mountpoint)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); touch_mnt_namespace(n); @@ -1659,12 +1670,15 @@ static int attach_recursive_mnt(struct mount *source_mnt, touch_mnt_namespace(source_mnt->mnt_ns); } else { mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); - commit_tree(source_mnt); + commit_tree(source_mnt, NULL); } list_for_each_entry_safe(child, p, &tree_list, mnt_hash) { + struct mount *q; list_del_init(&child->mnt_hash); - commit_tree(child); + q = __lookup_mnt_last(&child->mnt_parent->mnt, + child->mnt_mountpoint); + commit_tree(child, q); } unlock_mount_hash(); -- cgit v0.10.2 From 0b1b901b5a98bb36943d10820efc796f7cd45ff3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Mar 2014 10:14:08 -0400 Subject: don't bother with propagate_mnt() unless the target is shared If the dest_mnt is not shared, propagate_mnt() does nothing - there's no 
mounts to propagate to and thus no copies to create. Might as well not bother calling it in that case. Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/fs/namespace.c b/fs/namespace.c index 9db3ce3..d3fb9f0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1653,16 +1653,14 @@ static int attach_recursive_mnt(struct mount *source_mnt, err = invent_group_ids(source_mnt, true); if (err) goto out; - } - err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list); - if (err) - goto out_cleanup_ids; - - lock_mount_hash(); - - if (IS_MNT_SHARED(dest_mnt)) { + err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list); + if (err) + goto out_cleanup_ids; + lock_mount_hash(); for (p = source_mnt; p; p = next_mnt(p, source_mnt)) set_mnt_shared(p); + } else { + lock_mount_hash(); } if (parent_path) { detach_mnt(source_mnt, parent_path); @@ -1685,8 +1683,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, return 0; out_cleanup_ids: - if (IS_MNT_SHARED(dest_mnt)) - cleanup_group_ids(source_mnt, NULL); + cleanup_group_ids(source_mnt, NULL); out: return err; } -- cgit v0.10.2 From 38129a13e6e71f666e0468e99fdd932a687b4d7e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 20 Mar 2014 21:10:51 -0400 Subject: switch mnt_hash to hlist fixes RCU bug - walking through hlist is safe in face of element moves, since it's self-terminating. Cyclic lists are not - if we end up jumping to another hash chain, we'll loop infinitely without ever hitting the original list head. 
[fix for dumb braino folded] Spotted by: Max Kellermann Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/fs/mount.h b/fs/mount.h index acdb428..b29e42f 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -25,7 +25,7 @@ struct mountpoint { }; struct mount { - struct list_head mnt_hash; + struct hlist_node mnt_hash; struct mount *mnt_parent; struct dentry *mnt_mountpoint; struct vfsmount mnt; diff --git a/fs/namespace.c b/fs/namespace.c index d3fb9f0..2ffc5a2 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -59,7 +59,7 @@ static DEFINE_SPINLOCK(mnt_id_lock); static int mnt_id_start = 0; static int mnt_group_start = 1; -static struct list_head *mount_hashtable __read_mostly; +static struct hlist_head *mount_hashtable __read_mostly; static struct hlist_head *mountpoint_hashtable __read_mostly; static struct kmem_cache *mnt_cache __read_mostly; static DECLARE_RWSEM(namespace_sem); @@ -78,7 +78,7 @@ EXPORT_SYMBOL_GPL(fs_kobj); */ __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); -static inline struct list_head *m_hash(struct vfsmount *mnt, struct dentry *dentry) +static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry) { unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); tmp += ((unsigned long)dentry / L1_CACHE_BYTES); @@ -217,7 +217,7 @@ static struct mount *alloc_vfsmnt(const char *name) mnt->mnt_writers = 0; #endif - INIT_LIST_HEAD(&mnt->mnt_hash); + INIT_HLIST_NODE(&mnt->mnt_hash); INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); INIT_LIST_HEAD(&mnt->mnt_list); @@ -605,10 +605,10 @@ bool legitimize_mnt(struct vfsmount *bastard, unsigned seq) */ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) { - struct list_head *head = m_hash(mnt, dentry); + struct hlist_head *head = m_hash(mnt, dentry); struct mount *p; - list_for_each_entry_rcu(p, head, mnt_hash) + hlist_for_each_entry_rcu(p, head, mnt_hash) if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) return p; return 
NULL; @@ -620,20 +620,16 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) */ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) { - struct list_head *head = m_hash(mnt, dentry); - struct mount *p, *res = NULL; - - list_for_each_entry(p, head, mnt_hash) - if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) - goto found; - return res; -found: - res = p; - list_for_each_entry_continue(p, head, mnt_hash) { + struct mount *p, *res; + res = p = __lookup_mnt(mnt, dentry); + if (!p) + goto out; + hlist_for_each_entry_continue(p, mnt_hash) { if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry) break; res = p; } +out: return res; } @@ -750,7 +746,7 @@ static void detach_mnt(struct mount *mnt, struct path *old_path) mnt->mnt_parent = mnt; mnt->mnt_mountpoint = mnt->mnt.mnt_root; list_del_init(&mnt->mnt_child); - list_del_init(&mnt->mnt_hash); + hlist_del_init_rcu(&mnt->mnt_hash); put_mountpoint(mnt->mnt_mp); mnt->mnt_mp = NULL; } @@ -777,7 +773,7 @@ static void attach_mnt(struct mount *mnt, struct mountpoint *mp) { mnt_set_mountpoint(parent, mp, mnt); - list_add(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry)); + hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); } @@ -800,9 +796,9 @@ static void commit_tree(struct mount *mnt, struct mount *shadows) list_splice(&head, n->list.prev); if (shadows) - list_add(&mnt->mnt_hash, &shadows->mnt_hash); + hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash); else - list_add(&mnt->mnt_hash, + hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mnt->mnt_mountpoint)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); touch_mnt_namespace(n); @@ -1193,26 +1189,28 @@ int may_umount(struct vfsmount *mnt) EXPORT_SYMBOL(may_umount); -static LIST_HEAD(unmounted); /* protected by namespace_sem */ +static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static void 
namespace_unlock(void) { struct mount *mnt; - LIST_HEAD(head); + struct hlist_head head = unmounted; - if (likely(list_empty(&unmounted))) { + if (likely(hlist_empty(&head))) { up_write(&namespace_sem); return; } - list_splice_init(&unmounted, &head); + head.first->pprev = &head.first; + INIT_HLIST_HEAD(&unmounted); + up_write(&namespace_sem); synchronize_rcu(); - while (!list_empty(&head)) { - mnt = list_first_entry(&head, struct mount, mnt_hash); - list_del_init(&mnt->mnt_hash); + while (!hlist_empty(&head)) { + mnt = hlist_entry(head.first, struct mount, mnt_hash); + hlist_del_init(&mnt->mnt_hash); if (mnt->mnt_ex_mountpoint.mnt) path_put(&mnt->mnt_ex_mountpoint); mntput(&mnt->mnt); @@ -1233,16 +1231,19 @@ static inline void namespace_lock(void) */ void umount_tree(struct mount *mnt, int how) { - LIST_HEAD(tmp_list); + HLIST_HEAD(tmp_list); struct mount *p; + struct mount *last = NULL; - for (p = mnt; p; p = next_mnt(p, mnt)) - list_move(&p->mnt_hash, &tmp_list); + for (p = mnt; p; p = next_mnt(p, mnt)) { + hlist_del_init_rcu(&p->mnt_hash); + hlist_add_head(&p->mnt_hash, &tmp_list); + } if (how) propagate_umount(&tmp_list); - list_for_each_entry(p, &tmp_list, mnt_hash) { + hlist_for_each_entry(p, &tmp_list, mnt_hash) { list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); @@ -1260,8 +1261,13 @@ void umount_tree(struct mount *mnt, int how) p->mnt_mp = NULL; } change_mnt_propagation(p, MS_PRIVATE); + last = p; + } + if (last) { + last->mnt_hash.next = unmounted.first; + unmounted.first = tmp_list.first; + unmounted.first->pprev = &unmounted.first; } - list_splice(&tmp_list, &unmounted); } static void shrink_submounts(struct mount *mnt); @@ -1645,8 +1651,9 @@ static int attach_recursive_mnt(struct mount *source_mnt, struct mountpoint *dest_mp, struct path *parent_path) { - LIST_HEAD(tree_list); + HLIST_HEAD(tree_list); struct mount *child, *p; + struct hlist_node *n; int err; if (IS_MNT_SHARED(dest_mnt)) { @@ -1671,9 +1678,9 
@@ static int attach_recursive_mnt(struct mount *source_mnt, commit_tree(source_mnt, NULL); } - list_for_each_entry_safe(child, p, &tree_list, mnt_hash) { + hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) { struct mount *q; - list_del_init(&child->mnt_hash); + hlist_del_init(&child->mnt_hash); q = __lookup_mnt_last(&child->mnt_parent->mnt, child->mnt_mountpoint); commit_tree(child, q); @@ -2818,7 +2825,7 @@ void __init mnt_init(void) 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); mount_hashtable = alloc_large_system_hash("Mount-cache", - sizeof(struct list_head), + sizeof(struct hlist_head), mhash_entries, 19, 0, &m_hash_shift, &m_hash_mask, 0, 0); @@ -2832,7 +2839,7 @@ void __init mnt_init(void) panic("Failed to allocate mount hash table\n"); for (u = 0; u <= m_hash_mask; u++) - INIT_LIST_HEAD(&mount_hashtable[u]); + INIT_HLIST_HEAD(&mount_hashtable[u]); for (u = 0; u <= mp_hash_mask; u++) INIT_HLIST_HEAD(&mountpoint_hashtable[u]); diff --git a/fs/pnode.c b/fs/pnode.c index c7221bb..88396df 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -220,14 +220,14 @@ static struct mount *get_source(struct mount *dest, * @tree_list : list of heads of trees to be attached. 
*/ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, - struct mount *source_mnt, struct list_head *tree_list) + struct mount *source_mnt, struct hlist_head *tree_list) { struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; struct mount *m, *child; int ret = 0; struct mount *prev_dest_mnt = dest_mnt; struct mount *prev_src_mnt = source_mnt; - LIST_HEAD(tmp_list); + HLIST_HEAD(tmp_list); for (m = propagation_next(dest_mnt, dest_mnt); m; m = propagation_next(m, dest_mnt)) { @@ -246,27 +246,29 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, child = copy_tree(source, source->mnt.mnt_root, type); if (IS_ERR(child)) { ret = PTR_ERR(child); - list_splice(tree_list, tmp_list.prev); + tmp_list = *tree_list; + tmp_list.first->pprev = &tmp_list.first; + INIT_HLIST_HEAD(tree_list); goto out; } if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) { mnt_set_mountpoint(m, dest_mp, child); - list_add_tail(&child->mnt_hash, tree_list); + hlist_add_head(&child->mnt_hash, tree_list); } else { /* * This can happen if the parent mount was bind mounted * on some subdirectory of a shared/slave mount. 
*/ - list_add_tail(&child->mnt_hash, &tmp_list); + hlist_add_head(&child->mnt_hash, &tmp_list); } prev_dest_mnt = m; prev_src_mnt = child; } out: lock_mount_hash(); - while (!list_empty(&tmp_list)) { - child = list_first_entry(&tmp_list, struct mount, mnt_hash); + while (!hlist_empty(&tmp_list)) { + child = hlist_entry(tmp_list.first, struct mount, mnt_hash); umount_tree(child, 0); } unlock_mount_hash(); @@ -338,8 +340,10 @@ static void __propagate_umount(struct mount *mnt) * umount the child only if the child has no * other children */ - if (child && list_empty(&child->mnt_mounts)) - list_move_tail(&child->mnt_hash, &mnt->mnt_hash); + if (child && list_empty(&child->mnt_mounts)) { + hlist_del_init_rcu(&child->mnt_hash); + hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash); + } } } @@ -350,11 +354,11 @@ static void __propagate_umount(struct mount *mnt) * * vfsmount lock must be held for write */ -int propagate_umount(struct list_head *list) +int propagate_umount(struct hlist_head *list) { struct mount *mnt; - list_for_each_entry(mnt, list, mnt_hash) + hlist_for_each_entry(mnt, list, mnt_hash) __propagate_umount(mnt); return 0; } diff --git a/fs/pnode.h b/fs/pnode.h index 59e7eda..fc28a27 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -36,8 +36,8 @@ static inline void set_mnt_shared(struct mount *mnt) void change_mnt_propagation(struct mount *, int); int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, - struct list_head *); -int propagate_umount(struct list_head *); + struct hlist_head *); +int propagate_umount(struct hlist_head *); int propagate_mount_busy(struct mount *, int); void mnt_release_group_id(struct mount *); int get_dominating_id(struct mount *mnt, const struct path *root); -- cgit v0.10.2 From 00a1a053ebe5febcfc2ec498bd894f035ad2aa06 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 30 Mar 2014 10:20:01 -0400 Subject: ext4: atomically set inode->i_flags in ext4_set_inode_flags() Use cmpxchg() to atomically set i_flags instead 
of clearing out the S_IMMUTABLE, S_APPEND, etc. flags and then setting them from the EXT4_IMMUTABLE_FL, EXT4_APPEND_FL flags, since this opens up a race where an immutable file has the immutable flag cleared for a brief window of time. Reported-by: John Sullivan Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org Signed-off-by: Linus Torvalds diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6e39895..24bfd7f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "ext4_jbd2.h" #include "xattr.h" @@ -3921,18 +3922,20 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) void ext4_set_inode_flags(struct inode *inode) { unsigned int flags = EXT4_I(inode)->i_flags; + unsigned int new_fl = 0; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); if (flags & EXT4_SYNC_FL) - inode->i_flags |= S_SYNC; + new_fl |= S_SYNC; if (flags & EXT4_APPEND_FL) - inode->i_flags |= S_APPEND; + new_fl |= S_APPEND; if (flags & EXT4_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; + new_fl |= S_IMMUTABLE; if (flags & EXT4_NOATIME_FL) - inode->i_flags |= S_NOATIME; + new_fl |= S_NOATIME; if (flags & EXT4_DIRSYNC_FL) - inode->i_flags |= S_DIRSYNC; + new_fl |= S_DIRSYNC; + set_mask_bits(&inode->i_flags, + S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); } /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ diff --git a/include/linux/bitops.h b/include/linux/bitops.h index abc9ca7..be5fd38 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -196,6 +196,21 @@ static inline unsigned long __ffs64(u64 word) #ifdef __KERNEL__ +#ifndef set_mask_bits +#define set_mask_bits(ptr, _mask, _bits) \ +({ \ + const typeof(*ptr) mask = (_mask), bits = (_bits); \ + typeof(*ptr) old, new; \ + \ + do { \ + old = ACCESS_ONCE(*ptr); \ + new = (old & ~mask) | bits; \ + } while (cmpxchg(ptr, old, new) != old); \ + \ + new; \ +}) +#endif + #ifndef find_last_bit /** * find_last_bit - find 
the last set bit in a memory region -- cgit v0.10.2 From aa4af831bb4f3168f2f574b2620124699c09c4a3 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Sun, 30 Mar 2014 19:07:54 -0400 Subject: AUDIT: Allow login in non-init namespaces It is possible to configure your PAM stack to refuse login if audit messages (about the login) were unable to be sent. This is common in many distros and thus normal configuration of many containers. The PAM modules determine if audit is enabled/disabled in the kernel based on the return value from sending an audit message on the netlink socket. If userspace gets back ECONNREFUSED it believes audit is disabled in the kernel. If it gets any other error it refuses to let the login proceed. Just about ever since the introduction of namespaces the kernel audit subsystem has returned EPERM if the task sending a message was not in the init user or pid namespace. So many forms of containers have never worked if audit was enabled in the kernel. BUT if the container was not in net_init then the kernel network code would send ECONNREFUSED (instead of the audit code sending EPERM). Thus by pure accident/dumb luck/bug if an admin configured the PAM stack to reject all logins that didn't talk to audit, but then ran the login utility in the non-init_net namespace, it would work!! Clearly this was a bug, but it is a bug some people expected. With the introduction of network namespace support in 3.14-rc1 the two bugs stopped cancelling each other out. Now, containers in the non-init_net namespace refused to let users log in (just like PAM was configured!) Obviously some people were not happy that what used to let users log in, now didn't! This fix is kinda hacky. We return ECONNREFUSED for all non-init relevant namespaces. That means that not only will the old broken non-init_net setups continue to work, now the broken non-init_pid or non-init_user setups will 'work'. They don't really work, since audit isn't logging things. 
But it's what most users want. In 3.15 we should have patches to support not only the non-init_net (3.14) namespace but also the non-init_pid and non-init_user namespace. So all will be right in the world. This just opens the doors wide open on 3.14 and hopefully makes users happy, if not the audit system... Reported-by: Andre Tomt Reported-by: Adam Richter Signed-off-by: Eric Paris Signed-off-by: Linus Torvalds diff --git a/kernel/audit.c b/kernel/audit.c index 3392d3e..95a20f3 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -608,9 +608,19 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) int err = 0; /* Only support the initial namespaces for now. */ + /* + * We return ECONNREFUSED because it tricks userspace into thinking + * that audit was not configured into the kernel. Lots of users + * configure their PAM stack (because that's what the distro does) + * to reject login if unable to send messages to audit. If we return + * ECONNREFUSED the PAM stack thinks the kernel does not have audit + * configured in and will let login proceed. If we return EPERM + * userspace will reject all logins. This should be removed when we + * support non init namespaces!! + */ if ((current_user_ns() != &init_user_ns) || (task_active_pid_ns(current) != &init_pid_ns)) - return -EPERM; + return -ECONNREFUSED; switch (msg_type) { case AUDIT_LIST: -- cgit v0.10.2 From 01358e562a8b97f50ec04025c009c71508e6d373 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 28 Mar 2014 09:45:33 -0700 Subject: MAINTAINERS: resume as Documentation maintainer I am the new kernel tree Documentation maintainer (except for parts that are handled by other people, of course). 
Signed-off-by: Randy Dunlap Acked-by: Rob Landley Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index a80c84e..900d98e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2801,9 +2801,9 @@ S: Supported F: drivers/acpi/dock.c DOCUMENTATION -M: Rob Landley +M: Randy Dunlap L: linux-doc@vger.kernel.org -T: TBD +T: quilt http://www.infradead.org/~rdunlap/Doc/patches/ S: Maintained F: Documentation/ -- cgit v0.10.2 From 455c6fdbd219161bd09b1165f11699d6d73de11c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 30 Mar 2014 20:40:15 -0700 Subject: Linux 3.14 diff --git a/Makefile b/Makefile index c10b734..e5ac8a6 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc8 +EXTRAVERSION = NAME = Shuffling Zombie Juror # *DOCUMENTATION* -- cgit v0.10.2 From 03a620d8fa5f5423b87aa93e49bfa79b731c5790 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Mon, 31 Mar 2014 11:05:17 -0600 Subject: ASoC: rt5640: add an of_match table Add a device tree match table. This serves to make the driver's support of device tree more explicit. Perhaps the fallback for DT matching to using the i2c_device_id table will go away one day, since it fails in face of devices from different vendors with the same name. 
Signed-off-by: Stephen Warren Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index 0061ae6..68b4dd6 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -2074,6 +2074,14 @@ static const struct i2c_device_id rt5640_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, rt5640_i2c_id); +#if defined(CONFIG_OF) +static const struct of_device_id rt5640_of_match[] = { + { .compatible = "realtek,rt5640", }, + {}, +}; +MODULE_DEVICE_TABLE(of, rt5640_of_match); +#endif + #ifdef CONFIG_ACPI static struct acpi_device_id rt5640_acpi_match[] = { { "INT33CA", 0 }, @@ -2203,6 +2211,7 @@ static struct i2c_driver rt5640_i2c_driver = { .name = "rt5640", .owner = THIS_MODULE, .acpi_match_table = ACPI_PTR(rt5640_acpi_match), + .of_match_table = of_match_ptr(rt5640_of_match), }, .probe = rt5640_i2c_probe, .remove = rt5640_i2c_remove, -- cgit v0.10.2 From e3efe3bedb6592465d9a2bd116d5e611dae362c3 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Mon, 31 Mar 2014 12:25:24 -0600 Subject: ASoC: alc5632: fix uninit var in alc5632_probe() alc5632_probe() returns ret, yet it is not initialized or set anywhere. This ends up causing the function to appear to fail, and audio not to work on the Toshiba AC100, with my compiler at least. This function used to set ret in all cases, but recent cleanup removed that. 
Fixes: 5d6be5aa6bec ("ASoC: codec: Simplify ASoC probe code.") Signed-off-by: Stephen Warren Reviewed-by: Thierry Reding Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/alc5632.c b/sound/soc/codecs/alc5632.c index ec071a6..3ee8d4e 100644 --- a/sound/soc/codecs/alc5632.c +++ b/sound/soc/codecs/alc5632.c @@ -1061,7 +1061,6 @@ static int alc5632_resume(struct snd_soc_codec *codec) static int alc5632_probe(struct snd_soc_codec *codec) { struct alc5632_priv *alc5632 = snd_soc_codec_get_drvdata(codec); - int ret; /* power on device */ alc5632_set_bias_level(codec, SND_SOC_BIAS_STANDBY); @@ -1075,7 +1074,7 @@ static int alc5632_probe(struct snd_soc_codec *codec) return -EINVAL; } - return ret; + return 0; } /* power down chip */ -- cgit v0.10.2 From c31b0cb1f1a19bc551875e07e9dd7c531ac3580e Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Mon, 31 Mar 2014 12:38:16 -0600 Subject: ASoC: alc5632: add an of_match table Add a device tree match table. This serves to make the driver's support of device tree more explicit. Perhaps the fallback for DT matching to using the i2c_device_id table will go away one day, since it fails in face of devices from different vendors with the same name. 
Signed-off-by: Stephen Warren Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/alc5632.c b/sound/soc/codecs/alc5632.c index 3ee8d4e..85942ca 100644 --- a/sound/soc/codecs/alc5632.c +++ b/sound/soc/codecs/alc5632.c @@ -1190,11 +1190,18 @@ static const struct i2c_device_id alc5632_i2c_table[] = { }; MODULE_DEVICE_TABLE(i2c, alc5632_i2c_table); +static const struct of_device_id alc5632_of_match[] = { + { .compatible = "realtek,alc5632", }, + { } +}; +MODULE_DEVICE_TABLE(of, alc5632_of_match); + /* i2c codec control layer */ static struct i2c_driver alc5632_i2c_driver = { .driver = { .name = "alc5632", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(alc5632_of_match), }, .probe = alc5632_i2c_probe, .remove = alc5632_i2c_remove, -- cgit v0.10.2 From 2951f93f431a2fc8956a3b13882dc07cb5b8b2b9 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Mon, 31 Mar 2014 12:38:18 -0600 Subject: ASoC: max98090: add an of_match table Add a device tree match table. This serves to make the driver's support of device tree more explicit. Perhaps the fallback for DT matching to using the i2c_device_id table will go away one day, since it fails in face of devices from different vendors with the same name. 
Signed-off-by: Stephen Warren Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index 361862d..83a773c 100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c @@ -2406,11 +2406,18 @@ static const struct i2c_device_id max98090_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, max98090_i2c_id); +static const struct of_device_id max98090_of_match[] = { + { .compatible = "maxim,max98090", }, + { } +}; +MODULE_DEVICE_TABLE(of, max98090_of_match); + static struct i2c_driver max98090_i2c_driver = { .driver = { .name = "max98090", .owner = THIS_MODULE, .pm = &max98090_pm, + .of_match_table = of_match_ptr(max98090_of_match), }, .probe = max98090_i2c_probe, .remove = max98090_i2c_remove, -- cgit v0.10.2 From 3534b842a83549eb4d06613c616844c8762e9fd0 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Mon, 31 Mar 2014 12:38:17 -0600 Subject: ASoC: tlv320aic23: add an of_match table Add a device tree match table. This serves to make the driver's support of device tree more explicit. Perhaps the fallback for DT matching to using the i2c_device_id table will go away one day, since it fails in face of devices from different vendors with the same name. 
Signed-off-by: Stephen Warren Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/tlv320aic23-i2c.c b/sound/soc/codecs/tlv320aic23-i2c.c index 20fc460..b73c94e 100644 --- a/sound/soc/codecs/tlv320aic23-i2c.c +++ b/sound/soc/codecs/tlv320aic23-i2c.c @@ -43,9 +43,16 @@ static const struct i2c_device_id tlv320aic23_id[] = { MODULE_DEVICE_TABLE(i2c, tlv320aic23_id); +static const struct of_device_id tlv320aic23_of_match[] = { + { .compatible = "ti,tlv320aic23", }, + { } +}; +MODULE_DEVICE_TABLE(of, tlv320aic23_of_match); + static struct i2c_driver tlv320aic23_i2c_driver = { .driver = { .name = "tlv320aic23-codec", + .of_match_table = of_match_ptr(tlv320aic23_of_match), }, .probe = tlv320aic23_i2c_probe, .remove = __exit_p(tlv320aic23_i2c_remove), -- cgit v0.10.2 From 3567de676b601696676faf91b39cf82d9f7ae7ec Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Sat, 15 Mar 2014 13:44:23 +0100 Subject: ASoC: fsl-ssi: Update binding documentation A lot of bindings were changed without an update of the binding documentation. This patch adds those changes to the documentation. Signed-off-by: Markus Pargmann Signed-off-by: Mark Brown diff --git a/Documentation/devicetree/bindings/sound/fsl,ssi.txt b/Documentation/devicetree/bindings/sound/fsl,ssi.txt index b93e9a9..3aa4a8f 100644 --- a/Documentation/devicetree/bindings/sound/fsl,ssi.txt +++ b/Documentation/devicetree/bindings/sound/fsl,ssi.txt @@ -20,15 +20,6 @@ Required properties: have. - interrupt-parent: The phandle for the interrupt controller that services interrupts for this device. -- fsl,mode: The operating mode for the SSI interface. - "i2s-slave" - I2S mode, SSI is clock slave - "i2s-master" - I2S mode, SSI is clock master - "lj-slave" - left-justified mode, SSI is clock slave - "lj-master" - l.j. 
mode, SSI is clock master - "rj-slave" - right-justified mode, SSI is clock slave - "rj-master" - r.j., SSI is clock master - "ac97-slave" - AC97 mode, SSI is clock slave - "ac97-master" - AC97 mode, SSI is clock master - fsl,playback-dma: Phandle to a node for the DMA channel to use for playback of audio. This is typically dictated by SOC design. See the notes below. @@ -47,6 +38,9 @@ Required properties: be connected together, and SRFS and STFS be connected together. This would still allow different sample sizes, but not different sample rates. + - clocks: "ipg" - Required clock for the SSI unit + "baud" - Required clock for SSI master mode. Otherwise this + clock is not used Required are also ac97 link bindings if ac97 is used. See Documentation/devicetree/bindings/sound/soc-ac97link.txt for the necessary @@ -64,6 +58,15 @@ Optional properties: Documentation/devicetree/bindings/dma/dma.txt. - dma-names: Two dmas have to be defined, "tx" and "rx", if fsl,imx-fiq is not defined. +- fsl,mode: The operating mode for the SSI interface. + "i2s-slave" - I2S mode, SSI is clock slave + "i2s-master" - I2S mode, SSI is clock master + "lj-slave" - left-justified mode, SSI is clock slave + "lj-master" - l.j. mode, SSI is clock master + "rj-slave" - right-justified mode, SSI is clock slave + "rj-master" - r.j., SSI is clock master + "ac97-slave" - AC97 mode, SSI is clock slave + "ac97-master" - AC97 mode, SSI is clock master Child 'codec' node required properties: - compatible: Compatible list, contains the name of the codec -- cgit v0.10.2 From 06b4b813058f6092ded5d7e0d92d4c34d92975bd Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Fri, 4 Apr 2014 11:04:35 +0800 Subject: ASoC: cs42xx8: Check return value of regmap_read and report correct chipid value Fix checking return value of regmap_read(). Also fix reporting the chip_id value. CS42XX8_CHIPID_CHIP_ID_MASK is 0xF0, so the chip_id value is ((val & CS42XX8_CHIPID_CHIP_ID_MASK) >> 4). 
Signed-off-by: Axel Lin Acked-by: Paul Handrigan Acked-by: Brian Austin Acked-by: Nicolin Chen Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/cs42xx8.c b/sound/soc/codecs/cs42xx8.c index 082299a..8502032 100644 --- a/sound/soc/codecs/cs42xx8.c +++ b/sound/soc/codecs/cs42xx8.c @@ -495,17 +495,16 @@ int cs42xx8_probe(struct device *dev, struct regmap *regmap) regcache_cache_bypass(cs42xx8->regmap, true); /* Validate the chip ID */ - regmap_read(cs42xx8->regmap, CS42XX8_CHIPID, &val); - if (val < 0) { - dev_err(dev, "failed to get device ID: %x", val); - ret = -EINVAL; + ret = regmap_read(cs42xx8->regmap, CS42XX8_CHIPID, &val); + if (ret < 0) { + dev_err(dev, "failed to get device ID, ret = %d", ret); goto err_enable; } /* The top four bits of the chip ID should be 0000 */ - if ((val & CS42XX8_CHIPID_CHIP_ID_MASK) != 0x00) { + if (((val & CS42XX8_CHIPID_CHIP_ID_MASK) >> 4) != 0x00) { dev_err(dev, "unmatched chip ID: %d\n", - val & CS42XX8_CHIPID_CHIP_ID_MASK); + (val & CS42XX8_CHIPID_CHIP_ID_MASK) >> 4); ret = -EINVAL; goto err_enable; } -- cgit v0.10.2 From f78b1e0a8b85ed80b38e9b603b675dd7a0923128 Mon Sep 17 00:00:00 2001 From: Christoph Jaeger Date: Fri, 4 Apr 2014 13:44:19 +0200 Subject: ASoC: alc56(23|32): fix undefined return value of probing code Commit 5d6be5aa ("ASoC: codec: Simplify ASoC probe code.") left variable 'ret', whose value is returned, uninitialized. Since it is not used otherwise, remove it. 
Signed-off-by: Christoph Jaeger Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/alc5623.c b/sound/soc/codecs/alc5623.c index 09f7e77..f500905 100644 --- a/sound/soc/codecs/alc5623.c +++ b/sound/soc/codecs/alc5623.c @@ -902,7 +902,6 @@ static int alc5623_probe(struct snd_soc_codec *codec) { struct alc5623_priv *alc5623 = snd_soc_codec_get_drvdata(codec); struct snd_soc_dapm_context *dapm = &codec->dapm; - int ret; alc5623_reset(codec); @@ -961,7 +960,7 @@ static int alc5623_probe(struct snd_soc_codec *codec) return -EINVAL; } - return ret; + return 0; } /* power down chip */ diff --git a/sound/soc/codecs/alc5632.c b/sound/soc/codecs/alc5632.c index ec071a6..3ee8d4e 100644 --- a/sound/soc/codecs/alc5632.c +++ b/sound/soc/codecs/alc5632.c @@ -1061,7 +1061,6 @@ static int alc5632_resume(struct snd_soc_codec *codec) static int alc5632_probe(struct snd_soc_codec *codec) { struct alc5632_priv *alc5632 = snd_soc_codec_get_drvdata(codec); - int ret; /* power on device */ alc5632_set_bias_level(codec, SND_SOC_BIAS_STANDBY); @@ -1075,7 +1074,7 @@ static int alc5632_probe(struct snd_soc_codec *codec) return -EINVAL; } - return ret; + return 0; } /* power down chip */ -- cgit v0.10.2 From a14bf88749c5b54bb9c636bcd47c26ea79560ce5 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 6 Apr 2014 00:04:35 +0800 Subject: ASoC: cs42l52: Fix mask for REVID BIT[0:2] of register 01h is REVID, so the mask for REVID should be 0x7. Also updates the code to use CS42L52_CHIP_REV_MASK. 
Signed-off-by: Axel Lin Acked-by: Brian Austin Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/cs42l52.c b/sound/soc/codecs/cs42l52.c index f0ca6be..460d355 100644 --- a/sound/soc/codecs/cs42l52.c +++ b/sound/soc/codecs/cs42l52.c @@ -1259,7 +1259,7 @@ static int cs42l52_i2c_probe(struct i2c_client *i2c_client, } dev_info(&i2c_client->dev, "Cirrus Logic CS42L52, Revision: %02X\n", - reg & 0xFF); + reg & CS42L52_CHIP_REV_MASK); /* Set Platform Data */ if (cs42l52->pdata.mica_diff_cfg) diff --git a/sound/soc/codecs/cs42l52.h b/sound/soc/codecs/cs42l52.h index 6fb8f00..ac445993 100644 --- a/sound/soc/codecs/cs42l52.h +++ b/sound/soc/codecs/cs42l52.h @@ -37,7 +37,7 @@ #define CS42L52_CHIP_REV_A0 0x00 #define CS42L52_CHIP_REV_A1 0x01 #define CS42L52_CHIP_REV_B0 0x02 -#define CS42L52_CHIP_REV_MASK 0x03 +#define CS42L52_CHIP_REV_MASK 0x07 #define CS42L52_PWRCTL1 0x02 #define CS42L52_PWRCTL1_PDN_ALL 0x9F -- cgit v0.10.2 From 05b0006734f2aa728331bfe169f105c4b386858a Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 3 Apr 2014 19:56:32 +0800 Subject: ASoC: da732x: Print correct major id DA732X_ID_MAJOR_MASK is 0xF0, so the major id is (reg & DA732X_ID_MAJOR_MASK) >> 4. 
Signed-off-by: Axel Lin Acked-by: Adam Thomson Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/da732x.c b/sound/soc/codecs/da732x.c index 7d168ec..48f3fef 100644 --- a/sound/soc/codecs/da732x.c +++ b/sound/soc/codecs/da732x.c @@ -1571,7 +1571,8 @@ static int da732x_i2c_probe(struct i2c_client *i2c, } dev_info(&i2c->dev, "Revision: %d.%d\n", - (reg & DA732X_ID_MAJOR_MASK), (reg & DA732X_ID_MINOR_MASK)); + (reg & DA732X_ID_MAJOR_MASK) >> 4, + (reg & DA732X_ID_MINOR_MASK)); ret = snd_soc_register_codec(&i2c->dev, &soc_codec_dev_da732x, da732x_dai, ARRAY_SIZE(da732x_dai)); -- cgit v0.10.2 From ef33bc3217c7aa9868f497c4f797cc50ad3ce357 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 4 Apr 2014 15:09:47 +0800 Subject: ASoC: fsl_sai: Fix Bit Clock Polarity configurations The BCP bit in the TCR2/RCR2 register is defined as follows: 0 Bit clock is active high with drive outputs on rising edge and sample inputs on falling edge. 1 Bit clock is active low with drive outputs on falling edge and sample inputs on rising edge. For all formats currently supported in the fsl_sai driver, they're exactly sending data on the falling edge and sampling on the rising edge. However, the driver clears this BCP bit for all of them which results in click noise when working with SGTL5000 and big noise with WM8962. Thus this patch corrects the BCP settings for all the formats here to fix the noise issue. Signed-off-by: Nicolin Chen Acked-by: Xiubo Li Signed-off-by: Mark Brown diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index 0bc98bb..56da8c8 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -179,7 +179,7 @@ static int fsl_sai_set_dai_fmt_tr(struct snd_soc_dai *cpu_dai, * that is, together with the last bit of the previous * data word.
*/ - val_cr2 &= ~FSL_SAI_CR2_BCP; + val_cr2 |= FSL_SAI_CR2_BCP; val_cr4 |= FSL_SAI_CR4_FSE | FSL_SAI_CR4_FSP; break; case SND_SOC_DAIFMT_LEFT_J: @@ -187,7 +187,7 @@ static int fsl_sai_set_dai_fmt_tr(struct snd_soc_dai *cpu_dai, * Frame high, one word length for frame sync, * frame sync asserts with the first bit of the frame. */ - val_cr2 &= ~FSL_SAI_CR2_BCP; + val_cr2 |= FSL_SAI_CR2_BCP; val_cr4 &= ~(FSL_SAI_CR4_FSE | FSL_SAI_CR4_FSP); break; case SND_SOC_DAIFMT_DSP_A: @@ -197,7 +197,7 @@ static int fsl_sai_set_dai_fmt_tr(struct snd_soc_dai *cpu_dai, * that is, together with the last bit of the previous * data word. */ - val_cr2 &= ~FSL_SAI_CR2_BCP; + val_cr2 |= FSL_SAI_CR2_BCP; val_cr4 &= ~FSL_SAI_CR4_FSP; val_cr4 |= FSL_SAI_CR4_FSE; sai->is_dsp_mode = true; @@ -207,7 +207,7 @@ static int fsl_sai_set_dai_fmt_tr(struct snd_soc_dai *cpu_dai, * Frame high, one bit for frame sync, * frame sync asserts with the first bit of the frame. */ - val_cr2 &= ~FSL_SAI_CR2_BCP; + val_cr2 |= FSL_SAI_CR2_BCP; val_cr4 &= ~(FSL_SAI_CR4_FSE | FSL_SAI_CR4_FSP); sai->is_dsp_mode = true; break; -- cgit v0.10.2 From 71e5222cbea124d737e1fe7de8e255253c12cd29 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Tue, 8 Apr 2014 17:11:04 +0530 Subject: ASoC: samsung: Fix build on multiplatform PCM and S/PDIF drivers referenced mach headers for a trivial data structure. This caused build errors on multiplatform builds as machine headers are not accessible from driver files. Move the data structure definition to the driver header and remove the dependency. While at it rename the structure to avoid multiple definition errors as the same structure is also used by the platform code. 
Signed-off-by: Sachin Kamat Signed-off-by: Mark Brown diff --git a/sound/soc/samsung/ac97.c b/sound/soc/samsung/ac97.c index 4a88e36..76b072b 100644 --- a/sound/soc/samsung/ac97.c +++ b/sound/soc/samsung/ac97.c @@ -39,15 +39,15 @@ struct s3c_ac97_info { }; static struct s3c_ac97_info s3c_ac97; -static struct s3c2410_dma_client s3c_dma_client_out = { +static struct s3c_dma_client s3c_dma_client_out = { .name = "AC97 PCMOut" }; -static struct s3c2410_dma_client s3c_dma_client_in = { +static struct s3c_dma_client s3c_dma_client_in = { .name = "AC97 PCMIn" }; -static struct s3c2410_dma_client s3c_dma_client_micin = { +static struct s3c_dma_client s3c_dma_client_micin = { .name = "AC97 MicIn" }; diff --git a/sound/soc/samsung/dma.h b/sound/soc/samsung/dma.h index 225e537..ad7c0f0 100644 --- a/sound/soc/samsung/dma.h +++ b/sound/soc/samsung/dma.h @@ -14,8 +14,12 @@ #include +struct s3c_dma_client { + char *name; +}; + struct s3c_dma_params { - struct s3c2410_dma_client *client; /* stream identifier */ + struct s3c_dma_client *client; /* stream identifier */ int channel; /* Channel ID */ dma_addr_t dma_addr; int dma_size; /* Size of the DMA transfer */ diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index 0a9b44c..048ead9 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -1211,10 +1211,10 @@ static int samsung_i2s_probe(struct platform_device *pdev) pri_dai->dma_playback.dma_addr = regs_base + I2STXD; pri_dai->dma_capture.dma_addr = regs_base + I2SRXD; pri_dai->dma_playback.client = - (struct s3c2410_dma_client *)&pri_dai->dma_playback; + (struct s3c_dma_client *)&pri_dai->dma_playback; pri_dai->dma_playback.ch_name = "tx"; pri_dai->dma_capture.client = - (struct s3c2410_dma_client *)&pri_dai->dma_capture; + (struct s3c_dma_client *)&pri_dai->dma_capture; pri_dai->dma_capture.ch_name = "rx"; pri_dai->dma_playback.dma_size = 4; pri_dai->dma_capture.dma_size = 4; @@ -1233,7 +1233,7 @@ static int samsung_i2s_probe(struct platform_device 
*pdev) } sec_dai->dma_playback.dma_addr = regs_base + I2STXDS; sec_dai->dma_playback.client = - (struct s3c2410_dma_client *)&sec_dai->dma_playback; + (struct s3c_dma_client *)&sec_dai->dma_playback; sec_dai->dma_playback.ch_name = "tx-sec"; if (!np) { diff --git a/sound/soc/samsung/pcm.c b/sound/soc/samsung/pcm.c index 6a5e4bf..ab54e29 100644 --- a/sound/soc/samsung/pcm.c +++ b/sound/soc/samsung/pcm.c @@ -20,7 +20,6 @@ #include #include -#include #include "dma.h" #include "pcm.h" @@ -132,11 +131,11 @@ struct s3c_pcm_info { struct s3c_dma_params *dma_capture; }; -static struct s3c2410_dma_client s3c_pcm_dma_client_out = { +static struct s3c_dma_client s3c_pcm_dma_client_out = { .name = "PCM Stereo out" }; -static struct s3c2410_dma_client s3c_pcm_dma_client_in = { +static struct s3c_dma_client s3c_pcm_dma_client_in = { .name = "PCM Stereo in" }; diff --git a/sound/soc/samsung/s3c2412-i2s.c b/sound/soc/samsung/s3c2412-i2s.c index d079445..e9bb5d7 100644 --- a/sound/soc/samsung/s3c2412-i2s.c +++ b/sound/soc/samsung/s3c2412-i2s.c @@ -33,11 +33,11 @@ #include "regs-i2s-v2.h" #include "s3c2412-i2s.h" -static struct s3c2410_dma_client s3c2412_dma_client_out = { +static struct s3c_dma_client s3c2412_dma_client_out = { .name = "I2S PCM Stereo out" }; -static struct s3c2410_dma_client s3c2412_dma_client_in = { +static struct s3c_dma_client s3c2412_dma_client_in = { .name = "I2S PCM Stereo in" }; diff --git a/sound/soc/samsung/s3c24xx-i2s.c b/sound/soc/samsung/s3c24xx-i2s.c index f31e916..d7b8457 100644 --- a/sound/soc/samsung/s3c24xx-i2s.c +++ b/sound/soc/samsung/s3c24xx-i2s.c @@ -31,11 +31,11 @@ #include "dma.h" #include "s3c24xx-i2s.h" -static struct s3c2410_dma_client s3c24xx_dma_client_out = { +static struct s3c_dma_client s3c24xx_dma_client_out = { .name = "I2S PCM Stereo out" }; -static struct s3c2410_dma_client s3c24xx_dma_client_in = { +static struct s3c_dma_client s3c24xx_dma_client_in = { .name = "I2S PCM Stereo in" }; diff --git a/sound/soc/samsung/spdif.c 
b/sound/soc/samsung/spdif.c index 28487dc..cfe63b7 100644 --- a/sound/soc/samsung/spdif.c +++ b/sound/soc/samsung/spdif.c @@ -18,7 +18,6 @@ #include #include -#include #include "dma.h" #include "spdif.h" @@ -94,7 +93,7 @@ struct samsung_spdif_info { struct s3c_dma_params *dma_playback; }; -static struct s3c2410_dma_client spdif_dma_client_out = { +static struct s3c_dma_client spdif_dma_client_out = { .name = "S/PDIF Stereo out", }; -- cgit v0.10.2 From 74ddd8c40d8ac747ec780be3da40b37641a9b396 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 4 Apr 2014 14:31:41 +0300 Subject: ASoC: davinci-mcasp: Fix bit clock polarity settings IB_NF, NB_IF and IB_IF configured the bc polarity incorrectly. The receive polarity was set to the same edge as the TX in these cases. Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index a01ae97..4f75cac 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -336,7 +336,7 @@ static int davinci_mcasp_set_dai_fmt(struct snd_soc_dai *cpu_dai, mcasp_clr_bits(mcasp, DAVINCI_MCASP_ACLKXCTL_REG, ACLKXPOL); mcasp_clr_bits(mcasp, DAVINCI_MCASP_TXFMCTL_REG, FSXPOL); - mcasp_set_bits(mcasp, DAVINCI_MCASP_ACLKRCTL_REG, ACLKRPOL); + mcasp_clr_bits(mcasp, DAVINCI_MCASP_ACLKRCTL_REG, ACLKRPOL); mcasp_clr_bits(mcasp, DAVINCI_MCASP_RXFMCTL_REG, FSRPOL); break; @@ -344,7 +344,7 @@ static int davinci_mcasp_set_dai_fmt(struct snd_soc_dai *cpu_dai, mcasp_set_bits(mcasp, DAVINCI_MCASP_ACLKXCTL_REG, ACLKXPOL); mcasp_set_bits(mcasp, DAVINCI_MCASP_TXFMCTL_REG, FSXPOL); - mcasp_clr_bits(mcasp, DAVINCI_MCASP_ACLKRCTL_REG, ACLKRPOL); + mcasp_set_bits(mcasp, DAVINCI_MCASP_ACLKRCTL_REG, ACLKRPOL); mcasp_set_bits(mcasp, DAVINCI_MCASP_RXFMCTL_REG, FSRPOL); break; @@ -352,7 +352,7 @@ static int davinci_mcasp_set_dai_fmt(struct snd_soc_dai *cpu_dai, mcasp_clr_bits(mcasp, DAVINCI_MCASP_ACLKXCTL_REG, ACLKXPOL); mcasp_set_bits(mcasp, 
DAVINCI_MCASP_TXFMCTL_REG, FSXPOL); - mcasp_set_bits(mcasp, DAVINCI_MCASP_ACLKRCTL_REG, ACLKRPOL); + mcasp_clr_bits(mcasp, DAVINCI_MCASP_ACLKRCTL_REG, ACLKRPOL); mcasp_set_bits(mcasp, DAVINCI_MCASP_RXFMCTL_REG, FSRPOL); break; -- cgit v0.10.2